*===============================================================================

* Created by: Callan Windsor 
* RDP2023-06: Firms' Price-setting Behaviour: Evidence from Earnings Calls
                                                                										               
*===============================================================================

* Start from an empty session and switch off output paging
clear all
set more off
* Folder prefix for the input workbook: replace the placeholder before running.
* The prefix is concatenated directly with the file name below, so it must end
* with a path separator.
local location <INSERT LOCATION>
* Load the analytical database, taking variable names from the first row
import excel using "`location'analytical_database_full_sample", firstrow

*_______________________________________________________________________________
* Set the panel variables 
*_______________________________________________________________________________

* Map the distinct values of qqyyyy, hhyyyy and Company to consecutive integer
* codes (time, time2 and group respectively)
* NOTE(review): egen group() numbers values in sort order; the lagged
* differences built later from time assume that this order is chronological --
* confirm against how qqyyyy is coded in the workbook
egen time  = group(qqyyyy)
egen time2 = group(hhyyyy)
egen group = group(Company)

*_______________________________________________________________________________
* Drop the second transcript for companies that report twice in a given quarter. 
* These transcripts are typically for the same event, but delivered to different
* audiences (e.g. BHP's offshore investors vs. local investors)
*_______________________________________________________________________________

* Keep only the earliest transcript (by EventTime) in each company-quarter.
* EventTime is given in parentheses so that it orders the observations inside
* each group-time cell without defining the cells. A plain bysort on group and
* time re-sorts the data and leaves the order inside each cell arbitrary, so
* the transcript that survived was not reproducible from run to run.
* dup is 0 for a company-quarter that appears once and 1, 2, ... otherwise.
* Transcripts with identical EventTime in the same cell remain tied.
quietly bysort group time (EventTime): gen dup = cond(_N==1,0,_n)
drop if dup>1
drop dup
* Leave the data in the same sort order as before this change
sort group EventTime

*_______________________________________________________________________________
* xtset
*_______________________________________________________________________________

* Declare the panel structure: group is the panel identifier and time the
* period index
xtset group time
sort time
* Describe panel participation, listing up to 10 participation patterns
xtdescribe, patterns(10)

*_______________________________________________________________________________
* Generate some descriptive stats
*_______________________________________________________________________________

* Observation counts by month, by qqyyyy and by hhyyyy
foreach period in month qqyyyy hhyyyy {
    tabstat group, by(`period') statistics(count)
}

* Company counts by sector: dup is 0 when a company-sector pair occurs once and
* 1, 2, ... when it repeats, so dup<2 keeps one observation per pair
quietly bysort group Sector: generate dup = cond(_N==1,0,_n)
tabstat group if dup<2, by(Sector) statistics(count)
drop dup

* Per-topic totals: each zero-shot count plus its _neg counterpart
generate demand_total = zsl_demand + zsl_demand_neg
generate input_total  = zsl_costs_aggregate + zsl_costs_aggregate_neg
generate final_total  = zsl_price_aggregate + zsl_price_aggregate_neg
generate labour_total = zsl_labour + zsl_labour_neg

* Sample-wide sums of the paragraph count and of the four totals
tabstat zsl_para_count demand_total input_total final_total labour_total, statistics(sum)
 
*_______________________________________________________________________________
* Construct dictionary-based variables
*_______________________________________________________________________________

* Dictionary-based measures handled in this section
local dict_vars cost_aggregate foodcosts generalinputcosts insurance ///
    inventorycosts labourcosts materialscost productioncosts rent ///
    servicecosts transportcosts price_aggregate demand_aggregate

foreach v of local dict_vars {
    * net off negative references and discard the _neg counterpart
    replace `v' = `v' - `v'_neg
    drop `v'_neg
    * divide by total words
    replace `v' = `v'/total_words
}

*_______________________________________________________________________________
* Create and output data for Appendix Table D2
*_______________________________________________________________________________

* create within-firm variables
* The level of each topic is its total reference count: the variable plus its
* _neg counterpart (_total). The within-firm series is that total less its
* firm mean. Previously the firm mean of the total was subtracted from the
* variable excluding _neg, which is not a within-firm deviation of either
* series and leaves between-firm variation in the correlations.
* NOTE(review): this changes the Table D2 correlations wherever the _neg
* counts are non-zero -- confirm against the published table.
local d2_topics zsl_demand zsl_costs_aggregate zsl_import zsl_labour ///
    zsl_price_aggregate zsl_transportation zsl_input_cost

foreach v of local d2_topics {
    gen `v'_total = `v' + `v'_neg
    by group, sort: egen `v'_mean = mean(`v'_total)
    gen `v'_within_id = `v'_total - `v'_mean
}

* no _neg counterpart is used for supply shortages in this script, so the
* variable itself is demeaned by firm
by group, sort: egen zsl_supplyshortages_mean = mean(zsl_supplyshortages)
gen zsl_supplyshortages_within_id = zsl_supplyshortages - zsl_supplyshortages_mean

* examine within-firm correlation in the level of the variables
corr zsl_price_aggregate_within_id ///
    zsl_demand_within_id ///
    zsl_costs_aggregate_within_id ///
    zsl_import_within_id ///
    zsl_labour_within_id ///
    zsl_supplyshortages_within_id ///
    zsl_transportation_within_id
* keep the correlation matrix returned by corr
matrix within = r(C)

* drop unwanted variables so the same names can be rebuilt further down
foreach v of local d2_topics {
    drop `v'_total `v'_mean `v'_within_id
}

drop zsl_supplyshortages_mean zsl_supplyshortages_within_id

*_______________________________________________________________________________
* Create and output data for Table 4
*_______________________________________________________________________________

* Table 4 of the RDP: net each topic of its negative references, summarise it
* for hhyyyy<2021 and for hhyyyy>2020, and build its within-firm deviation
local t4_topics zsl_demand zsl_costs_aggregate zsl_import zsl_labour ///
    zsl_price_aggregate zsl_transportation zsl_input_cost

foreach v of local t4_topics {
    * net off negative references
    replace `v' = `v' - `v'_neg
    * descriptive statistics for the two sub-periods
    xtsum `v' if hhyyyy<2021
    xtsum `v' if hhyyyy>2020
    * firm mean and deviation from it
    bysort group: egen `v'_mean = mean(`v')
    generate `v'_within_id = `v' - `v'_mean
    drop `v'_neg
}

* supply shortages gets the same statistics and deviation, without a net-off step
xtsum zsl_supplyshortages if hhyyyy<2021
xtsum zsl_supplyshortages if hhyyyy>2020
bysort group: egen zsl_supplyshortages_mean = mean(zsl_supplyshortages)
generate zsl_supplyshortages_within_id = zsl_supplyshortages - zsl_supplyshortages_mean

*_______________________________________________________________________________
* Create and output data for Figure 9
*_______________________________________________________________________________

* Kernel densities (bandwidth 2) of the within-firm series, estimated first on
* hhyyyy<2021 (suffix pre) and then on hhyyyy>2020 (suffix post); the
* evaluation points and density estimates are saved as new variables
local fig9_vars zsl_price_aggregate zsl_demand zsl_costs_aggregate ///
    zsl_import zsl_labour zsl_supplyshortages zsl_transportation

foreach period in pre post {
    if "`period'" == "pre" {
        local sample "hhyyyy<2021"
    }
    else {
        local sample "hhyyyy>2020"
    }
    foreach v of local fig9_vars {
        kdensity `v'_within_id if `sample', bwidth(2) ///
            generate(`v'_pts_`period' `v'_den_`period')
    }
}

* open the data browser on the saved points and densities
browse *_pts_pre  *_pts_post *_den_pre  *_den_post

*_______________________________________________________________________________
* Create variables from zero-shot text classifier used in regression analysis
*_______________________________________________________________________________

* Topics that were netted of negative references earlier in the script
local zsl_core zsl_demand zsl_costs_aggregate zsl_import zsl_labour ///
    zsl_price_aggregate zsl_transportation zsl_input_cost

* divide by total paragraphs
foreach v in `zsl_core' zsl_hiringdifficulties zsl_supplyshortages {
    replace `v' = `v'/zsl_para_count
}

* rebuild the firm means and within-firm deviations from the rescaled series,
* replacing the versions created before the rescaling
foreach v in `zsl_core' zsl_supplyshortages {
    drop `v'_within_id `v'_mean
    bysort group: egen `v'_mean = mean(`v')
    generate `v'_within_id = `v' - `v'_mean
}

*_______________________________________________________________________________
* Create and output data for Appendix Table D1	
*_______________________________________________________________________________

* Pairwise correlations of the within-firm deviations; the correlation matrix
* returned by the command is kept in the matrix named within
local d1_vars zsl_price_aggregate_within_id ///
    zsl_demand_within_id ///
    zsl_costs_aggregate_within_id ///
    zsl_import_within_id ///
    zsl_labour_within_id ///
    zsl_supplyshortages_within_id ///
    zsl_transportation_within_id
correlate `d1_vars'
matrix within = r(C)

/*
foreach i in zsl_demand ///
zsl_costs_aggregate ///
zsl_import ///
zsl_labour ///
zsl_price_aggregate ///
zsl_transportation ///
zsl_input_cost ///
zsl_hiringdifficulties ///
zsl_supplyshortages {
bysort group: egen mean_`i' = mean(`i')
bysort group: egen sd_`i' = sd(`i')
replace `i' = (`i' - mean_`i') / sd_`i'
}
*/

*_______________________________________________________________________________
* Create future-tense variables used in regression analysis
*_______________________________________________________________________________

* Future-tense topics that have a _neg counterpart
local fut_netted zsl_demand_f zsl_costs_aggregate_f zsl_import_f ///
    zsl_labour_f zsl_price_aggregate_f zsl_transportation_f zsl_input_cost_f

* net off negative references
foreach v of local fut_netted {
    replace `v' = `v' - `v'_neg
}

* divide by total paragraphs (also covers the two topics that are not netted)
foreach v in `fut_netted' zsl_hiringdifficulties_f zsl_supplyshortages_f {
    replace `v' = `v'/zsl_para_count
}

/*
foreach i in zsl_demand_f ///
zsl_costs_aggregate_f ///
zsl_import_f ///
zsl_labour_f ///
zsl_price_aggregate_f ///
zsl_transportation_f ///
zsl_input_cost_f ///
zsl_hiringdifficulties_f ///
zsl_supplyshortages_f {
bysort group: egen mean_`i' = mean(`i')
bysort group: egen sd_`i' = sd(`i')
replace `i' = (`i' - mean_`i') / sd_`i'
}
*/

*_______________________________________________________________________________
* Rename variables for brevity
*_______________________________________________________________________________

* Group rename: the k-th name in the first list becomes the k-th name in the
* second list. Dictionary measures lose their cost suffixes, zero-shot
* measures move from the zsl_ prefix to z_, and the _f suffix is preserved.
* insurance and rent map to themselves.
rename (cost_aggregate foodcosts generalinputcosts insurance inventorycosts labourcosts materialscost productioncosts rent servicecosts transportcosts price_aggregate demand_aggregate zsl_demand zsl_price_aggregate zsl_costs_aggregate zsl_hiringdifficulties zsl_import zsl_input_cost zsl_labour zsl_supplyshortages zsl_transportation zsl_demand_f zsl_price_aggregate_f zsl_costs_aggregate_f zsl_hiringdifficulties_f zsl_import_f zsl_input_cost_f zsl_labour_f zsl_supplyshortages_f zsl_transportation_f) ///
(agg_cost food general insurance inventory labour materials production rent services transport finalprice demand z_demand z_finalprice z_agg_cost z_hiring z_import z_input z_labour z_supply z_transport z_demand_f z_finalprice_f z_agg_cost_f z_hiring_f z_import_f z_input_f z_labour_f z_supply_f z_transport_f)

/*
collapse (mean) general, by(hhyyyy)
line general hhyyyy
sort hhyyyy
br
*/

*_______________________________________________________________________________
* Construct dummy variables
*_______________________________________________________________________________

* Time: covid is 1 for observations with hhyyyy>2020 and 0 otherwise
sort group time
g covid = 0
replace covid = 1 if hhyyyy>2020

* Sentiment direction asymmetries
* Each _change variable is the difference from the same firm's previous
* observation. time is given in parentheses so the within-firm order is set by
* the by-prefix itself rather than relying on an earlier sort being preserved.
* Each _increase dummy is 1 when the change is non-negative and 0 when it is
* negative. It is left missing where the change is missing (a firm's first
* observation); previously such rows were coded 1 because a missing value
* compares as greater than any number. The regressions below already exclude
* rows with a missing change, so their samples are unaffected.
bysort group (time): gen z_agg_cost_change = z_agg_cost - z_agg_cost[_n-1]
g z_agg_cost_increase = (z_agg_cost_change>=0) if !missing(z_agg_cost_change)

bysort group (time): gen z_agg_cost_change_f = z_agg_cost_f - z_agg_cost_f[_n-1]
g z_agg_cost_increase_f = (z_agg_cost_change_f>=0) if !missing(z_agg_cost_change_f)

bysort group (time): gen z_demand_change = z_demand - z_demand[_n-1]
g z_demand_increase = (z_demand_change>=0) if !missing(z_demand_change)

bysort group (time): gen z_demand_change_f = z_demand_f - z_demand_f[_n-1]
g z_demand_increase_f = (z_demand_change_f>=0) if !missing(z_demand_change_f)

* Industry: numeric, value-labelled copy of the string variable Sector
encode Sector, gen(sector_numeric)

*_______________________________________________________________________________
* Panel regressions
*_______________________________________________________________________________

* Start with an empty set of eststo-stored estimates
eststo clear

* Every model below is a fixed-effects xtreg with the robust option and a full
* set of time dummies; each is stored by eststo and again under a short name

* Zero-shot; ALL
sort group time
eststo: xtreg z_finalprice ///
    i.covid#c.z_demand ///
    i.covid#c.z_agg_cost ///
    i.time, fe robust
estimates store z1
* equality of the demand and cost slopes within each covid regime
test 0.covid#c.z_demand = 0.covid#c.z_agg_cost
test 1.covid#c.z_demand = 1.covid#c.z_agg_cost

* Z; ALL; future
sort group time
eststo: xtreg z_finalprice_f ///
    i.covid#c.z_demand_f ///
    i.covid#c.z_agg_cost_f ///
    i.time, fe robust
estimates store z2

* Z-SUBS; ALL
sort group time
eststo: xtreg z_finalprice ///
    i.covid#c.z_demand ///
    i.covid#c.z_import ///
    i.covid#c.z_labour ///
    i.covid#c.z_supply ///
    i.covid#c.z_transport ///
    i.time, fe robust
estimates store z3
* equality of each slope across the two covid regimes
test 0.covid#c.z_labour = 1.covid#c.z_labour
test 0.covid#c.z_import = 1.covid#c.z_import
test 0.covid#c.z_supply = 1.covid#c.z_supply

* Z; ALL; asymmetries
sort group time
eststo: xtreg z_finalprice ///
    i.z_demand_increase#c.z_demand ///
    i.z_agg_cost_increase#c.z_agg_cost ///
    i.time ///
    if z_demand_change!=. & z_agg_cost_change!=., fe robust
estimates store z4
* equality of each slope across the increase and decrease states
test 0.z_demand_increase#c.z_demand = 1.z_demand_increase#c.z_demand
test 0.z_agg_cost_increase#c.z_agg_cost = 1.z_agg_cost_increase#c.z_agg_cost

* Z; ALL; asymmetries future
sort group time
eststo: xtreg z_finalprice_f ///
    i.z_demand_increase_f#c.z_demand_f ///
    i.z_agg_cost_increase_f#c.z_agg_cost_f ///
    i.time ///
    if z_demand_change_f!=. & z_agg_cost_change_f!=., fe robust
estimates store z5
test 0.z_demand_increase_f#c.z_demand_f = 1.z_demand_increase_f#c.z_demand_f
test 0.z_agg_cost_increase_f#c.z_agg_cost_f = 1.z_agg_cost_increase_f#c.z_agg_cost_f

*_______________________________________________________________________________
* Tables
*_______________________________________________________________________________

* Tabulate the five stored models: coefficients with significance stars and
* 90 per cent confidence intervals in one cell, with the time dummies and the
* constant omitted.
* The label key for 0.covid#c.z_import was previously misspelt as
* 0.covid#c.z_impor, so that coefficient never received its label.
* Labels whose coefficients appear in none of the five models have no effect
* on the table.
esttab z1 z2 z3 z4 z5, onecell nogap not r2 star(* 0.10 ** 0.05 *** 0.01) cells(b(fmt(3) star) ci(fmt(3))) level(90) ///
drop(*.time _cons) mtitle("all" "fut" "subs" "asym" "asymfut") ///
coeflabels( ///
1.covid#c.z_demand "1.demand" 0.covid#c.z_demand "0.demand" ///
1.covid#c.z_agg_cost "1.z_agg_cost" 0.covid#c.z_agg_cost "0.z_agg_cost" ///
1.covid#c.z_demand_f "1.demand_f" 0.covid#c.z_demand_f "0.demand_f" ///
1.covid#c.z_agg_cost_f "1.z_agg_cost_f" 0.covid#c.z_agg_cost_f "0.z_agg_cost_f" ///
1.covid#c.z_hiring "1.z_hiring" 0.covid#c.z_hiring "0.z_hiring" ///
1.covid#c.z_import "1.z_import" 0.covid#c.z_import "0.z_import" ///
1.covid#c.z_labour "1.z_labour" 0.covid#c.z_labour "0.z_labour" ///
1.covid#c.z_supply "1.z_supply" 0.covid#c.z_supply "0.z_supply" ///
1.covid#c.z_transport "1.z_transport" 0.covid#c.z_transport "0.z_transport" ///
1.z_demand_increase#1.covid#c.z_demand "1.1.demand" 0.z_demand_increase#0.covid#c.z_demand "0.0.z_demand" ///
1.z_demand_increase#0.covid#c.z_demand "1.0.demand" 0.z_demand_increase#1.covid#c.z_demand "0.1.z_demand" ///
1.z_agg_cost_increase#1.covid#c.z_agg_cost "1.1.agg_cost" 0.z_agg_cost_increase#0.covid#c.z_agg_cost "0.0.z_agg_cost" ///
1.z_agg_cost_increase#0.covid#c.z_agg_cost "1.0.agg_cost" 0.z_agg_cost_increase#1.covid#c.z_agg_cost "0.1.z_agg_cost" ///
1.z_demand_increase_f#1.covid#c.z_demand_f "1.1.demand_f" 0.z_demand_increase_f#0.covid#c.z_demand_f "0.0.z_demand_f" ///
1.z_demand_increase_f#0.covid#c.z_demand_f "1.0.demand_f" 0.z_demand_increase_f#1.covid#c.z_demand_f "0.1.z_demand_f" ///
1.z_agg_cost_increase_f#1.covid#c.z_agg_cost_f "1.1.agg_cost_f" 0.z_agg_cost_increase_f#0.covid#c.z_agg_cost_f "0.0.z_agg_cost_f" ///
1.z_agg_cost_increase_f#0.covid#c.z_agg_cost_f "1.0.agg_cost_f" 0.z_agg_cost_increase_f#1.covid#c.z_agg_cost_f "0.1.z_agg_cost_f")  replace

*_______________________________________________________________________________
* Industry dummies
*_______________________________________________________________________________

* Discard the estimates stored for the previous table
eststo clear

* Sector-specific demand and cost slopes with sector-by-time dummies.
* The time interaction previously named a variable called sector, which is not
* created anywhere in this script and only resolved through variable-name
* abbreviation of sector_numeric; the full name is now used so the command
* does not depend on abbreviation being enabled or on no other variable
* sharing that prefix.
sort group time
eststo: xtreg z_finalprice i.sector_numeric#c.z_demand i.sector_numeric#c.z_agg_cost i.time#i.sector_numeric, fe ro level(90)
est sto ind

* Tabulate the sector slopes, omitting the sector-by-time dummies and constant.
* NOTE(review): the labels below assume that encode assigned codes 1-11 to the
* sectors in the order listed -- confirm with: label list sector_numeric
esttab ind, onecell nogap not r2 star(* 0.10 ** 0.05 *** 0.01) cells(b(fmt(3) star) ci(fmt(3))) level(90) ///
drop(*.time#* _cons) mtitle("Ind") coeflabels( ///
1.sector_numeric#c.z_demand "Communication Services" ///
2.sector_numeric#c.z_demand "Consumer Discretionary" ///
3.sector_numeric#c.z_demand "Consumer Staples" ///
4.sector_numeric#c.z_demand "Energy" ///
5.sector_numeric#c.z_demand "Financials" ///
6.sector_numeric#c.z_demand "Health Care" ///
7.sector_numeric#c.z_demand "Industrials" ///
8.sector_numeric#c.z_demand "IT" ///
9.sector_numeric#c.z_demand "Materials" ///
10.sector_numeric#c.z_demand "Real Estate" ///
11.sector_numeric#c.z_demand "Utilities" ///
1.sector_numeric#c.z_agg_cost "Communication Services" ///
2.sector_numeric#c.z_agg_cost "Consumer Discretionary" ///
3.sector_numeric#c.z_agg_cost "Consumer Staples" ///
4.sector_numeric#c.z_agg_cost "Energy" ///
5.sector_numeric#c.z_agg_cost "Financials" ///
6.sector_numeric#c.z_agg_cost "Health Care" ///
7.sector_numeric#c.z_agg_cost "Industrials" ///
8.sector_numeric#c.z_agg_cost "IT" ///
9.sector_numeric#c.z_agg_cost "Materials" ///
10.sector_numeric#c.z_agg_cost "Real Estate" ///
11.sector_numeric#c.z_agg_cost "Utilities") replace

