clear
set more off


* Loading the cleaned and combined test variables data.
* The reported statistics are read from string variables: each one gets a numeric copy with the suffix _num,
* and the string original is kept alongside it.
*****************************************************************************************************************
use "Data/Temp/stars_raw", clear

* force: entries that cannot be read as a number become missing in the _num copy
local reported coefficient standard_deviation standard_deviation_2 standard_deviation_3 t_stat t_stat_2 t_stat_3 p_value
foreach v of varlist `reported' {
    destring `v', generate(`v'_num) force
}


* Storing raw test statistics
*****************************
* t_stat_raw is the absolute test statistic, filled from the first source that is available, in this order:
*   1. the reported t-statistic
*   2. zero, when the reported coefficient is exactly zero
*   3. coefficient divided by standard deviation
*   4. the standard normal quantile of half the reported p-value
generate t_stat_raw = t_stat_num
replace t_stat_raw = 0 if t_stat_raw == . & coefficient_num == 0
replace t_stat_raw = coefficient_num / standard_deviation_num if t_stat_raw == .
* invnormal(p/2) is negative for p < 1; the abs() on the last line removes the sign
replace t_stat_raw = invnormal(p_value_num / 2) if t_stat_raw == .
replace t_stat_raw = abs(t_stat_raw)


* De-rounding some numbers
**************************
* Each numeric copy gets uniform noise of at most half a unit in the last decimal place of the reported string.
* The draws are fixed by the seed and by the order of the runiform() calls, so the variable order in the
* loop must stay as it is for the results to be reproducible.
set seed 51

local reported coefficient standard_deviation standard_deviation_2 standard_deviation_3 t_stat t_stat_2 t_stat_3 p_value
foreach v of varlist `reported' {
    tempvar decimals n_decimals
    * part of the reported string after the decimal point (empty when there is none)
    egen `decimals' = ends(`v'), punct(.) tail
    generate `n_decimals' = length(`decimals')
    generate `v'_num_sm = `v'_num + (runiform() - 0.5) * 10^(-`n_decimals')
    drop `decimals' `n_decimals'
}

* De-rounded absolute test statistic, built from the _sm variables with the same fallback order
* (t-statistic, zero coefficient, coefficient / standard deviation, p-value)
generate t_stat_sm = t_stat_num_sm
* NOTE(review): after noise has been added, coefficient_num_sm is very unlikely to be exactly zero,
* so this line will rarely (if ever) change anything -- confirm whether that is intended
replace t_stat_sm = 0 if t_stat_sm == . & coefficient_num_sm == 0
replace t_stat_sm = coefficient_num_sm / standard_deviation_num_sm if t_stat_sm == .
replace t_stat_sm = invnormal(p_value_num_sm / 2) if t_stat_sm == .
* a de-rounded statistic is only kept where a raw statistic exists
replace t_stat_sm = . if t_stat_sm != . & t_stat_raw == .
replace t_stat_sm = abs(t_stat_sm)


* Generating weights, so that our data set is more complete when merged with Brodeur data (which has weights)
*************************************************************************************************************
* obs_by_article / obs_by_table: number of rows in each article / in each table of an article.
* An article is identified by (journal_id issue article_page), a table by the same keys plus table_panel.
* total() is the documented name of the legacy egen function sum().
tempvar one
generate byte `one' = 1
egen obs_by_article = total(`one'), by(journal_id issue article_page)
egen obs_by_table = total(`one'), by(journal_id issue article_page table_panel)
drop `one'

* tab_by_article: number of distinct tables in each article.
* By default tag() never tags rows in which one of the identifiers is missing, whereas the by() groups above
* treat missing as a group of its own. The missing option makes the two consistent, so that tab_by_article
* cannot under-count (or be zero) and weight_table sums to one within every article.
* Results are unchanged when no identifier is missing.
egen tag_table_article = tag(journal_id issue article_page table_panel), missing
egen tab_by_article = total(tag_table_article), by(journal_id issue article_page)
drop tag_table_article

* weight_article: every row of an article gets the same share of the article's unit weight.
* weight_table: every table gets an equal share of the article, every row an equal share of its table.
generate weight_article = 1/obs_by_article
generate weight_table = 1/obs_by_table * 1/tab_by_article

* Integer versions of the weights, scaled by 10^8.
* NOTE(review): these are created with the default storage type; if that is float, not every integer of
* this size is exactly representable. Left unchanged so that the output stays identical.
generate weight_table_round = round(weight_table*10^8)
generate weight_article_round = round(weight_article*10^8)

* Adding the article-level supplementary data (one row per article in the using file)
sort journal_id issue article_page
merge m:1 journal_id issue article_page using "Data/Temp/supp_data_article"
drop _merge

* Clearing all variable labels
foreach v of varlist _all {
    label variable `v' ""
}

* article_page is converted to numeric before the Brodeur data is appended;
* force: pages that cannot be read as a number become missing
destring article_page, replace force

append using "Data/Source/inputs/brodeur_final_stars_supp"


* Creating the relevant subsample dummies and dropping the redundant ones from Brodeur et al
********************************************************************************************
drop D_*

* Conditions that are shared by several of the dummies below
local top_journals inlist(journal_id, "American Economic Review", "Journal of Political Economy", "Quarterly Journal of Economics")
local central_banks inlist(journal_id, "RBA Research Discussion Paper", "Minneapolis Fed Staff Report", "RBNZ Discussion Paper")
local not_control main != "control"
local data_and_code data_availability == "yes" & codes_availability == "yes"

* Every row from the three top journals is marked as peer reviewed
replace peer = "yes" if `top_journals'
replace data_availability = lower(data_availability)
replace codes_availability = lower(codes_availability)

* The dummies written in format all_D_* will not be picked up by the charting scripts, only the summary statistics output
* They cover key classifications that appear in both the central bank and top journals dataset
generate all_D = (`not_control')
generate all_D_pub = (peer == "yes" & `not_control')
generate all_D_dataCode = (`data_and_code' & `not_control')
generate all_D_eye = (eye_catcher == "yes" & `not_control')

* The dummies written in format D_* will be picked up by the charting scripts and parts of the summary statistics output
* Central bank papers and top journal articles (the latter excluding rows with type_emp "one side")
generate D_cb = (`central_banks' & `not_control')
generate D_topJ = (`top_journals' & `not_control' & type_emp != "one side")

* One dummy per central bank
generate D_rba = (journal_id == "RBA Research Discussion Paper" & `not_control')
generate D_rbnz = (journal_id == "RBNZ Discussion Paper" & `not_control')
generate D_minn = (journal_id == "Minneapolis Fed Staff Report" & `not_control')

* Splits of the central bank sample
generate D_explore_cb = (exploratory == "yes" & D_cb)
generate D_dataDriven_cb = (data_driven == "yes" & D_cb)
generate D_noExpData_cb = (exploratory == "no" & data_driven == "no" & D_cb)

generate D_pub_cb = (peer == "yes" & D_cb)
generate D_noPub_cb = (peer == "no" & D_cb)

* Data and code availability, in each of the two samples
generate D_dataCode_cb = (`data_and_code' & D_cb)
generate D_dataCode_topJ = (`data_and_code' & D_topJ)

* Eye-catcher splits
generate D_eye_cb = (eye_catcher == "yes" & D_cb)
generate D_noEye_cb = (eye_catcher == "no" & D_cb)
generate D_eye_topJ = (eye_catcher == "yes" & D_topJ)

generate D_control = (main == "control")


* Cosmetics and passing data to 4_export.do
*******************************************
* Lower-casing the capitalised spellings "No" and "Yes" of main
replace main = lower(main) if inlist(main, "No", "Yes")

* Moving the variables listed below to the front of the dataset, in this order.
* Note: precision* was dropped from this list (it used to follow standard_deviation_3*).
order journal_id year issue article_page first_author num_authors ///
    data_availability codes_availability peer table_panel ///
    eye_catcher row column main exploratory data_driven ///
    coefficient coefficient_num coefficient_num_sm ///
    standard_deviation standard_deviation_num standard_deviation_num_sm ///
    p_value p_value_num p_value_num_sm ///
    t_stat t_stat_raw t_stat_sm t_stat_num t_stat_num_sm ///
    obs* tab_by_article weight* standard_deviation_2* standard_deviation_3* ///
    t_stat_2* t_stat_3* model type type_emp ras thanks field field_2 ///
    negative_result editor_d editor_d_before tenured* non_tenured* ///
    phd_age, first

save "Data/Final/final_stars_supp", replace