* Session setup: disable --more-- pauses so the script runs unattended,
* then empty the data in memory before anything is loaded.
set more off
clear


* Writing subroutines to export histogram and kernel density data for derounded and unweighted test statistics 
*************************************************************************************************************
*capture : ssc install kdens // Package for kernel densities with boundary adjustments.
*capture : ssc install moremata // Package dependency for kdens

* This one does not filter out data-driven model selection and exploratory research
capture program drop export_stars
program define export_stars
    * Purpose : export the histogram of t_stat_sm (bins of width 0.01 starting at 0,
    *           restricted to 0 < t_stat_sm < 10) for the subsample where the
    *           dummy passed as argument equals 1.
    * Argument: 1 = name of a numeric subsample dummy, typically D_SOMETHING.
    * Output  : "Data/Temp/obsy_SOMETHING.dta" containing x (as returned by
    *           twoway__histogram_gen) and obs_SOMETHING, sorted by x.
    * The data in memory are left untouched (preserve/restore).
    args dummy

    * Fail early, with a clear message, instead of deep inside the histogram call
    if "`dummy'" == "" {
        display as error "export_stars: expected the name of a subsample dummy variable"
        exit 198
    }
    confirm numeric variable `dummy'

    local width 0.01
    local max 10

    * Strip only a leading "D_"; an unanchored pattern would also eat a "D_"
    * that occurs in the middle of a name that does not start with it
    local suffix = regexr("`dummy'", "^D_", "")

    preserve
    quietly twoway__histogram_gen t_stat_sm if t_stat_sm < `max' & t_stat_sm > 0.0 & `dummy' == 1, start(0) width(`width') generate(obs_`suffix' x)
    * Keep only the rows that carry a histogram bin
    drop if missing(x)
    keep x obs_`suffix'
    sort x
    save "Data/Temp/obsy_`suffix'", replace
    restore
end
* Export_stars takes one argument, in string format, which is typically a subsample dummy variable that is called D_SOMETHING.

* This one filters out data-driven model selection and exploratory research
capture program drop export_stars_filter
program define export_stars_filter
    * Purpose : same export as export_stars, but additionally restricted to
    *           observations with D_noExpData_cb == 1.
    * Argument: 1 = name of a numeric subsample dummy, typically D_SOMETHING.
    * Requires: variable D_noExpData_cb in the data in memory.
    * Output  : "Data/Temp/obsy_SOMETHING.dta" containing x (as returned by
    *           twoway__histogram_gen) and obs_SOMETHING, sorted by x.
    * The data in memory are left untouched (preserve/restore).
    args dummy

    * Fail early, with a clear message, instead of deep inside the histogram call
    if "`dummy'" == "" {
        display as error "export_stars_filter: expected the name of a subsample dummy variable"
        exit 198
    }
    confirm numeric variable `dummy'
    confirm numeric variable D_noExpData_cb

    local width 0.01
    local max 10

    * Strip only a leading "D_"; an unanchored pattern would also eat a "D_"
    * that occurs in the middle of a name that does not start with it
    local suffix = regexr("`dummy'", "^D_", "")

    preserve
    quietly twoway__histogram_gen t_stat_sm if t_stat_sm < `max' & t_stat_sm > 0.0 & `dummy' == 1 & D_noExpData_cb == 1, start(0) width(`width') generate(obs_`suffix' x)
    * Keep only the rows that carry a histogram bin
    drop if missing(x)
    keep x obs_`suffix'
    sort x
    save "Data/Temp/obsy_`suffix'", replace
    restore
end
* export_stars_filter takes one argument, in string format, which is typically a subsample dummy variable that is called D_SOMETHING.



* Exporting main subsamples using unweighted rounded test statistics
********************************************************************
use "Data/Final/final_stars_supp", clear

* Subsamples exported without the D_noExpData_cb restriction
local unfilteredDummies D_cb D_topJ D_explore_cb D_dataDriven_cb D_noExpData_cb D_dataCode_topJ D_eye_topJ D_control
foreach dummy of varlist `unfilteredDummies' {
    export_stars "`dummy'"
}

* Subsamples exported with the additional D_noExpData_cb == 1 restriction
local filteredDummies D_rba D_rbnz D_minn D_pub_cb D_noPub_cb D_dataCode_cb D_eye_cb D_noEye_cb
foreach dummy of varlist `filteredDummies' {
    export_stars_filter "`dummy'"
}



* Adding heavily smoothed control distribution for extension 
************************************************************
* Kernel-smoothed density of t_stat_sm for the control subsample (D_control == 1),
* evaluated on a fixed grid of 1000 points and saved to "Data/Temp/cSmooth".
local max 10
local precision 0.2

* For this example need to generate all x's because will be used as a counterfactual distribution (one of the "inputs") later.
* The grid is 0.005, 0.015, ..., 9.995, stored in the first 1000 rows of the data in memory.
* Make sure those rows exist: the added rows are all-missing, so the if-condition
* of kdens below excludes them from the estimation sample.
if _N < 1000 {
    set obs 1000
}
* One vectorised statement instead of 1000 separate passes over the data;
* the expression is kept exactly as before so the stored values are unchanged.
generate x = -0.005 + _n/100 if _n <= 1000

kdens t_stat_sm if t_stat_sm < `max' & t_stat_sm > 0.0 & D_control == 1, kernel(epanechnikov) ll(0) ul(`max') bw(`precision') gen(cSmooth) at(x) nograph

* Keep only the grid rows and the smoothed density
drop if x == .
keep x cSmooth
sort x
save "Data/Temp/cSmooth", replace


* Adding empirical simulated distributions and merge the exported files
***********************************************************************
local width 0.01
local max 10
local precision 0.1

* Build one histogram of t per simulated input file and accumulate them,
* keyed on x, in "Data/Temp/realy". The wdi histogram is stored under the
* name "real"; the others take the name of their source.
local isFirst 1
foreach src in wdi qog psid vhlss {
    if "`src'" == "wdi" {
        local hvar real
    }
    else {
        local hvar `src'
    }

    use "Data/Source/inputs/simu_`src'", clear
    quietly twoway__histogram_gen t if t < `max' & t > 0.0, start(0) width(`width') gen(`hvar' x)
    drop if x == .
    keep x `hvar'
    sort x

    * Every file after the first is merged onto what has been accumulated so far
    if `isFirst' == 0 {
        merge 1:1 x using "Data/Temp/realy", nogenerate
    }
    save "Data/Temp/realy", replace
    local isFirst 0
}

* Start from the simulated histograms, then attach every exported subsample
* histogram (obsy_*) and the smoothed control density, all keyed on x.
* Each temporary file is deleted as soon as it has been merged.
use "Data/Temp/realy", clear

local exported : dir "Data/Temp" files "obsy_*"
foreach fname of local exported {
    merge 1:1 x using "Data/Temp/`fname'", nogenerate
    erase "Data/Temp/`fname'"
}

merge 1:1 x using "Data/Temp/cSmooth", nogenerate
erase "Data/Temp/cSmooth.dta"
* Some of the merges will be unmatched, because the histogram drops bins that record no values. The final dataset has 1000 bins though.


* Adding Cauchy, Student, and Normal distributions to file
***********************************************************

* Reference curves evaluated at each x.
* NOTE(review): norm is 2*(1 - Phi(x)), a two-sided tail probability, whereas
* student and the cauchy_* variables are twice a density. Confirm this is intended.
generate norm = (1-normal(x))*2
generate student = (tden(1,x))*2

* Cauchy curves for scale 0.5, 1.5 and 2 (variable suffixes 05, 15, 2)
local suffixes 05 15 2
local scales 0.5 1.5 2
forvalues k = 1/3 {
    local sfx : word `k' of `suffixes'
    local sc : word `k' of `scales'
    generate cauchy_`sfx' = 2/_pi*`sc'/(x^2+`sc'^2)
}

* Bins that were absent from a merged file are missing; code them as zero
foreach v of varlist _all {
    replace `v' = 0 if `v' == .
}

drop if x == 0
sort x
* The pool-adjacent-violators algorithm used in the next R script uses variable name x for its output
rename x xxx


* Passing data to 5_1_estimation_non_param.R and 5_2_estimation.param.do
************************************************************************
* The accumulated simulation file is no longer needed once everything is merged
erase "Data/Temp/realy.dta"

* Stata copy for the parametric estimation, comma-separated copy (with header row) for R
save "Data/Temp/export_param", replace
outsheet using "Data/Temp/export_to_R.txt", comma names replace