* Load the cleaned master dataset, discarding whatever is currently in memory.
use "INTERNAL_ONLY\Data\Working Data\data_1_master_clean.dta", clear

* Extra variable for the data quality checks:
* female is 1 when gender is "Female", 0 when it is "Male", and is left
* missing for every other value of gender.
generate female = (gender == "Female") if inlist(gender, "Female", "Male")

*** Figure 2 - Randomisation Balance
* Tests the demographic means of sub-samples who saw different options.

* Generate variables indicating which options each respondent was shown.
* NOTE(review): the conditions below treat drbaclaim==1 as "option A is the
* RBA option" and drbaclaim==-1 as "option B is the RBA option", and the
* privacy codes as 1 = no one, 2 = issuer only, 3 = AUSTRAC only,
* 4 = issuer and AUSTRAC. This is read off the variable names - confirm
* against the codebook.

* Issuer indicators: 1 if shown, missing otherwise (so -mean, over()- only
* reports the sub-sample that saw the option).
gen rba_acc  = 1 if rbaclaim_a == 1 | rbaclaim_b == 1
gen comm_acc = 1 if rbaclaim_a == 0 | rbaclaim_b == 0

* Price indicators are set to 1 for every observation.
gen price_25 = 1
gen price_20 = 1

* Privacy indicators: 0/1 for every observation. A logical expression in
* Stata always evaluates to 0 or 1, so this matches "gen ... = 1 if ..."
* followed by "replace ... = 0 if missing(...)".
gen no_one       = (privacy_a == 1 | privacy_b == 1)
gen rba_only     = (privacy_a == 2 & drbaclaim == 1)  | (privacy_b == 2 & drbaclaim == -1)
gen austrac_only = (privacy_a == 3 | privacy_b == 3)
* Fix: comm_only previously tested privacy level 3 (the AUSTRAC-only level),
* which made it a subset of austrac_only. It now mirrors rba_only: same
* privacy level (2), opposite drbaclaim sign.
gen comm_only    = (privacy_a == 2 & drbaclaim == -1) | (privacy_b == 2 & drbaclaim == 1)
gen rba_austrac  = (privacy_a == 4 & drbaclaim == 1)  | (privacy_b == 4 & drbaclaim == -1)
gen comm_austrac = (privacy_a == 4 & drbaclaim == -1) | (privacy_b == 4 & drbaclaim == 1)

* Calculate means for the groups

* Start from an empty eststo list so that estimates left over from earlier
* commands, or from a previous run that stopped before its own
* "eststo clear", are not exported together with this figure's data.
eststo clear

* One -mean- estimation of the three demographics per "option shown"
* indicator, stored in the order listed.
local shown_vars rba_acc comm_acc price_25 price_20 no_one rba_only ///
	austrac_only comm_only rba_austrac comm_austrac
foreach shown of local shown_vars {
	eststo: mean female age_final hhinc_num, over(`shown')
}

* Output means and 95% confidence intervals for the groups to a CSV file
esttab using INTERNAL_ONLY/Outputs/Figure_2_Data.csv, replace ///
	label ci level(95)
eststo clear

*** Figure A_1 - Testing for Attrition Bias
* This test relies on data from the survey provider's technical report
* which gives gender, age and income data for the initial phase of the survey

** Age and gender at each survey stage
* "Table 1" holds one three-column slice per stage; the last column of each
* slice is the respondent count and is named after the stage:
*   1 Recruited      - initially recruited
*   2 Prediary       - finished the pre-diary survey
*   3 Paymentsdiary  - finished the payments diary
*   4 Postdiary      - finished the post-diary survey (full CPS complete)
local stage_names Recruited Prediary Paymentsdiary Postdiary
local stage_cells C1:E17 F1:H17 I1:K17 L1:N17
local n_stages : word count `stage_names'

forvalues i = 1/`n_stages' {
	local count_var : word `i' of `stage_names'
	local cells     : word `i' of `stage_cells'

	import excel "INTERNAL_ONLY\Data\Tables_Data_Quality_Report.xlsx", ///
		sheet("Table 1") cellrange(`cells') firstrow clear

	* -expand- keeps a row whose count is missing or below 1 as a single
	* observation, so such rows must be removed first; otherwise an empty
	* or zero-count cell would be counted as one respondent.
	drop if missing(`count_var') | `count_var' <= 0

	* Turn the table of counts into one observation per respondent.
	expand `count_var'
	sort Age
	* The numeric prefix makes the alphabetical order used by -encode-
	* below match the order of the survey stages.
	gen stage = "`i'`count_var'"
	keep Gender Age stage

	* The first three stages are saved to disk; the last stage stays in
	* memory and the saved ones are appended to it below.
	if `i' < `n_stages' {
		save "INTERNAL_ONLY\Data\Working Data\attrition_test_`count_var'.dta", replace
	}
}

* Paste all the data together
append using "INTERNAL_ONLY\Data\Working Data\attrition_test_Recruited.dta"
append using "INTERNAL_ONLY\Data\Working Data\attrition_test_Prediary.dta"
append using "INTERNAL_ONLY\Data\Working Data\attrition_test_Paymentsdiary.dta"

* Encode demographic variables and stage complete.
encode stage, gen(stage_f)
* Female is 1 for Gender "F" and 0 for every other value (including blank).
gen Female = 1 if Gender=="F"
replace Female = 0 if missing(Female)

* Mean age and share of women by stage; stored for the Figure A1 export.
* NOTE(review): -mean Age- requires Age to be imported as numeric - confirm.
eststo: mean Age, over(stage_f)
eststo: mean Female, over(stage_f)

* Income
** Note Income variable differs from hhinc used in main analysis, because
** we only have that data from those who finished the survey. Income
** is much less granular.
* "Table 4" holds one two-column slice per stage (Income, then a count
* column named after the stage), in the same stage order as above:
* recruited, pre-diary survey, payments diary, post-diary survey.
local stage_names Recruited Prediary Paymentsdiary Postdiary
local stage_cells B1:C5 D1:E5 F1:G5 H1:I5
local n_stages : word count `stage_names'

forvalues i = 1/`n_stages' {
	local count_var : word `i' of `stage_names'
	local cells     : word `i' of `stage_cells'

	import excel "INTERNAL_ONLY\Data\Tables_Data_Quality_Report.xlsx", ///
		sheet("Table 4") cellrange(`cells') firstrow clear

	* -expand- keeps a row whose count is missing or below 1 as a single
	* observation, so such rows must be removed first; otherwise an empty
	* or zero-count cell would be counted as one respondent.
	drop if missing(`count_var') | `count_var' <= 0

	* Turn the table of counts into one observation per respondent.
	expand `count_var'
	sort Income
	* The numeric prefix makes the alphabetical order used by -encode-
	* below match the order of the survey stages.
	gen stage = "`i'`count_var'"
	keep Income stage

	* The first three stages are saved to disk; the last stage stays in
	* memory and the saved ones are appended to it below.
	if `i' < `n_stages' {
		save "INTERNAL_ONLY\Data\Working Data\attrition_test_inc_`count_var'.dta", replace
	}
}

* Combine observations
append using "INTERNAL_ONLY\Data\Working Data\attrition_test_inc_Recruited.dta"
append using "INTERNAL_ONLY\Data\Working Data\attrition_test_inc_Prediary.dta"
append using "INTERNAL_ONLY\Data\Working Data\attrition_test_inc_Paymentsdiary.dta"

* Encode stage variable
encode stage, gen(stage_f)
* NOTE(review): -mean Income- requires Income to be imported as numeric - confirm.
eststo: mean Income, over(stage_f)

* Export every estimate currently stored by eststo (the income means plus
* any stored earlier and not yet cleared) with 95% confidence intervals.
esttab using INTERNAL_ONLY/Outputs/Figure_A1_Data.csv, replace ///
	label ci level(95)
eststo clear