/*******************************************************************************

	3_summary_stats.do
	
	This program produces the key descriptive statistics. 
	
	Exports results to:
	- descriptive_stats.smcl
	- DD_graphs.xls
	- parallel_conditional.xlsx
	- additional_descriptives.xlsx

	Creators:    James Bishop and Iris Day
	Last edited: 10 November 2020 

*******************************************************************************/	

clear all

* Close any log left open by an earlier run that stopped before reaching
* -log close-; otherwise -log using- below aborts with "log file already open".
* -capture- makes this a no-op when no log is open.
capture log close

* All inputs are read from, and all outputs written to, this directory
cd "S:\2020-z008 Capacity_SW\JK_analysis\RDP_documentation\output"

* Sections 1 and 2 are written to this log (overwritten on each run)
log using descriptive_stats.smcl, replace

use setup.dta, clear

/*******************************************************************************
	1 - number of people worker-eligible for JK (used in the Wald estimator)
*******************************************************************************/	

* Weighted employment in February 2020: weighted mean of emp times the sum of weights
	summarize emp [w=weight] if absmid==tm(2020m2)
	display r(mean) * r(sum_w)

* Weighted number of people eligible, estimated as at February 2020
	preserve
		keep if sample!=0                  // industry and age exclusions
		drop if absmid!=tm(2020m2)         // February 2020 only
		drop if emp!=1                     // need to be employed

		* drop casuals with less than 12 months of tenure
		keep if !(casual==1 & tenureyr<1)

		summarize emp [w=weight]
		display r(mean) * r(sum_w)

		* same summary, separately for each industry division
		by ind06div, sort: summarize emp [w=weight]
	restore

* Industry composition of estimation sample - for stratifying the scaling factor
* NOTE: the sample restriction below is permanent (outside preserve/restore)
keep if sample!=0
		* pool industry codes 4, 10 and 11 into a single category coded 20
		clonevar ind06div_ = ind06div
		recode ind06div_ (4 10 11 = 20)
		label define ind06div 20 "OTHER_SMALL", add
		label values ind06div_ ind06div

		* baseline (casual) sample: one table per lead 0..3 of marker
		foreach lead of numlist 0/3 {
			table ind06div_ if F`lead'.marker==1 & feb==1 & survyear==2020 & tgroup!=.
		}
		drop ind06div_

		* alternative (migrant) sample: uses the unpooled industry variable
		foreach lead of numlist 0/3 {
			table ind06div if F`lead'.marker==1 & feb==1 & survyear==2020 & tgroupmig!=. & occ13skl!=.
		}

/*******************************************************************************
	2 - Summary statistics - baseline sample and migrant sample
*******************************************************************************/	

* Industry dummies shared by both covariate lists
local ind_dummies  b_agr b_min b_man b_ele b_con b_who b_ret b_acc b_tra b_inf b_fin b_ren b_pst ///
                   b_adm b_pub b_edu b_hea b_art b_oth
* Covariates summarised for the baseline sample and for the migrant sample
local controls     `ind_dummies' occ13skl hours_alt onejob student age female recentmigrant
local controls_mig `ind_dummies' i_casual i_permanent i_otheremp tenure occ13skl hours_alt onejob student age female

	*Balance at June (Table 1 and C1)
		tabstat `controls' if F4.marker==1 & feb==1 & survyear==2020, by(tgroup) statistics(n mean)
		* t-test of each covariate across tgroup, on a temporarily restricted sample
		preserve
			keep if feb==1 & survyear==2020 & F4.marker==1 & tgroup!=.
			foreach covar of local controls {
				ttest `covar', by(tgroup)
			}
		restore

	*Characteristics of non-casuals and all employees at June (Table C1)
		tabstat `controls' if F4.marker==1 & feb==1 & survyear==2020, by(casual) statistics(n mean)

	*Sample size of treatment and control group in baseline model (footnote in Section 5.3)
		forvalues lead = 0/5 {
			tab tgroup if F`lead'.marker==1 & feb==1 & survyear==2020
		}

		* premature attrition
		* markerdue_f#: copy of marker_f#, forced to 1 when mnthsel <= 8 - #
		* (cutoffs 7, 6, 5, 4, 3 for leads 1..5)
		forvalues lead = 1/5 {
			local cutoff = 8 - `lead'
			gen markerdue_f`lead' = marker_f`lead'
			replace markerdue_f`lead' = 1 if mnthsel<=`cutoff'
		}
		* premattrit_f#: 1 when marker_f# is 0 although markerdue_f# is 1, else 0
		forvalues lead = 1/5 {
			gen premattrit_f`lead' = (marker_f`lead'==0 & markerdue_f`lead'==1)
		}

		forvalues lead = 1/5 {
			tab tgroup premattrit_f`lead' if feb==1 & survyear==2020 & tgroup!=.
		}

	*Characteristics of migrant sample at June (Table E1)
		tabstat `controls_mig' if F4.marker==1 & feb==1 & survyear==2020, by(tgroupmig) statistics(n mean) casewise
		preserve
			keep if feb==1 & survyear==2020 & F4.marker==1 & occ13skl!=.
			foreach covar of local controls_mig {
				ttest `covar', by(tgroupmig)
			}
		restore

	*Sample size of treatment and control group in migrant approach
		forvalues lead = 0/5 {
			tab tgroupmig if F`lead'.marker==1 & feb==1 & survyear==2020 & occ13skl!=.
		}
		* premature attrition in migrant approach
		forvalues lead = 1/5 {
			tab tgroupmig premattrit_f`lead' if feb==1 & survyear==2020 & tgroupmig!=. & occ13skl!=.
		}

		* the remaining sections are not logged
		log close

/*******************************************************************************
	3 - DD graphs (top panels of Figures 2 and 5)
*******************************************************************************/	

keep if sample!=0

* Remove any previous workbook; -capture- ignores the error if it does not exist
capture erase DD_graphs.xls

* Leads (_f0.._f5) and lags (_l0.._l5) of each outcome
foreach outcome in emp ch_hrs {
	forvalues h = 0/5 {
		gen `outcome'_f`h' = F`h'.`outcome'
		gen `outcome'_l`h' = L`h'.`outcome'
	}
}

* Wildcard list of the lead/lag outcomes; the same for every statistic
local depvars emp_l* emp_f* ch_hrs_l* ch_hrs_f*

* One sheet per statistic: stat_mean and stat_count
foreach stat in mean count {
	*DiD
	preserve
		keep if tgroupnc!=.
		keep if feb==1 & inlist(survyear, 2019, 2020)
		collapse (`stat') `depvars', by(tgroupnc casual survyear)

		* keep lags 3..0 and leads 1..5 only
		drop *f0 *l4 *l5

		* column order: lags from l3 to l0, then leads f1 to f5, emp before ch_hrs,
		* with the group identifiers first
		order *l3 *l2 *l1 *l0 *f1 *f2 *f3 *f4 *f5
		order `depvars'
		order survyear casual tgroupnc
		sort survyear casual tgroupnc

		* the 2019 rows are retained only for casual!=0
		keep if !(survyear==2019 & casual==0)

		export excel survyear tgroupnc casual `depvars' using "DD_graphs", ///
			sheet("stat_`stat'") sheetreplace firstrow(variables)
	restore
}
	
/*******************************************************************************
	4 - Conditional trends graph (top panel of Figure E3)
*******************************************************************************/	

* Window: November 2019 to July 2020, so every calendar month appears exactly once
keep if absmid>=tm(2019m11) & absmid<=tm(2020m07)

* Reference month; all offsets below are measured from February 2020
local feb20 = tm(2020m2)

gen E     = lfstatus==1 if lfstatus!=.
gen E_feb = lfstatus==1 if absmid==`feb20'

* Overwrite each characteristic in every other month with its February 2020 value:
* months after February read the matching lag, months before read the matching lead.
* February rows themselves are left untouched.
foreach v in tgroupmig ind06div occ13skl onejob student age age_sq female E_feb tenure empcat {
	forvalues h = 1/5 {
		replace `v' = L`h'.`v' if absmid==`feb20'+`h'
	}
	forvalues h = 1/3 {
		replace `v' = F`h'.`v' if absmid==`feb20'-`h'
	}
}

* post = 1 in every month other than February 2020
gen post = absmid!=`feb20'

* Start from a fresh workbook; -capture- ignores the error if it does not exist
capture erase parallel_conditional.xlsx
putexcel set parallel_conditional.xlsx, modify sheet(conditional)

* Row labels in A2..A10 and column headers in B1..D1
local row = 2
foreach lab in L3 L2 L1 L0 F1 F2 F3 F4 F5 {
	putexcel A`row' = "`lab'", bold
	local ++row
}
putexcel B1 = "Control", bold
putexcel C1 = "Treat", bold
putexcel D1 = "Sample size", bold

* Covariates interacted with post in the fixed-effects regression
local rhs i.tgroupmig i.ind06div i.occ13skl i.onejob i.student c.age c.age_sq i.female i.tenure i.empcat

* One regression per comparison month: offsets -3..-1 and 1..5 from February 2020
* (Nov 2019 .. Jan 2020 and Mar 2020 .. Jul 2020); offset 0 is filled in afterwards
foreach off in -3 -2 -1 1 2 3 4 5 {

	preserve

		local date = `feb20' + `off'
		local row  = `off' + 5          // offset -3 -> row 2, ..., offset 5 -> row 10

		* complete cases only, on February 2020 and the comparison month
		egen mis = rowmiss(tgroupmig ind06div occ13skl onejob student age female tenure empcat)
		keep if (absmid==`feb20' | absmid==`date') & mis==0

		* number of retained rows per person; the regression uses those with both months
		bysort absrid: egen total_obs = count(absrid)

		quietly xtreg E (`rhs')##i.post if total_obs==2 & E_feb==1, fe cluster(absrid)

		margins tgroupmig#post, noestimcheck

		* 2nd and 4th margins are written as the Control and Treat columns;
		* the observation count of the 2nd margin, halved, as Sample size
		matrix m_ctrl  = r(b)[1,2]
		matrix m_treat = r(b)[1,4]
		matrix m_size  = r(_N)[1,2]/2

		putexcel B`row' = matrix(m_ctrl)
		putexcel C`row' = matrix(m_treat)
		putexcel D`row' = matrix(m_size)

	restore
}

* Reference row (L0): the 1x2 matrix fills B5 and C5 with 1
matrix unit = (1,1)

putexcel B5 = matrix(unit)
	
/*******************************************************************************
	5 -  Away from work by employment status, 1991-2020, monthly, weighted (Figure 3)
*******************************************************************************/		

use skinny_full, clear

keep if inrange(absmid, tm(1991m1), tm(2020m7))

* String suffixes used as reshape keys.
* away: "_notaway" when awaywork is -1, "_away" for any other code, empty when awaywork is -9
gen away = cond(awaywork==-1, "_notaway", "_away") if awaywork!=-9

* employed: "_emp" when lfstatus is 1, "_notemp" otherwise, empty when lfstatus is missing (.)
gen employed = cond(lfstatus==1, "_emp", "_notemp") if lfstatus!=.

* weighted totals (w) and sample counts (n) per month x away x employed cell
collapse (sum) w=weight (count) n=weight, by(absmid away employed)

* First widen over employment status within each month x away cell ...
egen id = group(absmid away)
reshape wide w n, i(id) j(employed) string
drop id

* ... then widen over away status, leaving one row per month
reshape wide w_emp n_emp w_notemp n_notemp, i(absmid) j(away) string

order absmid w* n*

* -replace- overwrites the whole workbook, so this must be the first sheet written
export excel using additional_descriptives.xlsx, sheet("A_awaywork") firstrow(variables) replace

clear

/*******************************************************************************
	6 -   Away from work >=4 weeks but paid in last 4 weeks
********************************************************************************/

use skinny_full, clear

* Restrict to lfstatus 1 with awaywork code 31 or 32, July 2014 to July 2020
keep if inlist(awaywork, 31, 32) & inrange(absmid, tm(2014m7), tm(2020m7)) & lfstatus==1

* Monthly weighted total (E) and sample count (n)
collapse (sum) E=weight (count) n=weight, by(absmid)

* Written as an additional sheet in the existing workbook
export excel using additional_descriptives.xlsx, sheet("B_standown") firstrow(variables) 

/*******************************************************************************
	7 -  Unemployment rate by country of birth
********************************************************************************/

use skinny_full, clear

* Country-of-birth group; the two non-Australian groups also require elapyrar in 1..6
* NOTE(review): the original "_oce" test was (cobmcg==1000 & cobmcg!=1100), which
* reduces to cobmcg==1000 - confirm that no wider range of codes was intended
gen     cob = "_aus" if cobmcg==1100
replace cob = "_oce" if cobmcg==1000 & inrange(elapyrar, 1, 6)
replace cob = "_oth" if inrange(cobmcg, 2000, 9000) & inrange(elapyrar, 1, 6)

* Keep classified people and exclude lfstatus 3
keep if cob!="" & lfstatus!=3

* weighted totals (w) and sample counts (n) per month x group x lfstatus
collapse (sum) w=weight (count) n=weight, by(absmid cob lfstatus)

* Widen over lfstatus so the rate can be computed within each month x group row
egen id = group(absmid cob)
reshape wide w n, i(id) j(lfstatus)

* rate = weight with lfstatus 2 as a percentage of weight with lfstatus 1 or 2
gen urate = w2/(w1+w2)*100
gen n = n1+n2
drop id w1 w2 n1 n2

* One row per month, with one urate/n pair per group
reshape wide urate n, i(absmid) j(cob) string

tsset absmid

* Sample counts: sum over the current month and the previous 11.
* A new variable is generated (not -replace-d in place) so every lag reads original values.
foreach x in n_aus n_oce n_oth {
	local window `x'
	forvalues lag = 1/11 {
		local window `window'+L`lag'.`x'
	}
	gen `x'_12m = `window'
	drop `x'
	rename `x'_12m `x'
}

* Rates: 12-month average, expressed as the difference from its November 2008 value
foreach x in urate_aus urate_oce urate_oth {
	local window `x'
	forvalues lag = 1/11 {
		local window `window'+L`lag'.`x'
	}
	gen `x'_12mma = (`window')/12
	summarize `x'_12mma if absmid==tm(2008m11)
	replace `x'_12mma = `x'_12mma - r(mean)
}

keep if inrange(absmid, tm(2008m11), tm(2009m12))
keep  absmid n_aus n_oce n_oth urate_aus_12mma urate_oce_12mma urate_oth_12mma
order absmid urate_aus_12mma urate_oce_12mma urate_oth_12mma n_aus n_oce n_oth

* Written as an additional sheet in the existing workbook
export excel using additional_descriptives.xlsx, sheet("C_urate_cob") firstrow(variables) 

clear all

* end of do file	