/*******************************************************************************

	2_variables.do
	
	This program creates the key variables used in the regression analysis. 

	Creators:    James Bishop and Iris Day
	Last edited: 10 November 2020 

*******************************************************************************/	

* start from an empty Stata session
clear all

* set directory
* NOTE(review): hard-coded absolute path; the relative file names used in this
* do-file (skinny, setup.dta) are resolved against this folder
cd "S:\2020-z008 Capacity_SW\JK_analysis\RDP_documentation\output"

* import data (skinny.dta in the working directory), replacing anything in memory
use skinny, clear
	
* declare panel: absrid is the panel identifier and absmid the time variable
* (absmid is treated as a Stata monthly date later in this file, via dofm())
xtset absrid absmid

/*******************************************************************************
	1 - generate key variables
*******************************************************************************/	

		* employment & hours
		* (variables are created in the same order as before so the dataset layout is unchanged)
		generate emp        = lfstatus==1
		generate casual     = statemp==12 if inlist(statemp, 11, 12) // missing if not an employee
		generate employee   = inlist(statemp, 11, 12)
		generate emp_narrow = emp==1 & awaywork==-1
		generate emp_broad  = emp==1 | inlist(awaywork, 41, 42)
		* alternative hours measure: reported hours, with 0 for the non-employed
		generate hours_alt  = cond(emp==0, 0, hrswork)

		* job characteristics
		* codes -1 and 0 in these variables are treated as missing
		foreach v of varlist tenurmth tenureyr ind06div {
			replace `v' = . if inlist(`v', -1, 0)
		}
		* only skill levels 1 to 5 are kept; anything else becomes missing
		replace occ13skl = . if !inrange(occ13skl, 1, 5)
		* single-job indicator, defined for the employed only
		generate onejob = multjob==11 if emp==1
		
		* 1-digit industry dummies: tabulate creates i1..i19 (one per ind06div category),
		* which are then renamed to b_<division abbreviation>
		tabulate ind06div, generate(i)
		local from
		local to
		local k = 0
		foreach div in agr min man ele con who ret acc tra inf fin ren pst adm pub edu hea art oth {
			local ++k
			local from `from' i`k'
			local to   `to' b_`div'
		}
		* one group rename covering i1..i19
		rename (`from') (`to')

		* 2-digit industry codes, held as strings
		generate ind06grp_str = string(ind06grp) if ind06grp>0
		* take two characters starting three from the end of the code:
		* position 2 of a 4-character code, position 3 of a 5-character code
		generate ind06_2dig_str = substr(ind06grp_str, strlen(ind06grp_str)-2, 2) ///
			if ind06grp>0 & inlist(strlen(ind06grp_str), 4, 5)
		
		* demographics
		* recent migrant: 0-5 elapsed years since arrival, never set for cobmcg 1000
		generate recentmigrant = inrange(elapyrar, 0, 5) & cobmcg!=1000
		generate female  = sex==2
		generate student = inrange(educatt, 2, 7)
		generate age_sq  = age^2

		* time variables
		generate month = month(dofm(absmid))
		generate feb   = month==2

		* spread each respondent's February value across all of their observations
		foreach v in emp_narrow hours_alt {
			bysort absrid: egen `v'feb = max(cond(feb==1, `v', .))
		}

		* change relative to February (hours_alt is 0 when not employed)
		generate ch_hrs        = hours_alt  - hours_altfeb
		generate ch_emp_narrow = emp_narrow - emp_narrowfeb

		* the February levels were only needed to build the changes
		drop emp_narrowfeb hours_altfeb
		

/*******************************************************************************
	2 - define treatment and control groups for baseline model
*******************************************************************************/			

* Tenure bands shared by both group definitions (evaluated on February observations)
*   treatment: 12-23 months of tenure (tenureyr==1)
*   control:   tenure of 6 or more months and less than 11 months
* When both bands match, control (0) wins, as it did with the sequential replaces.
local band_treat   tenureyr==1
local band_control inrange(tenurmth, 6, 10)

* Main variable: casual employees
			generate tgroup = cond(`band_control', 0, cond(`band_treat', 1, .)) ///
				if casual==1 & feb==1

* For placebo test with non-casuals: all employees
			generate tgroupnc = cond(`band_control', 0, cond(`band_treat', 1, .)) ///
				if employee==1 & feb==1

/*******************************************************************************
	3 - sample exclusions
*******************************************************************************/				
			
* Exclusion rules, each held as an expression so the sample flag is built in one step

	* public sector industry groups
	local excl_public  inlist(ind06grp, 4262, 4263, 4281, 9472, 11621, 16810, 17840) | inrange(ind06grp, 15751, 15772)

	* financial sector
	local excl_finance ind06grp==11622

	* under 16
	local excl_under16 age<16

* sample = 1 if kept, 0 if excluded; only defined for February observations
generate sample = !(`excl_public' | `excl_finance' | `excl_under16') if feb==1
 
/*******************************************************************************
	4 - variables for attrition analysis
*******************************************************************************/
 
 * Re-establish panel order (absrid, absmid) before using lead operators: the
 * bysort/egen step earlier in this file sorts on absrid only, which leaves the
 * within-respondent order of observations unspecified.
 sort absrid absmid

 * marker is 1 on every observation that exists in the data
 gen marker = 1

 * marker_f`j' = 1 if the respondent has an observation j months after this one,
 * 0 if not; only defined for February observations
 forvalues j = 1/5 {
 	gen marker_f`j' = !missing(F`j'.marker) if feb==1
 }

/*******************************************************************************
	5 - variables for migration analysis
*******************************************************************************/

* Main variable
			* Defined for employed February observations with 1-6 elapsed years since
			* arrival and cobmcg other than 1100:
			*   treatment (1) = cobmcg 1000, control (0) = any other cobmcg
			generate tgroupmig = cobmcg==1000 ///
				if inrange(elapyrar, 1, 6) & cobmcg!=1100 & emp==1 & feb==1

* Additional controls
			* tenure in years; values strictly between 0 and 1 are set to 0
			* NOTE(review): tenureyr==0 is recoded to missing earlier in this file, so the
			* zero branch only fires for fractional tenureyr values - confirm this is intended
			generate tenure = cond(tenureyr<1, 0, tenureyr) if tenureyr>0

			* employment category: 1 casual, 2 permanent,
			* 3 other (includes self-employed & contributing family); missing when statemp is 0
			generate empcat = cond(statemp==12, 1, cond(statemp==11, 2, cond(statemp!=0, 3, .)))

			* one indicator per employment category
			tabulate empcat, generate(empz)
			rename (empz1      empz2       empz3) ///
			       (i_casual   i_permanent i_otheremp)
	
					
* write the dataset with the new variables to setup.dta in the working directory,
* overwriting any existing copy
save setup.dta, replace

* leave the session empty
clear all

* end of do file