
*** PURPOSE ***
*** This program creates and cleans a range of variables that are used later in analysis
*** Install relevant programs

*net install http://www.stata.com/users/kcrow/tab2xl

* Working directory and path globals (fill in the placeholders before running)
cd "<place directory here>"
global path "<place directory here>"
global data "$path/data"

* Load the unbalanced long file; the name is resolved relative to the working directory
use "long-file-unbalanced.dta", clear

* The person identifier must be numeric before it can be the panel variable
destring xwaveid, replace

* Declare the panel structure: xwaveid by wave
xtset xwaveid wave

* Year = 2000 + wave number
generate year = wave + 2000

* Household head flag: 1 when the last digit of hhpid is 1, otherwise 0
destring hhpid, replace
generate head = (mod(hhpid, 10) == 1)

***============== LABOUR FORCE VARIABLES =============***

** "Lost job within last 12 months involuntarily"
* lefrd == 1 -> 0, lefrd == 2 -> 1, any other value -> missing
generate fired = (lefrd == 2) if inlist(lefrd, 1, 2)

* Lead of fired (next wave); the data must be in panel order for F.
sort xwaveid wave
generate ffired = F.fired

** Job loss probability: jbmploj kept only when it lies in [0, 100]
generate firedprob = jbmploj if inrange(jbmploj, 0, 100)
generate lfiredprob = L.firedprob

** Job loss probability rescaled from 0-100 to 0-1
sort xwaveid wave
generate firedprob1 = firedprob / 100

** Job loss buckets
* Each value in [lo, lo+10) is labelled with the upper edge lo+10;
* a value of exactly 100 joins the top bucket.
generate jlbucket = .
forvalues lo = 0(10)90 {
	local hi = `lo' + 10
	replace jlbucket = `hi' if jbmploj >= `lo' & jbmploj < `hi'
}
replace jlbucket = 100 if jbmploj == 100
replace jlbucket = . if jbmploj < 0

** Job finding: jspsuit kept only when it lies in [0, 100], blanked when esbrd == 3
generate suitprob = jspsuit if inrange(jspsuit, 0, 100)
replace suitprob = . if esbrd == 3

** Job finding buckets (same banding as the job loss buckets)
generate suitbucket = .
forvalues lo = 0(10)90 {
	local hi = `lo' + 10
	replace suitbucket = `hi' if jspsuit >= `lo' & jspsuit < `hi'
}
replace suitbucket = 100 if jspsuit == 100
replace suitbucket = . if jspsuit < 0

** Unemployment indicator
* esbr == 1 -> 0, esbr == 2 -> 1, any other value -> missing
generate unemp = (esbr == 2) if inlist(esbr, 1, 2)

** Time spent unemployed (negative codes dropped)
generate timeunemp = capune if capune >= 0

** Lagged job loss bucket (previous wave)
generate lagjlbucket = L.jlbucket

** Probability of leaving job: jbmplej kept only when it lies in [0, 100]
generate leaveprob = jbmplej if inrange(jbmplej, 0, 100)

** Contract type (non-positive codes dropped)
generate contract = jbmcnt if jbmcnt > 0

** Probability of leaving job bucketed
* Each value in [i-10, i) is labelled with the upper edge i; exactly 100 joins bucket 100.
gen jleave = .
forvalues i = 10(10)100 {
	replace jleave=`i' if leaveprob>=`i'-10 & leaveprob<`i'
}
* Stata ranks missing above every number, so an unguarded "leaveprob>=100"
* would push every observation with missing leaveprob into bucket 100.
replace jleave=100 if leaveprob>=100 & !missing(leaveprob)

* Previous-wave bucket
gen lagjleave=l.jleave

** Left job for any reason
* lejob == 2 -> 1, lejob == 1 -> 0; an involuntary loss (fired == 1) always counts as leaving
generate leftjob = (lejob == 2) if inlist(lejob, 1, 2)
replace leftjob = 1 if fired == 1

** Gen lag variables that function with xtreg
* unemp1..unemp7 hold unemp at t-3, t-2, t-1, t, t+1, t+2, t+3
sort xwaveid wave

forvalues k = 1/4 {
	local back = 4 - `k'
	generate unemp`k' = L`back'.unemp
}
forvalues k = 5/7 {
	local ahead = `k' - 4
	generate unemp`k' = F`ahead'.unemp
}

** Gen lag variables for household head (we want this one more lag as we want the person that is the head in the year before unemployment)
* head1..head7 hold head at t-4, t-3, t-2, t-1, t, t+1, t+2
sort xwaveid wave

forvalues k = 1/5 {
	local back = 5 - `k'
	generate head`k' = L`back'.head
}
forvalues k = 6/7 {
	local ahead = `k' - 5
	generate head`k' = F`ahead'.head
}

** Lag for experiencing a job loss
* fired1..fired7 hold fired at t-3, t-2, t-1, t, t+1, t+2, t+3
forvalues k = 1/4 {
	local back = 4 - `k'
	generate fired`k' = L`back'.fired
}
forvalues k = 5/7 {
	local ahead = `k' - 4
	generate fired`k' = F`ahead'.fired
}

*JOB CHARACTERISTICS

* 0/1 dummies from hges (any other value, including missing, gives 0)
generate parttime = (hges == 2)
generate fulltime = (hges == 1)

* 0/1 dummies from the contract type code jbmcnt
generate casual    = (jbmcnt == 2)
generate permanent = (jbmcnt == 3)
generate fixedterm = (jbmcnt == 1)

* Job tenure and its square; negative codes become missing
generate jtenure = jbempt if jbempt >= 0
generate jtenuresq = jtenure * jtenure

* Occupation code; negative codes become missing
generate occupation = jbmo06 if jbmo06 >= 0

* Workplace size code, plus a "small" dummy for codes below 4
generate size = jbmwpsz if jbmwpsz >= 0
generate small = (size < 4) if size != .

* Supervisor dummy
generate supervisor = (jbmsvsr == 1)

*INDUSTRY OF EMPLOYMENT
* Negative codes become missing
generate division = jbmi61 if jbmi61 >= 0
generate industry = jbmi62 if jbmi62 >= 0


*LOCAL UNEMPLOYMENT
* hhura divided by 100; negative values become missing
generate urate = hhura
replace urate = urate / 100
replace urate = . if urate < 0

*UNEMPLOYMENT DURATION (WEEKS)
generate weeksunemp = jstwks if jstwks > 0

* Band the duration at the cut points 0, 8, 16, 24, 32, 40 using recode();
* unemployed people with at most 4 weeks go to band 0, durations above 40 are dropped
generate weeks_code = recode(weeksunemp, 0, 8, 16, 24, 32, 40)
replace weeks_code = 0 if unemp == 1 & weeksunemp <= 4
replace weeks_code = . if weeksunemp > 40

* One copy of unemp per band: unemployed observations outside the band are set to missing
forvalues band = 0(8)40 {
	generate unempw`band' = unemp
	replace unempw`band' = . if unemp == 1 & weeks_code != `band'
}

***============= INDIVIDUAL LEVEL VARIABLES =============***

** AGE
* The last digit of hhpid (1-7) selects which of hgage1..hgage7 belongs to this person
generate age = .
forvalues p = 1/7 {
	replace age = hgage`p' if mod(hhpid, 10) == `p'
}

generate age2 = age * age


**AGE GROUP
* 1: under 25, 2: 25-34, 3: 35-44, 4: 45-54, 5: 55 and over; missing age stays missing
generate agegroup = 1 if age < 25
replace agegroup = 2 if age > 24 & age < 35
replace agegroup = 3 if age > 34 & age < 45
replace agegroup = 4 if age > 44 & age < 55
replace agegroup = 5 if age > 54 & age != .


**EDUCATION
* 0/1 dummies from ranges of the edhigh1 code
generate uni    = (edhigh1 > 0 & edhigh1 < 4)
generate tafe   = (edhigh1 > 3 & edhigh1 < 8)
generate school = (edhigh1 > 5 & edhigh1 < 10)


**HEALTH CONDITION
generate healthprob  = (hglth == 1)
generate healthprob2 = (helth == 1)

**SEX
generate female = (hgsex == 2)

***============== HOUSEHOLD LEVEL VARIABLES =============***

**PERSONS IN HOUSEHOLD
* Number of records sharing the same hhid and wave (this re-sorts the data by hhid wave)
bysort hhid wave: generate persons = _N
generate rpersons = sqrt(persons)

**WEEKLY HOUSEHOLD WAGES
* Inverse hyperbolic sine of hiwscei
generate awagesh = asinh(hiwscei)

**WEEKLY PUBLIC TRANSFERS
* Inverse hyperbolic sine of hicapi + hiwscei
generate atotinc = asinh(hicapi + hiwscei)

* Household size and its log
generate hsize = hhpers
generate lhsize = log(hsize)

* Adults as reported in hhadult; kids are the remaining counted persons
generate adults = hhadult
generate kids = persons - adults


**PERSONAL DISPOSABLE INCOME
* Positive component minus negative component, and its log
generate pdinc = tifditp - tifditn
generate lpdinc = log(pdinc)

**HOUSEHOLD DISPOSABLE INCOME
generate dinc = hifdip - hifdin

*g emp = (esempst> 0)
generate emp = (esbrd == 1)
generate nilf = (esbrd == 3)

*g jl = (unemp == 1 | nilf == 1)

* Equivalence factor
*(1 point to the first adult, 0.5 points to each additional person who is 15 years and over, and 0.3 to each child under the age of 15)
generate eqfactor = 1 if adults > 0
replace eqfactor = eqfactor + 0.5 * (adults - 1) if adults > 1
replace eqfactor = eqfactor + 0.3 * (kids)

** Weighted job loss probability (by household)
* contrib: this person's wscei as a share of the household total hiwscei
gen contrib=wscei/hiwscei

* Gap between last wave's stated job loss probability and the realised outcome (0/100)
sort xwaveid wave
gen surprise=l.firedprob-fired*100
gen surpriseu=l.firedprob-unemp*100

* Household-level weighted firedprob, kept only where the shares of members with a
* non-missing firedprob add up to 1
* NOTE(review): "==1" is an exact comparison on a sum of float ratios; households whose
* shares add up to 1 only up to rounding error are dropped - confirm this is acceptable.
bysort wave hhid: egen totfiredcheck=sum(contrib) if firedprob!=.
bysort wave hhid: egen totfiredprob=sum(contrib*firedprob) if totfiredcheck==1
replace totfiredprob=. if firedprob==. 

bysort wave hhid: egen contribcheck=sum(contrib) if firedprob!=.
replace totfiredprob=. if contribcheck<1

* Household-level weighted time unemployed. The coverage check must be the one built
* from timeunemp (contribcheck2); the original reused contribcheck, which measures
* firedprob coverage, and left contribcheck2 unused.
bysort wave hhid: egen contribcheck2=sum(contrib) if timeunemp!=.
bysort wave hhid: egen tottimeunemp=sum(contrib*timeunemp)
replace tottimeunemp=. if contribcheck2<1

* Rescale both household measures by 1/100
replace totfiredprob=totfiredprob/100
replace tottimeunemp=tottimeunemp/100

***============= EXPENDITURE VARIABLES =============***

** Grocery (for food) expenditure
* Start from xpgroci and switch to xpgroca wherever xpgroca holds a valid
* (non-negative, non-missing) value. The missing() guard matters: Stata ranks missing
* above every number, so a bare "xpgroca>=0" is also true when xpgroca is missing and
* would overwrite a valid lngroci with ln(.) = missing.
gen lngroci=ln(xpgroci)
replace lngroci = ln(xpgroca) if xpgroca>=0 & !missing(xpgroca)

** Meals eaten outside the home
* Same pattern and same guard as for groceries
gen aeatout=asinh(xposmli)
replace aeatout = asinh(xpwmeoa) if xpwmeoa>=0 & !missing(xpwmeoa)

** Total food expenditure
gen fcons=xposmli+xpfoodi
** Replacing for years with missing imputed data
replace fcons = xpwmeoa + xpgroca if fcons == . & xpwmeoa>=0 & xpgroca>=0

gen lfcons = ln(fcons)

** Total annual consumption
* Sum of the 25 hxy* expenditure items; missing if any single item is missing
g consall = hxygrci + hxyalci + hxycigi + hxypbti + hxymli + hxymvfi + hxyholi + hxyphii + hxyutli + hxyhmri + hxymvri + hxyedci + hxymcfi + hxywcfi + hxyccfi + hxytlii + hxyoii + hxyhlpi + hxyphmi + hxyncri + hxyucri + hxycmpi + hxytvi + hxywgi + hxyfrni
gen lnconsall=ln(consall)

** Housing expenditure
* hsrnti plus hsmgi, and its log

gen housing=hsrnti+hsmgi
gen lnhousing=ln(housing)

***============= DEFINING HAND-TO-MOUTH HOUSEHOLDS =============***
*Wealth variables - to define HtM
g bank = hwtbani
g shares = hweqini
g bonds = hwcaini

* Liquid net worth = liquid assets minus credit card debt
g liqassets = bank + shares + bonds 
g ccdebt = hwccdti
g liq_debt = ccdebt 
g liqnw = liqassets - liq_debt

*Set fortnightly household income
g pay = dinc/26

*Set household credit limit to one month of income (from Kaplan)
g credit_limit = dinc/12

* htm is defined (0) only where hwtbani is observed
g htm=0 if hwtbani != .

* Hand-to-mouth if liquid net worth is non-negative but at most half a pay period's
* income, or non-positive and within half a pay of the credit limit.
* The missing() guards are required: Stata ranks missing above every number, so with
* missing income "pay/2>=liqnw" is true and the household would be flagged htm=1.
replace htm=1 if (pay/2>=liqnw & liqnw>=0) & !missing(pay, liqnw)
replace htm=1 if (pay/2-credit_limit>=liqnw & liqnw<=0) & !missing(pay, credit_limit, liqnw)

* Previous-wave HtM status
sort xwaveid wave
g lhtm = l.htm 

* Unemployment split by lagged HtM status: unemphtm for HtM, unempnhtm for non-HtM
gen unemphtm = unemp * lhtm
g unempnhtm = unemp if unemphtm!=.
replace unempnhtm = 0 if unemphtm == 1 

***============= DEFINING MYOPIA =============***

* myopic = 1 when fisavep is positive and below 3, 0 when it is 3 or more;
* non-positive and missing fisavep give missing
generate myopic = (fisavep < 3) if fisavep > 0 & fisavep != .

* Fill gaps from the previous wave first, then from the next wave
replace myopic = L.myopic if myopic == .
replace myopic = F.myopic if myopic == .

* Previous-wave myopia status
generate lmyopic = L.myopic

* Unemployment split by lagged myopia: unempmyop for myopic, unempnmyop for non-myopic
generate unempmyop = unemp * lmyopic
generate lnmyop = abs(1 - lmyopic)
generate unempnmyop = unemp * lnmyop

***============= DEFINING DISCOURAGED WORKERS =============***

* discouraged = 1 when jspsuit is in [0, 60), 0 when it is in [60, 100].
* The original used ">60", which left jspsuit == 60 in neither branch (missing), even
* though nodisc below already counts those people as not discouraged.
* The full variable name is used throughout; the abbreviation "discourage" only
* resolves while varabbrev is on and no other variable shares the prefix.
gen discouraged = 1 if jspsuit <60 & jspsuit>=0
replace discouraged = 0 if jspsuit>=60 & jspsuit<=100
replace discouraged = 0 if unemp==0

* nodisc = 1 for unemployed people who are not flagged as discouraged
gen nodisc = 0 
replace nodisc = 1 if unemp == 1
replace nodisc = 0 if discouraged == 1

* Next-wave values
gen fdiscourage = f.discouraged
gen fnodisc = f.nodisc

* Unemployment split by discouragement status
gen unempdisc = discouraged*unemp
gen unempnodisc = unemp
replace unempnodisc = 0 if unempdisc==1

** SAVE
save "$data/long-file-cleaned", replace