/*******************************************************************************

	3_unemp.do
	
	This program compiles the annual unemployment rate data used in the analysis

	Last edited: 19 August 2021 

*******************************************************************************/	

* Start with no data in memory
clear 

* Work from the directory held in the global macro dir_out (not defined in this
* file); file names given without a path below are resolved against it
cd "$dir_out"

/*******************************************************************************
	1 - Current vintage of SA2 unemployment rates
		*Source: National Skills Commission 
		*2010q1 to 2020q2
		*ASGS 2016
*******************************************************************************/	

* Read the SALM SA2 extract; variable names are taken from row 3 of the file
import delimited using "$dir_in\SALM Unsmoothed SA2 Datafiles (ASGS 2016) - June quarter 2020", varnames(3) clear

rename sa2code sa2_2016

* Keep the two level series only; the unemployment rate is rebuilt from them below
drop if dataitem=="Unsmoothed unemployment rate (%)"
replace dataitem = "LF"  if dataitem=="Unsmoothed labour force (persons)"
replace dataitem = "UN"  if dataitem=="Unsmoothed unemployment (persons)"

drop statisticalarealevel2sa22016asgs

* cleaning
* Every column other than dataitem and sa2_2016 is treated as a quarterly value.
* "-" is recoded to missing and embedded spaces / thousands separators are
* stripped before converting to numeric. A column that import delimited already
* read as numeric needs none of this, and the string replace would stop the
* do-file with a type mismatch, so only string columns are processed.

quietly ds dataitem sa2_2016, not

foreach var of varlist `r(varlist)' {
	capture confirm string variable `var'
	if _rc == 0 {
		replace  `var' = "." if `var'=="-"
		replace  `var' = subinstr(`var', " ", "",.) 
		replace  `var' = subinstr(`var', ",", "",.) 
		destring `var', replace
	}
}

compress 

* Give the quarter columns a common stub so they can be reshaped
rename (mar* jun* sep* dec*) (no_mar* no_jun* no_sep* no_dec*)

* reshape file
* Step 1: one row per SA2 x data item x quarter (date holds the column suffix)

egen id = group(sa2_2016 dataitem)
reshape long no_, i(id) j(date) string
drop id

* Step 2: one row per SA2 x quarter, with no_LF and no_UN side by side

egen id = group(sa2_2016 date)

reshape wide no_, i(id) j(dataitem) string

drop id

* Split the quarter label: first three characters are the month, the rest is
* read as a two-digit year in the 2000s
gen q = substr(date,1,3)
gen y = "20"+substr(date,4,5)

destring y, replace

* September and December quarters are assigned to the following financial year
gen     finyear = y
replace finyear = y+1 if q=="sep" | q=="dec"

* Quarterly unemployment rate in per cent
gen urate = no_UN / no_LF * 100

* Financial-year averages of the quarterly values; n is the number of quarters
* with a non-missing rate in each SA2 x financial year
collapse (mean) urate no_UN no_LF (count) n=urate, by(sa2_2016 finyear) 

save salm_recent, replace

/*******************************************************************************
	2 - Historical data on SLA unemployment rates
		*Source: DEEWR (provided on request - data was provided in several different files)
		*Data smoothed using 4-quarter trailing average; the final qtr in each FY is the FY average
		*Convert from SLA to SA2 (ASGS 2011)
*******************************************************************************/

* 1999-2001 (ASGC 1996)

* Unemployment sheet: columns F, J and N are used as the 1999, 2000 and 2001 observations
import excel using "$dir_in\1996 ASGC SALM database (Sep 98 to Dec 01) - smoothed series.xls", sheet("Smth Unemployment") cellrange(A3) firstrow clear

keep SLACode F J N
rename (SLACode F J N) (sla_code_1996 u_1999 u_2000 u_2001)

* Rows without an SLA code are discarded
keep if sla_code_1996!=.

save salm_99_01, replace 

* Labour force sheet: same columns, same treatment
import excel using "$dir_in\1996 ASGC SALM database (Sep 98 to Dec 01) - smoothed series.xls", sheet("Smth Labour Force") cellrange(A3) firstrow clear

keep SLACode F J N
rename (SLACode F J N) (sla_code_1996 lf_1999 lf_2000 lf_2001)

keep if sla_code_1996!=.

* Attach the unemployment series saved above and overwrite the working file
merge 1:1 sla_code_1996 using salm_99_01, nogenerate

save salm_99_01, replace 

* Convert to SA2 (ASGS 2011)

use CG_SLA_1996_SA2_2011, clear

destring sla_code_1996, replace 

* Keep only SLAs present in both the concordance and the SALM data
merge m:1 sla_code_1996 using salm_99_01, keep(match) nogenerate

* Weight each series by the concordance variable ratio, then add up within SA2
* (ratio is presumably the share of the SLA allocated to the SA2 - TODO confirm)
foreach series of varlist lf* u* {
	replace `series' = `series'*ratio 
}
collapse (sum) lf* u*, by(sa2_maincode_2011)

* One row per SA2 x financial year
reshape long lf_ u_, i(sa2_maincode_2011) j(finyear) 

* Suffix C marks the ASGC 1996 vintage
rename (lf_ u_) (lf_C u_C)

save salm_99_01, replace 

* 2002-2007 (ASGC 2001)

* Unemployment sheet: columns D, H, L, P, T and X are used as the 2002-2007 observations
import excel "$dir_in\2001 ASGC SALM database (Mar 02 to Dec 07) - smoothed series.xls", sheet("smoothed unemployment") clear cellrange(A3) firstrow

keep SLACode D H L P T X

rename (D H L P T X) (u_2002 u_2003 u_2004 u_2005 u_2006 u_2007)

* Columns containing "-" are imported as text. Recode "-" to missing and convert
* to numeric for whichever year columns came in as strings, rather than assuming
* it is always (and only) 2002 and 2003; numeric columns are left untouched.
foreach var of varlist u_* {
	capture confirm string variable `var'
	if _rc == 0 {
		replace `var' = "." if `var'=="-"
		destring `var', replace
	}
}

* Rows without an SLA code are discarded
drop if SLACode==.

rename SLACode sla_code_2001

save salm_02_07, replace 

* Labour force sheet: same columns, same treatment
import excel "$dir_in\2001 ASGC SALM database (Mar 02 to Dec 07) - smoothed series.xls", sheet("smoothed labour force") clear cellrange(A3) firstrow

keep SLACode D H L P T X

rename (D H L P T X) (lf_2002 lf_2003 lf_2004 lf_2005 lf_2006 lf_2007)

foreach var of varlist lf_* {
	capture confirm string variable `var'
	if _rc == 0 {
		replace `var' = "." if `var'=="-"
		destring `var', replace
	}
}

drop if SLACode==.

rename SLACode sla_code_2001

* Attach the unemployment series saved above and overwrite the working file
merge 1:1 sla_code_2001 using salm_02_07, nogen

save salm_02_07, replace 

* Convert to SA2 (ASGS 2011)

use CG_SLA_2001_SA2_2011.dta, clear

destring sla_code_2001, replace 

* Keep only SLAs present in both the concordance and the SALM data
merge m:1 sla_code_2001 using salm_02_07
drop if    _merge!=3
drop       _merge

* Weight each series by the concordance variable ratio, then add up within SA2
* (ratio is presumably the share of the SLA allocated to the SA2 - TODO confirm).
* NOTE(review): collapse (sum) returns 0, not missing, when every contribution
* to an SA2 is missing.
foreach var of varlist lf* u* {
	replace `var' = `var'*ratio 
}
collapse (sum) lf* u*, by(sa2_maincode_2011)

* One row per SA2 x financial year
reshape long lf_ u_, i(sa2_maincode_2011) j(finyear) 

* Suffix B marks the ASGC 2001 vintage
rename lf_ lf_B
rename u_  u_B

save salm_02_07, replace 

* 2008-2012 (ASGC 2006) 
* NBL uses SQ08 instead of JQ08 as the 2007/08 observation, due to large number of missing values for JQ08

* Unemployment sheet: columns E, H, L, P and T are used as the 2008-2012 observations
import excel using "$dir_in\2006 ASGC SALM database - (Mar 08 onwards) - March 2013.xlsx", sheet("Unemployment") cellrange(A3) firstrow clear

keep SLACode E H L P T
rename (SLACode E H L P T) (sla_code_2006 u_2008 u_2009 u_2010 u_2011 u_2012)

* Rows without an SLA code are discarded
keep if sla_code_2006!=.

save salm_08_12, replace 

* Labour force sheet: same columns, same treatment
import excel using "$dir_in\2006 ASGC SALM database - (Mar 08 onwards) - March 2013.xlsx", sheet("Labour Force") cellrange(A3) firstrow clear

keep SLACode E H L P T
rename (SLACode E H L P T) (sla_code_2006 lf_2008 lf_2009 lf_2010 lf_2011 lf_2012)

keep if sla_code_2006!=.

* Attach the unemployment series saved above
merge 1:1 sla_code_2006 using salm_08_12, nogenerate

* NOTE(review): from here on the file is called salm_8_12 (no leading zero);
* salm_08_12 on disk keeps only the SLA-level unemployment series
save salm_8_12, replace

* Convert to SA2 (ASGS 2011)

use CG_SLA_2006_SA2_2011.dta, clear

destring sla_code_2006, replace 

* Keep only SLAs present in both the concordance and the SALM data
merge m:1 sla_code_2006 using salm_8_12, keep(match) nogenerate

* Weight each series by the concordance variable ratio, then add up within SA2
* (ratio is presumably the share of the SLA allocated to the SA2 - TODO confirm)
foreach series of varlist lf* u* {
	replace `series' = `series'*ratio 
}
collapse (sum) lf* u*, by(sa2_maincode_2011)

* One row per SA2 x financial year
reshape long lf_ u_, i(sa2_maincode_2011) j(finyear) 

* Suffix A marks the ASGC 2006 vintage
rename (lf_ u_) (lf_A u_A)

save salm_8_12, replace 


/*******************************************************************************
	3 - Combine historical files 
		*Starts from the data currently in memory, i.e. the 2008-2012 file
		 (salm_8_12) produced by the step immediately before this section
*******************************************************************************/

* ASGS 2011 basis

* Add the two earlier vintages; rows are matched on SA2 x financial year
foreach vintage in salm_02_07 salm_99_01 {
	merge 1:1 sa2_maincode_2011 finyear using `vintage', nogenerate
}

rename sa2_maincode_2011 sa2_2011 

sort sa2_2011 finyear

compress

save salm_historical_sa2_2011, replace

* ASGS 2016 basis

use sa2_2011_2016_finyear, clear

* Restrict the concordance to the financial years covered by the historical data
keep if inrange(finyear, 1999, 2012)

* Keep only SA2 x financial year pairs present on both sides
merge m:1 sa2_2011 finyear using salm_historical_sa2_2011, keep(match) nogenerate

* Weight every series by ratio_2011_2016 and add up within 2016 SA2 x financial year.
* NOTE(review): collapse (sum) returns 0, not missing, when all inputs are
* missing, so a vintage that does not cover a given year shows 0 in the output.
local series lf_A u_A lf_B u_B lf_C u_C

foreach s of local series {
	replace `s' = `s' * ratio_2011_2016
}

collapse (sum) `series', by(sa2_2016 finyear)

save salm_historical_sa2_2016, replace


* end of do file 