/*******************************************************************************

	2_wages.do
	
	This program compiles the annual wages data used in the analysis

	Last edited: 19 August 2021 

*******************************************************************************/	

clear

* all files saved or used without a path below are resolved relative to $dir_out
cd "$dir_out"

/*******************************************************************************
	1 - Personal Income in Australia (ABS)
	    *Previously known as Estimates of Personal Income for Small Areas
		*2011/12-2017/18
		*ASGS 2016
		*Produced from the LEED, based on ATO administrative data
*******************************************************************************/

* import the SA2 table and keep only rows whose column A parses as a number

import excel using "$dir_in\6524055002_DO003.xls", sheet("Table 3.4") cellrange(A8:AK2304) clear

drop if real(A)==.

destring A, replace
rename   A sa2_2016

* label the three blocks of year columns (financial years ending 2012 to 2018)

rename (C D E F G H I)        (ws_no_2012  ws_no_2013  ws_no_2014  ws_no_2015  ws_no_2016  ws_no_2017  ws_no_2018)
rename (Q R S T U V W)        (ws_val_2012 ws_val_2013 ws_val_2014 ws_val_2015 ws_val_2016 ws_val_2017 ws_val_2018)
rename (AE AF AG AH AI AJ AK) (ws_pp_2012  ws_pp_2013  ws_pp_2014  ws_pp_2015  ws_pp_2016  ws_pp_2017  ws_pp_2018)

* remaining lettered columns are not used

drop B J-P X-AD

* drop areas that were only populated with data for 2016/17 and 2017/18
* (an area is dropped as soon as any one of the three measures is "np" in every year 2012-2016)

foreach stub in ws_no_ ws_val_ ws_pp_ {
	drop if `stub'2012=="np" & `stub'2013=="np" & `stub'2014=="np" ///
	      & `stub'2015=="np" & `stub'2016=="np"
}

* reshape from wide to long: one row per SA2 and financial year

reshape long ws_no_ ws_val_ ws_pp_, i(sa2_2016) j(finyear)

rename (ws_no_ ws_val_ ws_pp_) (ws_no ws_val ws_pp)

* remaining "np" cells become missing so the columns can be converted to numeric

foreach v of varlist ws_no ws_val ws_pp {
	replace  `v' = "." if `v'=="np"
	destring `v', replace
}

* tag the rows so they can be told apart after appending other releases

gen source_file = "2012_2018"

* save

save income_sa2_2012_2018_ws, replace

/*******************************************************************************
	2 - Estimates of Personal Income for Small Areas (ABS)
		*2010/11-2015/16
		*ASGS 2016
		*Used for 2010/11 levels; other years were not revised 
*******************************************************************************/

* import

import excel using "$dir_in\employee income 2010-11 to 2015-16.xls", sheet("Table 4") cellrange(A8:AF2317) clear

rename A sa2_2016

* label the three blocks of year columns (financial years ending 2011 to 2016)

rename (C D E F G H)       (ws_no_2011  ws_no_2012  ws_no_2013  ws_no_2014  ws_no_2015  ws_no_2016)
rename (O P Q R S T)       (ws_val_2011 ws_val_2012 ws_val_2013 ws_val_2014 ws_val_2015 ws_val_2016)
rename (AA AB AC AD AE AF) (ws_pp_2011  ws_pp_2012  ws_pp_2013  ws_pp_2014  ws_pp_2015  ws_pp_2016)

drop B I-N U-Z

* reshape from wide to long: one row per SA2 and financial year

reshape long ws_no_ ws_val_ ws_pp_, i(sa2_2016) j(finyear)

rename (ws_no_ ws_val_ ws_pp_) (ws_no ws_val ws_pp)

* "na" cells become missing so the columns can be converted to numeric

foreach v of varlist ws_no ws_val ws_pp {
	replace  `v' = "." if `v'=="na"
	destring `v', replace
}

gen source_file = "2011_2016"

* append datasets to create 2010/11 to 2017/18 file

append using income_sa2_2012_2018_ws

* the intermediate file is no longer needed once it has been appended

erase income_sa2_2012_2018_ws.dta

* take 2011 from this release and every later year from the 2012-2018 release

keep if (finyear==2011 & source_file=="2011_2016") ///
      | (finyear>2011  & source_file=="2012_2018")

sort sa2_2016 finyear

save income_sa2_2011_2018_ws.dta, replace

clear

/*******************************************************************************
	2b - Estimates of Personal Income for Small Areas (ABS)
		*2005/06-2010/11
		*ASGS 2011
*******************************************************************************/

* import (no clear option: memory is expected to be empty at this point)

import excel using "$dir_in\6524055002do004_200506201011.xls", sheet(Table_7) cellrange(A8:AL2174)

rename A sa2_2011

* label the three blocks of year columns (financial years ending 2006 to 2011)

rename (C D E F G H) (ws_no_2006  ws_no_2007  ws_no_2008  ws_no_2009  ws_no_2010  ws_no_2011)
rename (I J K L M N) (ws_val_2006 ws_val_2007 ws_val_2008 ws_val_2009 ws_val_2010 ws_val_2011)
rename (O P Q R S T) (ws_pp_2006  ws_pp_2007  ws_pp_2008  ws_pp_2009  ws_pp_2010  ws_pp_2011)

* keep the area code and the renamed measures; column B and all unrenamed columns go

keep sa2_2011 ws*

* reshape from wide to long: one row per SA2 and financial year

reshape long ws_no_ ws_val_ ws_pp_, i(sa2_2011) j(finyear)

rename (ws_no_ ws_val_ ws_pp_) (ws_no ws_val ws_pp)

* "-" cells become missing so the columns can be converted to numeric

foreach v of varlist ws_no ws_val ws_pp {
	replace  `v' = "." if `v'=="-"
	destring `v', replace
}

* save file with ASGS 2011 definitions

save income_sa2_2006_2011, replace
clear

* create file that converts data to ASGS 2016 definitions
* (sa2_2011_2016_finyear supplies sa2_2011, sa2_2016, finyear and ratio_2011_2016)

use sa2_2011_2016_finyear, clear

keep if inrange(finyear, 2006, 2011)

* attach the ASGS 2011 data to every correspondence row; keep matched rows only

merge m:1 sa2_2011 finyear using income_sa2_2006_2011, keep(match) nogenerate

* the published average cannot be apportioned, so it is rebuilt after aggregation

drop ws_pp 

* apportion counts and values to the 2016 areas using the correspondence ratio

foreach v of varlist ws_no ws_val {
	replace `v' = `v' * ratio_2011_2016
}

gen source_file = "2006_2011"

* NOTE(review): collapse (sum) returns 0, not missing, when every contributing
* value is missing; such cells give a missing ws_pp and growth rate below

collapse (sum) ws_no ws_val (first) source_file, by(sa2_2016 finyear)

* average per earner, stored as double: an integer storage type would truncate
* the ratio and distort the growth rates computed from it below

gen double ws_pp = ws_val / ws_no

save income_sa2_2006_2011_2016asgs, replace

* Also create a ASGS 2016 file with 1 year lead of the growth rates (for splicing purposes)
* growth stored in year t is the per cent change from t to t+1

xtset sa2_2016 finyear

foreach v in ws_no ws_val ws_pp {
	gen `v'_gr = F1.`v' / `v' * 100 - 100
	drop `v'
}

* 2011 has no following year in this file, so its growth rate is always missing

drop if finyear==2011

save income_sa2_2006_2011_2016asgs_growth, replace
clear


/*******************************************************************************
	3 - Combined SA2 file
	
		Combine the files to create complete time-series (ASGS 2016), 2005/06-2017/18
	
		splice the growth rates from the earlier file onto the more recent data (to correct for level shifts due to breaks)
*******************************************************************************/

* levels for 2011 onwards, plus growth-rate-only rows for 2006-2010

use income_sa2_2011_2018_ws, clear

append using income_sa2_2006_2011_2016asgs_growth

format sa2_2016  %12.0g

sort   sa2_2016 finyear
xtset  sa2_2016 finyear

* back-cast the levels one year at a time: level(t) = level(t+1) / (1 + growth(t)/100)
* years must run from 2010 down to 2006 because each step uses the level just filled in for t+1

foreach v of varlist ws_no ws_val ws_pp {
	forvalues yr = 2010(-1)2006 {
		replace `v' = F1.`v' / (1+`v'_gr/100) if finyear==`yr'
	}
}

* growth rates are only needed for the splice

drop ws_no_gr ws_val_gr ws_pp_gr 

order sa2_2016 finyear ws_no ws_val ws_pp  

compress 

save wages_sa2_2016, replace


/*******************************************************************************
	3b - ABS Estimates of Personal Income for Small Areas - SLA level data (historical)
	
	    Convert to ASGS 2011 SA2s using correspondence files 
		(correspondences for ASGS 2016 are not available for all years required)
*******************************************************************************/

* 2003/04-2008/09 (ASGC 2008)

import delimited "$dir_in\0304_0809_supertable_extract.csv", delimiter(",") clear varnames(6) colrange(1:5) rowrange(6:8442)

* "-" cells become missing so the measure columns can be converted to numeric

foreach v of varlist wagesalaryearnerspsns-averagewagesalaryincome {
	replace  `v' = "." if `v'=="-"
	destring `v', replace
}

* SLA key: 5th character of v2 followed by its last four characters

gen sla_code_2008 = substr(v2,5,1) + substr(v2,-4,.)

rename (v1 wagesalaryearnerspsns wagesalaryincome) (finyear ws_no ws_value)

drop v2 averagewagesalaryincome

* Convert to 2011 SA2
* go wide first so each SLA is a single row that the correspondence can merge onto

reshape wide ws_no ws_value, i(sla_code_2008) j(finyear) 

save ws_0304_0809, replace 

use CG_SLA_2008_SA2_2011, clear 

* keep every correspondence row, matched or not; discard SLAs absent from the correspondence

merge m:1 sla_code_2008 using ws_0304_0809, keep(master match) nogenerate

* apportion each SLA to its SA2s, then total by SA2

foreach v of varlist ws* {
	replace `v' = `v'*ratio 
}
collapse (sum) ws*, by(sa2_maincode_2011)

reshape long ws_no ws_value, i(sa2_maincode_2011) j(finyear) 

* suffix A marks this source when the historical files are merged

rename (ws_no ws_value) (ws_no_A ws_value_A)

gen    source_file_A = "2004_2009"

* overwrites the SLA-level intermediate saved above under the same name

save ws_0304_0809, replace 

* 2001/02-2005/06 (ASGC depends on the reference year) 
* one workbook per financial year; each workbook is read from sheets Table_1 to Table_6

local year 2002

foreach j in do002 do004 do006 do008 do010 {

	* read each sheet into its own temporary file; temporary files are removed by
	* Stata automatically, so nothing is left in (or deleted from) the output directory

	forvalues i = 1/6 {

		import excel "$dir_in\6524055002`j'_200102200506.xls", clear cellrange(A7) sheet(Table_`i')

		keep A C D

		* keep only rows whose column A is a 9-character code

		keep if strlen(A)==9

		compress 

		tempfile sheet`i'
		save "`sheet`i''", replace
	}

	* stack the six sheets in sheet order

	clear
	forvalues i = 1/6 {
		append using "`sheet`i''"
	}

	gen finyear=`year'
	rename A sla_code_`year'
	rename C ws_no
	rename D ws_value

	* SLA key: first character of the code followed by its last four characters

	replace sla_code_`year' = substr(sla_code_`year',1,1) + substr(sla_code_`year',-4,.)

	save ws_0102_0506_year`year', replace 
	local year = `year'+1
}

* Convert to 2011 SA2s and append files
* each year has its own SLA-to-SA2 correspondence (CG_SLA_<year>_SA2_2011)

forvalues i = 2/6 {

	use CG_SLA_200`i'_SA2_2011, clear 

	* keep every correspondence row, matched or not; discard SLAs absent from the correspondence

	merge m:1 sla_code_200`i' using ws_0102_0506_year200`i', keep(master match) nogenerate

	* apportion each SLA to its SA2s, then total by SA2

	foreach var in ws_no ws_value {
		replace `var' = `var'*ratio 
	}
	collapse (sum) ws*, by(sa2_maincode_2011)

	* collapse drops finyear, so it is rebuilt from the loop index

	gen finyear = 2000+`i'

	save ws_0102_0506_year200`i', replace
}

* stack the yearly SA2 files and remove them from disk

clear 
forvalues i = 2/6 {
	append using ws_0102_0506_year200`i'
	erase        ws_0102_0506_year200`i'.dta
}

* suffix B marks this source when the historical files are merged

rename ws_no    ws_no_B
rename ws_value ws_value_B

gen    source_file_B = "2002_2006"

save ws_0102_0506, replace 

* 1995/96-2000/01 (ASGC 2001) 
* one workbook per year; the last workbook (sla01) holds the two measures one column
* further right (H and I instead of G and H)

local year 1996

foreach tag in sla96 sla97 sla98 sla99 sla00 sla01 {

	if `year'==2001 {
		local col_no  H
		local col_val I
	}
	else {
		local col_no  G
		local col_val H
	}

	import excel "$dir_in\6524055001_table1_sourceofincomex`tag'_asgc01.xls", clear cellrange(E8) sheet(SLA DATA)

	keep E `col_no' `col_val'

	* rows without an SLA code in column E are discarded

	drop if E==.

	* "-" and "n.a." cells become missing so the columns can be converted to numeric

	foreach v in `col_no' `col_val' {
		replace  `v' = "." if `v'=="-" | `v'=="n.a." 
		destring `v', replace
	}

	rename E          sla_code_2001
	rename `col_no'   ws_no
	rename `col_val'  ws_value 
	gen finyear = `year'

	save ws_9596_0001_year`year', replace 

	local year = `year'+1
}

* Convert to 2011 SA2s
* stack the yearly files and remove them from disk

clear
forvalues yr = 1996/2001 {
	append using ws_9596_0001_year`yr'
	erase        ws_9596_0001_year`yr'.dta
}

* go wide so each SLA is a single row that the correspondence can merge onto

reshape wide ws_no ws_value, i(sla_code_2001) j(finyear) 

save ws_9596_0001, replace 

use CG_SLA_2001_SA2_2011, clear 

* the data side holds the SLA code as a number, so the key is converted here to match

destring sla_code_2001, replace 

* keep matched rows only

merge m:1 sla_code_2001 using ws_9596_0001, keep(match) nogenerate

* apportion each SLA to its SA2s, then total by SA2

foreach v of varlist ws* {
	replace `v' = `v'*ratio 
}
collapse (sum) ws*, by(sa2_maincode_2011)

reshape long ws_no ws_value, i(sa2_maincode_2011) j(finyear) 

* suffix C marks this source when the historical files are merged

rename (ws_no ws_value) (ws_no_C ws_value_C)

gen    source_file_C = "1996_2001"

* overwrites the SLA-level intermediate saved above under the same name

save ws_9596_0001, replace 

* Merge the different files together
* the three sources sit side by side (suffixes A, B, C), one row per SA2 and financial year

use ws_9596_0001, clear

foreach f in ws_0102_0506 ws_0304_0809 {
	merge 1:1 sa2_maincode_2011 finyear using `f', nogen
}

* the per-source files are no longer needed once merged

foreach f in ws_9596_0001 ws_0102_0506 ws_0304_0809 {
	erase `f'.dta
}

rename sa2_maincode_2011 sa2_2011

sort sa2_2011 finyear

save wages_historical_sa2_2011, replace 

* create a file with ASGS 2016 definitions

use sa2_2011_2016_finyear, clear

keep if inrange(finyear, 1996, 2009)

* attach the historical data to every correspondence row; keep matched rows only

merge m:1 sa2_2011 finyear using wages_historical_sa2_2011, keep(match) nogenerate

* apportion to the 2016 areas using the correspondence ratio, then total by 2016 area

local measures ws_no_A ws_value_A ws_no_B ws_value_B ws_no_C ws_value_C

foreach v of local measures {
	replace `v' = `v' * ratio_2011_2016
}

* NOTE(review): collapse (sum) returns 0, not missing, when every contributing value
* is missing, so a source with no data for a given year shows 0 in the saved file
* (its source_file_* is empty there) - confirm downstream code allows for this

collapse (sum) `measures' (first) source_file_A source_file_B source_file_C, by(sa2_2016 finyear)

save wages_historical_sa2_2016, replace


/*******************************************************************************
	4 - WPI by state
*******************************************************************************/

* column A holds the observation date; columns B to I hold eight WPI series

import excel "$dir_in\634502b.xls", clear cellrange(A11:I104) sheet(Data1)

rename (B C D E F G H I) (wpi1 wpi2 wpi3 wpi4 wpi5 wpi6 wpi7 wpi8)

gen y = year(A)
gen q = quarter(A)

* financial year ends in June: quarters 1-2 belong to year y, quarters 3-4 to year y+1

gen finyear = y + inlist(q,3,4) if inlist(q,1,2,3,4)

* annual index = mean of the quarterly observations in each financial year
* NOTE(review): a financial year with fewer than four quarters inside the imported
* range is averaged over the quarters present - confirm first and last year coverage

collapse (mean) wpi1-wpi8, by(finyear)

* one row per series (llm = 1 to 8) and financial year

reshape long wpi, i(finyear) j(llm)

xtset llm finyear 

* annual per cent change in the index

gen wpi_gr = wpi/L1.wpi*100-100

save wpi_state, replace

clear

* end of do file 