/*******************************************************************************

	5_labour_markets.do
	
	This program constructs classifications of local labour markets based on commuting flows in the Census.
	
	Separate classifications are developed for the 2011 and 2016 Census (different ASGS for each)

	Last edited: 19 August 2021 

*******************************************************************************/	

* Start from an empty dataset
clear

* Request a matrix size limit of 2300 (the sections below build matrices with
* one row and one column per SA2)
set matsize 2300

* All files referenced without a path below are read from / written to $dir_out
cd "$dir_out"

/*******************************************************************************
	1 - 2011 Census data on usual residence cross-tabulated against place of work
		*SA2s for ASGS 2011
*******************************************************************************/	

* Read the 2011 usual residence by place of work table: variable names come
* from row 10 of the CSV, data from rows 12 to 2225
import delimited using "$dir_in\sa2_usualresidence_pow_2011_ASGS2011.csv", ///
	delimiters(",") varnames(10) rowrange(12:2225) clear

* The first column identifies the SA2 of usual residence; store it as a number
rename   mainstatisticalareastructuremain sa2_ur
destring sa2_ur, replace
format   sa2_ur %12.0g

* Columns v2 to v2233 were imported under generic names. Their variable label
* holds the original CSV header (presumably the place of work SA2 code), which
* becomes the suffix of the new variable name.
unab powcols : v2-v2233
foreach col of local powcols {
	local code : variable label `col'
	rename `col' sa2_`code'
}

* Discard the trailing columns that are not individual place of work cells
drop v2234 v2235 total v2237

save 2011_pow, replace

/*******************************************************************************
	2 - 2016 Census data on usual residence cross-tabulated against place of work
		*SA2s for ASGS 2016
*******************************************************************************/	

* Read the 2016 usual residence by place of work table: variable names come
* from row 10 of the CSV, data from rows 12 to 2321
import delimited using "$dir_in\sa2_usualresidence_pow_2016_ASGS2016.csv", ///
	delimiters(",") varnames(10) rowrange(12:2321) clear

* The first column identifies the SA2 of usual residence; store it as a number
rename   sa2pow sa2_ur
destring sa2_ur, replace
format   sa2_ur %12.0g

* Columns v2 to v2311 were imported under generic names. Their variable label
* holds the original CSV header (presumably the place of work SA2 code), which
* becomes the suffix of the new variable name.
unab powcols : v2-v2311
foreach col of local powcols {
	local code : variable label `col'
	rename `col' sa2_`code'
}

* Discard the trailing columns that are not individual place of work cells
drop v2312 total v2314

save 2016_pow, replace

/*******************************************************************************
	3 - Hierarchical cluster analysis
	    *2011 and 2016 Censuses
*******************************************************************************/	

* Start a fresh workbook; one sheet per Census year is added inside the loop
putexcel set cluster_formation.xlsx, replace

foreach y in 2011 2016 {

	use `y'_pow, clear

	/* Drop the following categories:

		 Migratory - Offshore - Shipping
		 No Usual Address
		 POW Capital city undefined
		 POW No Fixed Address */

	forvalues i = 1(1)9 {

		* Present as both a row and a column for every state prefix
		drop  sa2_`i'97979799 sa2_`i'99999499

		* Place of work only categories. Each column is dropped on its own so
		* that one missing column does not stop the other from being removed
		* (a single captured drop of both names removes neither if one is absent).
		foreach pow_only in 90909099 98989899 {
			capture drop sa2_`i'`pow_only'
		}

		drop if sa2_ur == `i'97979799 | sa2_ur == `i'99999499
	}

	save rawmatfull_`y', replace

	* Merge zero SA2s (e.g. airports, national parks) to the closest non-zero SA2

	* Total employed residents per SA2 of usual residence (sum over all POW columns)
	quietly ds sa2_ur, not
	egen ur_totals = rowtotal(`r(varlist)')

	* String copy of the SA2 code, used as the key into the SA2 boundary file
	gen long SA2_MAIN = sa2_ur
	tostring SA2_MAIN, replace

	merge m:1 SA2_MAIN using "$dir_out\SA2_`y'_AUST"
	drop if _merge==2
	drop _merge

	* NOTE(review): _CY and _CX are presumably centroid latitude and longitude
	* from the boundary file - confirm
	rename (_CY _CX) (lat_sa2 lon_sa2)

	* An SA2 with fewer than 50 employed residents is treated as a zero SA2
	gen zero_sa2 = ur_totals < 50
	gen double lat_nz_sa2 = lat_sa2  if zero_sa2==0
	gen double lon_nz_sa2 = lon_sa2  if zero_sa2==0

	* For each zero SA2, record the id of the nearest non-zero SA2 in sa2_ur_closest
	nearstat lat_sa2 lon_sa2 if zero_sa2==1 , near(lat_nz_sa2 lon_nz_sa2) distvar(dist_sa2)  nid(sa2_ur sa2_ur_closest)

	* Relies on the variable order left by the merge above
	drop SA2_MAIN-AREA_SQKM lat_nz_sa2-lon_nz_sa2 dist_sa2

	* Keep a lookup of zero SA2 -> closest non-zero SA2 for use after clustering
	preserve
	keep if zero_sa2==1
	keep sa2_ur sa2_ur_closest ur_totals
	rename sa2_ur sa2_`y'_zero
	rename sa2_ur_closest sa2_`y'_closest
	rename ur_totals ur_totals1
	save zero_sa2_`y', replace
	restore

	* Collapse table down to get rid of the zero SA2s

	sort zero_sa2 sa2_ur

	mkmat sa2_ur sa2_ur_closest if zero_sa2==1, matrix(closest_SA2) rownames(sa2_ur)

	* Consolidate columns: add each zero SA2 column to the column of its
	* closest non-zero SA2, then remove the zero SA2 column

	sum zero_sa2 if zero_sa2==1
	local n_zero = r(N)
	forvalues i = 1(1)`n_zero' {
		local zero_code = closest_SA2[`i',1]
		local host_code = closest_SA2[`i',2]
		replace sa2_`host_code' = sa2_`zero_code' + sa2_`host_code'
		drop                      sa2_`zero_code'
	}

	* Consolidate rows: relabel each zero SA2 as its closest non-zero SA2 and sum

	replace sa2_ur = sa2_ur_closest if zero_sa2==1

	quietly ds sa2_ur ur_totals zero_sa2 sa2_ur_closest, not

	collapse (sum) `r(varlist)', by(sa2_ur)

	* Create a raw matrix of the commuter flows

	save rawmat_`y', replace

	quietly ds sa2_ur, not

	mkmat `r(varlist)', matrix(raw_matrix) rownames(sa2_ur)

	* Column vector containing the total resident employed population in each SA2
	* (svmat stores it in the dataset as ur_totals1)

	mata : st_matrix("ur_totals", rowsum(st_matrix("raw_matrix")))

	svmat ur_totals

	* Construct the similarity matrix of proportional flows: two-way flow between
	* a pair of SA2s divided by the smaller of the two resident totals, capped at 0.999

	local obs = _N

	matrix prop_flows = J(`obs',`obs',.)

	forvalues i = 1(1)`obs' {
		forvalues j = 1(1)`obs' {
			matrix prop_flows[`j',`i'] = min((raw_matrix[`j',`i'] + raw_matrix[`i',`j']) / min(ur_totals[`j',1], ur_totals[`i',1]), 0.999)
		}
	}

	* Dissimilarity matrix = 1 - similarity, with zeros on the main diagonal

	matrix ones = J(`obs',`obs',1)

	matrix dis_mat = ones - prop_flows

	forvalues i = 1(1)`obs' {
		matrix dis_mat[`i',`i'] = 0
	}

	* Rows and columns of dis_mat are both labelled with the usual residence SA2 codes
	local rnames : rownames raw_matrix
	matrix rownames dis_mat = `rnames'
	matrix colnames dis_mat = `rnames'

	* Only the SA2 id and its resident total are needed from here on
	quietly ds sa2_ur ur_totals1, not

	drop `r(varlist)'

	* Hierarchical cluster analysis (average linkage)

	clustermat averagelinkage dis_mat, labelvar(varnames) name(cluster_analysis) add

	* Cluster analysis for all cluster heights from 0.001 to 1 in increments of 0.001.
	* The loop runs over integers and derives the height as p/1000 so that the
	* cut points do not depend on repeated floating point addition and exactly
	* 1000 grouping variables (c1 to c1000) are created, matching the height matrix.

	forvalues p = 1/1000 {
		local cut_height = `p'/1000
		cluster generate c`p' = cut(`cut_height')
	}

	* Row j of height: cut-off j/1000 and the largest group number at that cut-off

	matrix height = J(1000,2,.)

	matrix colnames height = height_cutoff clusters

	forvalues j = 1(1)1000 {
		matrix height[`j',1] = `j'/1000
		quietly summarize c`j'
		matrix height[`j',2] = `r(max)'
	}

	* Export number of clusters vs cluster height to excel

	putexcel set cluster_formation.xlsx, sheet("height_clusterformation_`y'") modify

	putexcel A1=matrix(height), colnames

	rename sa2_ur sa2_`y'

	save local_lm_working_`y', replace

	* Examine the characteristics of clusters at the preferred heights

	cluster generate c_pr980 = cut(0.980)

	keep sa2_`y' ur_totals1 c_pr980

	save cluster_preferred_`y', replace

	* Assign the zero SA2s to the appropriate cluster: look up the cluster of the
	* closest non-zero SA2, then append the zero SA2s back onto the main list.
	* NOTE(review): ur_totals1 of a non-zero SA2 already includes the rows of the
	* zero SA2s folded into it above, while the zero SA2 rows appended here keep
	* their own pre-merge totals, so summing ur_totals1 across SA2s may count
	* those residents twice - confirm this is intended.

	use zero_sa2_`y', clear

	rename sa2_`y'_closest sa2_`y'

	merge m:1 sa2_`y' using cluster_preferred_`y'

	keep if _merge==3

	drop _merge sa2_`y'

	rename sa2_`y'_zero sa2_`y'

	append using cluster_preferred_`y'

	order sa2_`y'
	sort  sa2_`y'

	save cluster_preferred_`y', replace

	* Remove intermediate files
	erase local_lm_working_`y'.dta
	erase zero_sa2_`y'.dta
}

/*******************************************************************************
	4 - Alternative LLM classifications (FER classifications)
*******************************************************************************/	

/* CofFEE (http://www.fullemployment.net/fer.php)
    excludes:
	- Lord Howe Island (108031161)
	- Christmas Island (901011001)
	- Cocos (Keeling) Islands (901021002)
	- Jervis Bay (901031003) */

* Read the CofFEE concordance and reduce it to the SA2 code and its FER code,
* renamed to the names used for merging in the next section
import excel "$dir_in\CFER2011_Concordances.xlsx", sheet(CFER2011) firstrow clear

rename SA2_Code_2011  sa2_2011
rename CFER_Code_2011 c_ferco

keep sa2_2011 c_ferco

save FER_CofFEE, replace

/* PC (https://www.pc.gov.au/inquiries/completed/transitioning-regions/report)

    includes:
	- Norfolk Island (901041004)
	
	Norfolk Island is not included in the 2011 ASGS but is included in the 2016 ASGS*/

* Read the PC concordance and reduce it to the SA2 code and its FER id,
* renamed to the names used for merging in the next section
import delimited "$dir_in\SA2_to_FER.csv", varnames(1) clear

rename sa2_main11 sa2_2011
rename ferid      c_ferpc

keep sa2_2011 c_ferpc

save FER_PC, replace

/*******************************************************************************
	5 - Merge LLM classifications (ASGS 2011)
*******************************************************************************/	

* Start from the commuting-based clusters and attach both FER classifications
use cluster_preferred_2011, clear

* CofFEE codes: every row from either file is retained
merge 1:1 sa2_2011 using FER_CofFEE, nogenerate

* PC codes: rows that appear only in the PC file are discarded
merge 1:1 sa2_2011 using FER_PC, keep(master match) nogenerate

save cluster_preferred_2011_all, replace

* The intermediate concordance files are no longer needed
erase FER_CofFEE.dta
erase FER_PC.dta

/*******************************************************************************
	6 - Convert from ASGS 2011 to ASGS 2016  
*******************************************************************************/	

* For each classification, map the 2011 SA2 cluster codes onto 2016 SA2s using
* the 2011 to 2016 correspondence. Writes working_<classification>.dta with one
* row per 2016 SA2, holding the assigned code (<classification>_11c) and pop2011.
foreach clus in c_pr980 c_ferco c_ferpc {

	use sa2_2011_2016, clear

	order sa2_2011 sa2_2016 ratio_2011_2016

	* Non-matches are zero sa2s, Migratory - Offshore - Shipping (OT) and No usual address (OT)

	merge m:1 sa2_2011 using cluster_preferred_2011_all.dta
	keep if _merge==3
	drop    _merge

	sort sa2_2011 sa2_2016

	* Diagnostic only (discarded by the collapse below): flags 2016 SA2s whose
	* contributing 2011 SA2s sit in more than one cluster
	bysort sa2_2016: egen `clus'_max = max(`clus')
	bysort sa2_2016: egen `clus'_min = min(`clus')

	gen conflict =(`clus'_max != `clus'_min)

	* In cases where 2016 SA2s are mapped to more than 1 cluster, assign based on
	* population weight. Weights are held as doubles so that two different totals
	* cannot round to the same stored value and create a false tie.

	gen double ur_weighted = ur_totals1*ratio_2011_2016

	bysort sa2_2016 `clus':  egen double ur_weighted_`clus'     = total(ur_weighted)

	bysort sa2_2016       :  egen double ur_weighted_`clus'_max = max(ur_weighted_`clus')

	keep if ur_weighted_`clus'==ur_weighted_`clus'_max

	* Tie-break: if two or more clusters share the maximum weight, keep only the
	* lowest cluster code (missing sorts last). Without this the mean taken in the
	* collapse below would average different codes into a value that is not a
	* valid cluster id.
	bysort sa2_2016 (`clus'): keep if `clus' == `clus'[1]

	* All remaining rows for a 2016 SA2 now share one code, so the mean is that code
	collapse (mean) `clus'_11c=`clus' (sum) pop2011=ur_totals1, by(sa2_2016)

	drop if sa2_2016==. | `clus'_11c==.

	save working_`clus', replace

}
*erase cluster_preferred_2011_all.dta

* Assemble the three converted classifications into one file keyed on sa2_2016,
* deleting each working file once it has been read

use working_c_pr980, clear
erase working_c_pr980.dta

local fer_schemes c_ferco c_ferpc
foreach scheme of local fer_schemes {
	merge 1:1 sa2_2016 using working_`scheme', nogenerate
	erase working_`scheme'.dta
}

save cluster_preferred_2011_asgs2016, replace


/*******************************************************************************
	7 - Combined file with both 2011 and 2016 Census (2016 ASGS)
	    *Norfolk Island (901041004) is missing from the 2011 Census, so it is set as a separate labour market
*******************************************************************************/	

* Add the 2016 Census clusters to the 2011-based classifications held in memory
merge 1:1 sa2_2016 using cluster_preferred_2016, nogenerate

* Give SA2 901041004 its own code in each 2011-based classification:
* one more than the largest code currently in use
foreach cvar in c_pr980_11c c_ferco_11c c_ferpc_11c {
	summarize `cvar'
	replace `cvar' = r(max) + 1 if sa2_2016 == 901041004
}

* The variables that came from the 2016 cluster file get 2016-specific names
rename c_pr980    c_pr980_16c
rename ur_totals1 pop2016

* include SA2, SA3, SA4, GCCSA and state as alternative ways of defining labour markets

* Only SA2s present in both the cluster data and the descriptor file are kept
merge 1:1 sa2_2016 using sa2_descriptors_2016, keep(match) nogenerate

encode gccsa_code_2016, gen(c_gccsa_16c)

rename state_code_2016 c_state_16c

* SA3 and SA4 codes are the first five and first three characters of the SA2 code
tempvar sa2_text
tostring sa2_2016, gen(`sa2_text')
gen c_sa3_16c = substr(`sa2_text',1,5)
gen c_sa4_16c = substr(`sa2_text',1,3)

destring c_sa3_16c c_sa4_16c, replace

drop gccsa_code_2016 `sa2_text'

order sa2_2016 c_* pop*

save cluster_preferred_combined_asgs2016, replace

clear


* end of do file
