/*******************************************************************************

	6_table_b1.do
	
	This program constructs the data in Appendix B, Table B1.
	
	Last edited: 19 August 2021 

*******************************************************************************/	

* Start from a clean session (clear all does not drop global macros, so
* $dir_out survives it).
clear all

* Raise the matrix size limit; the flow matrices built below have more than
* 2,000 rows and columns.
set matsize 2300

* All relative file names below (inputs read with -use-/-merge- and the Excel
* output) are resolved against $dir_out. Stop with a clear message if the
* global is empty, rather than silently working in whatever directory
* -cd ""- happens to leave us in.
if `"$dir_out"' == "" {
	display as error "global macro dir_out is empty; set it to the output directory before running this do-file"
	exit 198
}

cd "$dir_out"

/*******************************************************************************
	1 - Define mata functions for later use
*******************************************************************************/	

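* 1A) 	Function that computes the unweighted mean of each of the vectors out_c, inf_c, mobi_c and mobo_c
* 		(any of which could have missing elements) and stores the results in out_c_av, inf_c_av, mobi_c_av and mobo_c_av.
* 		Note that the argument of the function (a scalar string) doesn't do anything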

mata:
// mata_function(a)
//   Reads the Stata matrices out_c, inf_c, mobi_c and mobo_c, takes the
//   unweighted column mean of each, and stores the results back in Stata as
//   out_c_av, inf_c_av, mobi_c_av and mobo_c_av respectively.
//   Mata's mean() leaves rows containing missing values out of the calculation.
//
//   a : string scalar, not used; kept so existing calls mata_function("a")
//       continue to work.
//
//   The block is opened with "mata:" (with the colon) so that an error here
//   stops the do-file instead of being tolerated.
void mata_function(string scalar a)
{
	string rowvector stems
	real scalar      j

	stems = ("out_c", "inf_c", "mobi_c", "mobo_c")

	for (j = 1; j <= cols(stems); j++) {
		st_matrix(stems[j] + "_av", mean(st_matrix(stems[j])))
	}
}
end

* 1B) 	Function creating a column vector of 0/1 values indicating whether each SA2 lies in a given cluster.
*		The first argument of the function ("a") is the number identifier assigned to the cluster. The second
*   	argument ("b") is the column of the cluster matrix (i.e. the set of cluster allocations for that specific height).
*		The resulting vector is stored in the Stata matrix C.

mata:
// C_vector(a, b)
//   Builds a 0/1 indicator column and stores it in the Stata matrix "C".
//
//   a : cluster identifier to look for
//   b : column of the Stata matrix "cluster_matrix" to look in
//
//   C has one row per row of cluster_matrix; a row is 1 where
//   cluster_matrix[row, b] equals a and 0 otherwise.
//
//   The block is opened with "mata:" (with the colon) so that an error here
//   stops the do-file instead of being tolerated.
function C_vector(a, b)
{
	alloc = st_matrix("cluster_matrix")
	st_matrix("C", alloc[., b] :== a)
}
end

/*******************************************************************************
	2 - 2011 Census data on residential mobility 
		*usual residence 1 year ago cross-tabulated against usual residence today
*******************************************************************************/	

* Read the cross-tabulation. Variable names are taken from row 10 of the file
* and data from rows 12-2241. The path uses a forward slash, which Stata
* accepts on Windows, macOS and Unix alike (a backslash only works on Windows).
import delimited using "$dir_in/sa2_usualresidence1yrago_usualresidence_2011_ASGS2011.csv", delimiters(",") varnames(10) rowrange(12:2241) clear

* The first column identifies the row's SA2
rename   mainstatisticalareastructuremain sa2yr
destring sa2yr, replace
format   sa2yr  %12.0g

* The remaining columns were imported as v2-v2217 with the original header
* stored as the variable label; rename each one to sa2_<label>
foreach v of varlist v2-v2217 {
	local x : variable label `v'
	rename `v' sa2_`x'
}

* Drop the totals column and the column whose header was empty
drop sa2_Total sa2_

* Drop special-purpose codes (columns and rows) for each leading digit 1-9
* NOTE(review): presumably these are the non-spatial ASGS codes (no usual
* address, migratory/offshore, etc.) - confirm against the ABS code list
forvalues i = 1(1)9 {
	drop  sa2_`i'97979799 sa2_`i'99999499
	drop if sa2yr == `i'90909099 | sa2yr == `i'97979799 | sa2yr == `i'98989899 | sa2yr == `i'99999499
}
drop if sa2yr == 1000009299

* List every variable except the row identifier; r(varlist) is consumed by
* mkmat below, so no r-class command may be inserted in between
quietly ds sa2yr, not

* construct mobility matrix

mkmat `r(varlist)', matrix(mob_matrix) rownames(sa2yr)

clear

/*******************************************************************************
	3 - 2011 Census data on commuting flows
		*place of work cross-tabulated against usual residence 
*******************************************************************************/	

* Load the commuting-flow dataset from the current directory
use rawmatfull_2011, clear

* Every variable other than the row identifier sa2_ur becomes a matrix column
quietly ds sa2_ur, not
local flow_vars `r(varlist)'

* construct commuting matrix (rows named by sa2_ur)
mkmat `flow_vars', matrix(raw_matrix) rownames(sa2_ur)

/*******************************************************************************
	4 - Merge files and compute Table B1
*******************************************************************************/	

* The commuting dataset loaded in section 3 is still in memory; rename its key
* to match the key used by the cluster and descriptor files.
rename sa2_ur sa2_2011

merge 1:1 sa2_2011 using cluster_preferred_2011_all, nogen
merge 1:1 sa2_2011 using sa2_descriptors_2011, nogen keep(match)

* SA3 and SA4 identifiers are the first 5 and first 3 characters of the SA2 code
tostring sa2_2011, gen(sa2)
gen sa3 = substr(sa2,1,5)
gen sa4 = substr(sa2,1,3)

* Vectors of ones, used below to take row and column totals of the flow matrices
qui sum sa2_2011
matrix col_ones = J(r(N),1,1)
matrix row_ones = J(1,r(N),1)

rename (gccsa_code_2011 state_code_2011 c_ferco c_ferpc) (gccsa state co pc)

* Number the areas of each geography consecutively (1, 2, ...)
foreach i in sa2 sa3 sa4 gccsa state co pc {
	egen c_`i' = group(`i')
}

* One column of cluster identifiers per geography. c_pr980 is not created in
* this section (presumably it is supplied by one of the merged files).
mkmat c_sa2 c_sa3 c_sa4 c_gccsa c_state c_co c_pc c_pr980, matrix(cluster_matrix) 

* The indicator vectors built from cluster_matrix pre- and post-multiply both
* flow matrices, so each must be square with as many rows as cluster_matrix.
* Fail here with a clear message rather than with a conformability error deep
* inside the loop.
* NOTE(review): this only checks dimensions. The calculation also assumes that
* the rows AND columns of raw_matrix and mob_matrix are in the same SA2 order
* as the merged data in memory - confirm against the input files.
local n_sa2 = rowsof(cluster_matrix)
foreach m in raw_matrix mob_matrix {
	if rowsof(`m') != `n_sa2' | colsof(`m') != `n_sa2' {
		display as error "`m' is " rowsof(`m') " x " colsof(`m') " but cluster_matrix has `n_sa2' rows"
		exit 503
	}
}

* Highest cluster identifier in each column, used as the number of clusters
mata: st_matrix("no_clusters", colmax(st_matrix("cluster_matrix")))
matrix no_clusters = no_clusters'

* One row per geography (8), one column per reported statistic (6)
matrix grand_flows = J(8,6,.)

matrix colnames grand_flows = number mean_SA2s mean_pop mean_land mean_commute mean_mobility

forvalues r = 1(1)8 {

	local z = no_clusters[`r',1]

	* one row per cluster of geography `r'; the four columns are filled below
	matrix mat_flows = J(`z',4,.)

	forvalues v = 1(1)`z' {

		* C = 0/1 indicator of the rows belonging to cluster `v' of geography `r'.
		* "mata:" (with the colon) makes a Mata error stop the do-file instead
		* of letting the loop continue with a stale C.
		mata: C_vector(`v', `r')
		matrix C_prime = C'

		* commuting flows: within the cluster, row total and column total
		matrix livework_cluster = C_prime * raw_matrix * C
		matrix live_cluster     = C_prime * raw_matrix * col_ones
		matrix work_cluster     = row_ones * raw_matrix * C
		matrix worknotlive_cluster = work_cluster - livework_cluster

		* residential mobility flows: same construction on mob_matrix
		matrix livenow_liveoneyr   = C_prime  * mob_matrix * C
		matrix liveoneyr           = C_prime * mob_matrix * col_ones
		matrix livenow             = row_ones * mob_matrix * C
		matrix livenow_notoneyr    = livenow - livenow_liveoneyr
		matrix notlivenow_liveoneyr= liveoneyr-livenow_liveoneyr

		* col 1: within-cluster share of the cluster's row total (commuting)
		* col 2: share of the cluster's column total coming from outside it
		* col 3: share of the mobility column total coming from outside it
		* col 4: share of the mobility row total going outside it
		matrix mat_flows[`v',1] =livework_cluster[1,1]    / live_cluster[1,1]  
		matrix mat_flows[`v',2] =worknotlive_cluster[1,1] / work_cluster[1,1]  
		matrix mat_flows[`v',3] =livenow_notoneyr[1,1] / livenow[1,1] 
		matrix mat_flows[`v',4] =notlivenow_liveoneyr[1,1] / liveoneyr[1,1] 
	}

	* convert to percentages; column 1 is turned into its complement
	* (100 minus the within-cluster share)
	matrix out_c = mat_flows[1...,1]
	matrix unit  = J(rowsof(out_c),1,1)
	matrix out_c = (unit-out_c)*100
	matrix inf_c  = mat_flows[1...,2]*100
	matrix mobi_c  = mat_flows[1...,3]*100
	matrix mobo_c  = mat_flows[1...,4]*100

	* average each percentage vector across clusters -> out_c_av, inf_c_av,
	* mobi_c_av, mobo_c_av ("mata:" so that a failure stops the do-file)
	mata: mata_function("a")

	* number of clusters, and SA2s per cluster (no_clusters[1,1] is the
	* number of SA2 groups because c_sa2 is the first column)
	matrix grand_flows[`r',1]=no_clusters[`r',1]
	matrix grand_flows[`r',2]=no_clusters[1,1]/no_clusters[`r',1]
	* NOTE(review): 21507.717 and 7687.808613 are hard-coded totals,
	* presumably national population (thousands) and land area
	* (thousand sq km) - confirm the source and units
	matrix grand_flows[`r',3]=21507.717/no_clusters[`r',1]
	matrix grand_flows[`r',4]=7687.808613/no_clusters[`r',1]
	* simple averages of the two commuting and the two mobility measures
	matrix grand_flows[`r',5]=(out_c_av[1,1] + inf_c_av[1,1])/2
	matrix grand_flows[`r',6]=(mobi_c_av[1,1] + mobo_c_av[1,1])/2

	* progress indicator
	disp "`r'"
}

* export results to tableB1.xlsx

putexcel set tableB1.xlsx, sheet("in_out_other", replace) modify

* Column names go in row 1 (from B1), the 8 data rows in rows 2-9
putexcel B1=matrix(grand_flows), colnames

* Row labels in column A, listed in the same order as the rows of grand_flows
local xl_row = 2
foreach geo in SA2_2011 SA3_2011 SA4_2011 GCCSA State FER_CofFEE FER_PC LLM {
	putexcel A`xl_row'="`geo'"
	local ++xl_row
}

* end of do file