/*******************************************************************************

	9_descriptives.do
	
	This file contains code to produce an Excel file containing the descriptive
	statistics in Appendices B and C of Bishop J and I Chan (2019), Is
	Declining Union Membership Contributing to Low Wages Growth?, RBA Research
	Discussion Paper No 2019-02.
	
	For the baseline sample, average AAWI, average effective duration, the total
	number of agreements and the average number of employees covered per agreement
	are produced for agreements negotiated with union involvement and those without.

	For the difference-in-differences sample, a number of descriptive measures are
	produced for greenfields and non-greenfields agreements across three legislative
	regimes.
	
	The labutil package (which provides the labvarch command used below) needs
	to be installed for this code to run, e.g. via: ssc install labutil
		
*******************************************************************************/

* Start from an empty session and stop Stata pausing long output
clear all
set more off

* Working folder: the input data are read from here and every output file
* (intermediate .dta files and descriptives.xlsx) is written here as well
local datadir "<path for 'data' folder here>"
cd "`datadir'"


/********************
 A. Baseline sample
********************/

* Load the agreement-level extract
use wad_extract_final_RDP, clear

* Keep only in-sample agreements, i.e. those flagged (== 1) by either
* in_panel_pub or in_panel_pri
keep if inlist(1, in_panel_pub, in_panel_pri)

* Constant equal to 1 for every agreement; counting it in -collapse- below
* gives the number of agreements in each cell
generate agrnum = 1

* (A1) By industry
* One output row per sector (public) x industry (anzsic_1dig), with the
* statistics for union == 0 and union == 1 placed side by side.
preserve
	local cellvars anzsic_1dig union public

	* Cell means, number of agreements and total employees covered
	collapse (mean) aawi_a_effect dur_effect (count) agrnum (sum) emplnum=employees_combined, by(`cellvars')

	* Average employees per agreement = total employees / number of agreements
	* NOTE(review): agreements with missing employees_combined still count in
	* the denominator - confirm this is intended
	generate double emplavg = emplnum / agrnum
	drop emplnum

	* Everything that is not a cell identifier is a statistic to be spread
	* across the values of union
	ds `cellvars', not
	local statvars `r(varlist)'
	reshape wide `statvars', i(public anzsic_1dig) j(union)

	sort public anzsic_1dig
	save desc_ind, replace
restore

* (A2) By state
* One output row per sector (public) x state_code, with the statistics for
* union == 0 and union == 1 placed side by side.
preserve
	local cellvars state_code union public

	* Cell means, number of agreements and total employees covered
	collapse (mean) aawi_a_effect dur_effect (count) agrnum (sum) emplnum=employees_combined, by(`cellvars')

	* Average employees per agreement = total employees / number of agreements
	generate double emplavg = emplnum / agrnum
	drop emplnum

	* Everything that is not a cell identifier is a statistic to be spread
	* across the values of union
	ds `cellvars', not
	local statvars `r(varlist)'
	reshape wide `statvars', i(public state_code) j(union)

	sort public state_code
	save desc_state, replace
restore


* (A3) All agreements
* One output row per sector (public), with the statistics for union == 0 and
* union == 1 placed side by side. No preserve/restore here: the data in memory
* are replaced by this collapse and are not restored afterwards.
local cellvars union public

* Cell means, number of agreements and total employees covered
collapse (mean) aawi_a_effect dur_effect (count) agrnum (sum) emplnum=employees_combined, by(`cellvars')

* Average employees per agreement = total employees / number of agreements
generate double emplavg = emplnum / agrnum
drop emplnum

* Everything that is not a cell identifier is a statistic to be spread across
* the values of union
ds `cellvars', not
local statvars `r(varlist)'
reshape wide `statvars', i(public) j(union)

sort public
save desc_all, replace


* Labelling
* For each intermediate table: rebuild the variable labels, write the table to
* its own sheet of descriptives.xlsx (labels become the header row), then
* delete the intermediate .dta file.

* Label text attached to each statistic stub
local txt_aawi_a_effect "Average AAWI"
local txt_dur_effect "Average effective duration"
local txt_agrnum "Total number of agreements"
local txt_emplavg "Average employees covered per agreement"

local tables ind state all
foreach tbl of local tables {
	use desc_`tbl', clear

	* Blank out whatever labels the variables currently carry so that the
	* prefix/postfix calls below build each label from an empty string
	foreach var of varlist _all {
		label variable `var' ""
	}

	* Suffixes left by -reshape wide- on union: 0 -> non-union, 1 -> union
	rename (*0 *1) (*_nu *_u)

	* Identifier columns; -capture- because not every table contains all three
	capture label variable public "Public sector"
	capture label variable anzsic_1dig "ANZSIC93 1-digit (modal industry by employees covered)"
	capture label variable state_code "State/territory in which agreement applies"

	* Statistic name first ...
	foreach stub in aawi_a_effect dur_effect agrnum emplavg {
		labvarch `stub'*, prefix("`txt_`stub''")
	}
	* ... then the union / non-union qualifier
	labvarch *_u, postfix(", union agreements")
	labvarch *_nu, postfix(", non-union agreements")

	export excel using descriptives.xlsx, sheet("desc_fe_`tbl'") cell(A1) sheetreplace firstrow(varlabels)
	erase desc_`tbl'.dta
}


/****************************************
 B. Difference-in-differences sample
****************************************/

use wad_extract_final_RDP, clear

* Law identifies the legislative regime. Going by the labels applied in (B1):
* 1 = FW Act, 2 = Work Choices, 3 = WR Act.
*
* Agreements made under a regime but certified on or after the date at which
* the next regime starts are assigned that changeover date.
* Stata treats missing values as larger than any number, so a bare
* "cert_date >= td(...)" is also true when cert_date is missing. Without the
* explicit !missing() guard, agreements with no certification date would be
* given the changeover date and would then pass the window filter below.
replace cert_date = td(27March2006) if cert_date >= td(27March2006) & !missing(cert_date) & Law==3
replace cert_date = td(30Jun2009)   if cert_date >= td(30Jun2009)   & !missing(cert_date) & Law==2

* Time between the two changeover dates, in years of 365 days
* (presumably the length of the Work Choices period - confirm)
local l_wc = (td(30Jun2009) - td(27March2006)) / 365

* Keep agreements certified within +/- that length of either changeover date.
* Missing cert_date fails the "<=" comparisons and is therefore dropped.
keep if (cert_date>=(td(27March2006)-365*`l_wc')  & cert_date<=(td(27March2006)+365*`l_wc')) | ///
        (cert_date>=(td(30Jun2009)  -365*`l_wc')  & cert_date<=(td(30Jun2009)  +365*`l_wc'))

* Private-sector agreements that are not flagged as short duration and have
* non-missing values for the listed variables. missing() is used rather than
* "!=." so that extended missing values (.a-.z) are excluded as well.
keep if short_duration!=1 & public==0 & !missing(aawi_a_effect, ind_state, anzsic06_2dig, gf)

gen agrnum = 1 // Generate numerical variable for counting

* (B1) Share by industry
* For each greenfields status (gf) x legislative regime (Law) cell, the
* percentage of that cell's agreements falling in each industry. The result
* has one row per industry and one column per gf x Law combination.
preserve
	* Number of agreements per gf x Law x industry
	collapse (count) agrnum, by(gf Law anzsic_1dig)
	gsort -gf -Law anzsic_1dig

	* Industry share (per cent) within each gf x Law cell
	egen double agr_total = total(agrnum), by(gf Law)
	generate double agr_sh = agrnum/agr_total * 100
	drop agrnum agr_total

	* Spread over gf first (0 -> _ng, 1 -> _g) ...
	reshape wide agr_sh, i(Law anzsic_1dig) j(gf)
	rename (*0 *1) (*_ng *_g)

	* ... then over Law (1 -> _fwa, 2 -> _wc, 3 -> _wra)
	reshape wide agr_sh*, i(anzsic_1dig) j(Law)
	rename (*1 *2 *3) (*_fwa *_wc *_wra)
	sort anzsic_1dig

	* Every label starts as "Share of"; the industry column is then relabelled
	* and the share columns get their greenfields and regime qualifiers appended
	foreach var of varlist _all {
		label variable `var' "Share of"
	}
	label variable anzsic_1dig "ANZSIC93 1-digit (modal industry by employees covered)"

	* Greenfields status qualifier
	local sfx_ng " non-greenfields agr"
	local sfx_g " greenfields agr"
	foreach grp in ng g {
		labvarch *_`grp'_*, postfix("`sfx_`grp''")
	}

	* Legislative regime qualifier
	local sfx_fwa " cert under FW Act"
	local sfx_wc " cert under Work Choices"
	local sfx_wra " cert under WR Act"
	foreach regime in fwa wc wra {
		labvarch *_`regime', postfix("`sfx_`regime''")
	}

	export excel using descriptives.xlsx, sheet("desc_dd_ind") cell(A1) sheetreplace firstrow(varlabels)
restore

* (B2) All agreements
* One output row per greenfields status (gf) x legislative regime (Law):
* means of AAWI, effective duration and the union indicator, the number of
* agreements, and average employees covered per agreement.
collapse (mean) aawi_a_effect dur_effect union (count) agrnum (sum) emplnum=employees_combined, by(gf Law)

* Average employees per agreement = total employees / number of agreements
generate double emplavg = emplnum / agrnum
drop emplnum

* Column headers for the exported sheet (gf and Law keep their existing labels)
local txt_aawi_a_effect "Average AAWI"
local txt_dur_effect "Average effective duration"
local txt_agrnum "Total number of agreements"
local txt_emplavg "Average employees covered per agreement"
local txt_union "Share of agreements negotiated with union involvement"
foreach var in aawi_a_effect dur_effect agrnum emplavg union {
	label variable `var' "`txt_`var''"
}

export excel using descriptives.xlsx, sheet("desc_dd_all") cell(A1) sheetreplace firstrow(varlabels)

* end of do file
