/*******************************************************************************

	1_import_data.do
	
	This program extracts and cleans the raw LLFS CURF data.

	Creators:    James Bishop and Iris Day
	Last edited: 10 November 2020 

*******************************************************************************/	

* Start from an empty Stata session
clear all

* Locations used by this do-file: the working (output) directory and the raw input dataset
local out_dir  "S:\2020-z008 Capacity_SW\JK_analysis\RDP_documentation\output"
local raw_file "V:\LLFD_G_monthly\llfs82_v2009.dta"

* Relative file names in later -save- commands resolve against this directory
cd "`out_dir'"

* Load the raw dataset, replacing anything currently in memory
use "`raw_file'", clear


/*******************************************************************************
	1 - compress file and drop unused variables
*******************************************************************************/	

* Drop the variables that are not used downstream first, so that -compress-
* only has to scan the variables that are actually kept.
drop frstmth sampfrme rotgrp resptype cobrc decarr famnum ///
     numfamh nper15h nper15f nkid14f nkid04f agyng14f nsq01fld hiq93fld ///
     ind83grp ind78grp occ13occ occseugr occfeugr occ76occ ind93grp

* Demote the remaining variables to the smallest storage type that holds
* their values, reducing the memory used by the dataset.
compress

/*******************************************************************************
	2 - clean variables
*******************************************************************************/		 
	 
* Rebuild absmid as a Stata monthly date (%tm).
* The original values are written out as text, parsed as a year-month date,
* and the month containing that date is stored back under the same name.
tempvar ym_text
tostring absmid, generate(`ym_text')
drop     absmid
generate absmid = mofd(date(`ym_text', "YM"))
format   %tm absmid

* Remove the helper string explicitly so it cannot end up in the saved files
drop     `ym_text'

* Put the time variable first in the dataset
order    absmid

* Swap the respondent and household identifiers for consecutive integer
* group codes (one code per distinct original value), which take less memory.
sort absmid abshid absrid

foreach idvar of varlist absrid abshid {
    * Build the integer code under a temporary name, then let it take over
    * the original variable's name
    tempvar code
    egen `code' = group(`idvar')
    drop `idvar'
    rename `code' `idvar'
}

* Keep the key variables at the front: month, household, respondent
order absmid abshid absrid

* Shrink storage types now that the identifiers are numeric
compress

/*******************************************************************************
	3 - save 'skinny' file
*******************************************************************************/	

* Save the full cleaned dataset (all survey years) to the working directory
save skinny_full.dta, replace

* Keep survey years from 2018 onwards (intended: 2018, 2019 & 2020 data).
* Stata orders missing values above every number, so a bare
* "survyear>=2018" would also keep observations whose survyear is missing;
* exclude those explicitly.
keep if survyear>=2018 & !missing(survyear)

* Re-compress after dropping the earlier years
compress

* Order the reduced file by respondent, then month
sort  absrid absmid

save skinny.dta, replace

* Leave the session empty on exit
clear all

* end of do file