** The two following data files cannot be released publicly, so are not included
** in the public part of the working directory.

* Cash-use shares for CPS respondents.
* Convert the CSV into a working .dta file that is used for merging later.
import delimited "INTERNAL_ONLY\Data\CPS_Cash_Use_Shares.csv", clear
rename (respondent_id) (Respondent_ID)
save "INTERNAL_ONLY\Data\Working Data\CPS_Cash_Use_Shares", replace

* Main CPS Data
import excel using "INTERNAL_ONLY\Data\CPS_2022.xlsx", firstrow clear

*** Drop repeated observations used for other parts of the survey.
keep if idtag_diarycrosswave==1

*** Replace NA as missing
* Only string variables can hold the text NA, so the loop is restricted to
* them. This avoids wrapping every replace in capture, which would also hide
* any unrelated error.
quietly ds, has(type string)
local string_vars `r(varlist)'
foreach var of local string_vars {
	quietly replace `var' = "" if `var' == "NA"
}

*** Create new variables for accounts A and B
** Based on account attributes table mapping for CPS 2022
** See INTERNAL_ONLY/Data/Tables_Online.docx and INTERNAL_ONLY/Data/Tables_Offline.docx

* Indicator for the respondent preferring account A: 1 = yes, 0 = no
* (preferred account B). Any other response is left missing.
generate choosea = cond(rbabankacc_pref == "Account A", 1, ///
	cond(rbabankacc_pref == "Account B", 0, .))

label define choosea 0 "No" 1 "Yes"
label values choosea choosea

* Fee dummy for account A: 1 for the choice tables listed below, 0 for any
* other non-empty table, and missing when no table is recorded.
local fee_online 1_online 5_online 7_online 9_online 10_online 13_online 14_online 15_online 17_online 18_online 20_online
local fee_paper 1_paper 5_paper 7_paper 8_paper 9_paper 10_paper

generate fee_a = 0
foreach tbl in `fee_online' `fee_paper' {
	replace fee_a = 1 if rbabankacc_table == "`tbl'"
}
replace fee_a = . if rbabankacc_table == ""

* Account B takes the opposite fee level to account A.
generate fee_b = 1 - fee_a

label define fee 0 "$20" 1 "$25"
label values fee_a fee
label values fee_b fee

* RBA claim dummy for account A: 1 for the choice tables listed below, 0 for
* any other non-empty table, and missing when no table is recorded.
local rba_online 1_online 3_online 4_online 6_online 8_online 9_online 11_online 13_online 14_online 20_online
local rba_paper 1_paper 3_paper 4_paper 6_paper 7_paper 8_paper 9_paper

generate rbaclaim_a = 0
foreach tbl in `rba_online' `rba_paper' {
	replace rbaclaim_a = 1 if rbabankacc_table == "`tbl'"
}
replace rbaclaim_a = . if rbabankacc_table == ""

* Account B takes the opposite value to account A.
generate rbaclaim_b = 1 - rbaclaim_a

label define rbaclaim 0 "No" 1 "Yes"
label values rbaclaim_a rbaclaim
label values rbaclaim_b rbaclaim

* Commercial bank claim dummies: the complement of the RBA claim dummy for
* each account (1 when the RBA claim is 0, 0 when it is 1, missing otherwise).
foreach acct in a b {
	generate commercialclaim_`acct' = 1 - rbaclaim_`acct'
}

label define commercialclaim 0 "No" 1 "Yes"
label values commercialclaim_a commercialclaim
label values commercialclaim_b commercialclaim

* Categorical privacy attribute for each account, keyed on the choice table.
* Codes: 1 no one, 2 provider only, 3 AUSTRAC only, 4 provider and AUSTRAC.
* Tables that appear in none of the lists for an account stay missing.

* Account A tables by privacy code
local priv_a_1 1_online 7_online 16_online 3_paper 6_paper 10_paper
local priv_a_2 2_online 5_online 6_online 9_online 11_online 12_online 14_online 15_online 1_paper 5_paper
local priv_a_3 3_online 4_online 17_online 18_online 20_online 2_paper 4_paper 7_paper 9_paper
local priv_a_4 8_online 10_online 13_online 19_online 8_paper

* Account B tables by privacy code
local priv_b_1 2_online 3_online 13_online 15_online 18_online 19_online 20_online 4_paper
local priv_b_2 4_online 7_online 8_paper 10_paper
local priv_b_3 1_online 8_online 9_online 10_online 12_online 1_paper 6_paper
local priv_b_4 5_online 6_online 11_online 14_online 16_online 17_online 2_paper 3_paper 5_paper 7_paper 9_paper

foreach acct in a b {
	generate privacy_`acct' = .
	forvalues level = 1/4 {
		foreach tbl of local priv_`acct'_`level' {
			replace privacy_`acct' = `level' if rbabankacc_table == "`tbl'"
		}
	}
}

label define cbdcprivacy 1 "No one" 2 "Provider only" 3 "AUSTRAC only" 4 "Provider and AUSTRAC"
label values privacy_a cbdcprivacy
label values privacy_b cbdcprivacy

* Extended privacy variable: splits the two provider categories of the
* privacy variable by whether the account carries the RBA claim.
* The same recode is applied to account A and account B.
foreach acct in a b {
	generate privacyext_`acct' = .
	replace privacyext_`acct' = 1 if privacy_`acct' == 1
	replace privacyext_`acct' = 2 if privacy_`acct' == 2 & rbaclaim_`acct' == 1
	replace privacyext_`acct' = 3 if privacy_`acct' == 2 & rbaclaim_`acct' == 0
	replace privacyext_`acct' = 4 if privacy_`acct' == 3
	replace privacyext_`acct' = 5 if privacy_`acct' == 4 & rbaclaim_`acct' == 1
	replace privacyext_`acct' = 6 if privacy_`acct' == 4 & rbaclaim_`acct' == 0
}

label define privacyext 1 "No one" 2 "RBA only" 3 "Commercial bank only" 4 "AUSTRAC only" 5 "RBA and AUSTRAC" 6 "Commercial bank and AUSTRAC"
label values privacyext_a privacyext
label values privacyext_b privacyext

* Per-account visibility dummies derived from the extended privacy variable.
* The stems below correspond, in order, to privacyext codes 2 to 6:
*   rbaonly (2), commercialonly (3), austraconly (4),
*   rbaaustrac (5), commercialaustrac (6).
* Each dummy is 1 when the account has that code, 0 for any other code, and
* missing when the extended privacy variable is missing.
local stems rbaonly commercialonly austraconly rbaaustrac commercialaustrac
local code = 2
foreach stem of local stems {
	foreach acct in a b {
		generate `stem'_`acct' = (privacyext_`acct' == `code') if privacyext_`acct' != .
	}
	label define `stem' 0 "No" 1 "Yes"
	label values `stem'_a `stem'
	label values `stem'_b `stem'
	local ++code
}

*** Create dummies for privacy attributes offered to each respondent
* Each dummy is 1 when account A or account B shows the attribute, 0 when
* neither does, and missing when account A's attribute is missing.
*
* All eight dummies share one Yes/No value label. No labels named
* provideronly or provideraustrac are defined in this script, so attaching
* those names (as was done before, including to noone) gives no value text.
label define offered 0 "No" 1 "Yes"

* Attributes from the four-level privacy variable, in code order 1 to 4:
* no one, provider only, AUSTRAC only, provider and AUSTRAC
local base_attrs noone provideronly austraconly provideraustrac
forvalues level = 1/4 {
	local attr : word `level' of `base_attrs'
	gen `attr' = 0
	replace `attr' = 1 if privacy_a == `level' | privacy_b == `level'
	replace `attr' = . if privacy_a == .
	label values `attr' offered
}

* Attributes from the extended privacy variable:
* RBA only (2), commercial bank only (3), RBA and AUSTRAC (5),
* commercial bank and AUSTRAC (6)
local ext_attrs rbaonly commercialonly rbaaustrac commercialaustrac
local ext_codes 2 3 5 6
forvalues i = 1/4 {
	local attr : word `i' of `ext_attrs'
	local code : word `i' of `ext_codes'
	gen `attr' = 0
	replace `attr' = 1 if privacyext_a == `code' | privacyext_b == `code'
	replace `attr' = . if privacyext_a == .
	label values `attr' offered
}

*** Create dummies for attributes of chosen account
* Every dummy below is missing when the respondent made no choice
* (choosea missing). The privacy dummies are also missing when the
* attribute was not offered on either account, so for them a 0 means
* the attribute was offered but the other account was chosen.

* Fee
gen choose20 = 0
replace choose20 = 1 if (fee_a == 0 & choosea == 1) | (fee_b == 0 & choosea == 0)
replace choose20 = . if choosea == .
label values choose20 choosea

* RBA claim
gen chooserbaclaim = 0
replace chooserbaclaim = 1 if (rbaclaim_a == 1 & choosea == 1) | (rbaclaim_b == 1 & choosea == 0)
replace chooserbaclaim = . if choosea == .
label values chooserbaclaim choosea

* Four-level privacy attributes, in code order 1 to 4:
* no one, provider only, AUSTRAC only, provider and AUSTRAC
local base_attrs noone provideronly austraconly provideraustrac
forvalues level = 1/4 {
	local attr : word `level' of `base_attrs'
	gen choose`attr' = 0
	replace choose`attr' = 1 if (privacy_a == `level' & choosea == 1) | (privacy_b == `level' & choosea == 0)
	* Missing choice added here so these agree with choose20 and chooserbaclaim
	replace choose`attr' = . if `attr' == . | `attr' == 0 | choosea == .
	label values choose`attr' choosea
}

* Extended privacy attributes:
* RBA only (2), commercial bank only (3), RBA and AUSTRAC (5),
* commercial bank and AUSTRAC (6)
local ext_attrs rbaonly commercialonly rbaaustrac commercialaustrac
local ext_codes 2 3 5 6
forvalues i = 1/4 {
	local attr : word `i' of `ext_attrs'
	local code : word `i' of `ext_codes'
	gen choose`attr' = 0
	replace choose`attr' = 1 if (privacyext_a == `code' & choosea == 1) | (privacyext_b == `code' & choosea == 0)
	* Missing choice added here so these agree with choose20 and chooserbaclaim
	replace choose`attr' = . if `attr' == . | `attr' == 0 | choosea == .
	label values choose`attr' choosea
}

*** Attribute differences, account A minus account B
* For each attribute stem X this creates dX = X_a - X_b.
foreach attr in fee rbaclaim commercialclaim rbaonly commercialonly austraconly rbaaustrac commercialaustrac {
	generate d`attr' = `attr'_a - `attr'_b
}


*** Coding the covariates
** Age: RawAge is granular and is used where available. Where it is
** missing, the midpoint of the respondent's age bracket is used instead.

destring RawAge, generate(age_num)

* The top-coded bracket has no midpoint, so it takes the mean age of
* respondents with a recorded age of 90 or more.
summarize age_num if age_num>=90
scalar agemean_90_plus = r(mean)
display agemean_90_plus

* Bracket midpoints (brackets are mutually exclusive, so order is immaterial)
generate agebracket_mid = .
replace agebracket_mid = agemean_90_plus if agebracket == "90 or older"
replace agebracket_mid = 84.5 if agebracket == "80-89"
replace agebracket_mid = 74.5 if agebracket == "70-79"
replace agebracket_mid = 67   if agebracket == "65-69"
replace agebracket_mid = 59.5 if agebracket == "55-64"
replace agebracket_mid = 49.5 if agebracket == "45-54"
replace agebracket_mid = 39.5 if agebracket == "35-44"
replace agebracket_mid = 29.5 if agebracket == "25-34"
replace agebracket_mid = 21   if agebracket == "18-24"

* Final age: granular age, falling back to the bracket midpoint
generate age_final = cond(missing(age_num), agebracket_mid, age_num)

** Age quartiles, calculated after keeping only unique ID observations
xtile age_quartile = age_final, nquantiles(4)

** Income
* Notes carried over from the survey documentation:
* - hhinc is missing for some respondents. Full observations exist for
*   hhinc_crosswave, which was used to create the quartiles in hhincq.
* - hhincq comes from respondents being asked which quartile they fall
*   into (pre-defined ranges).
* - hhinc comes from households being asked for broad ranges of household
*   income. Some did not respond; per the survey provider these were filled
*   from hhincq.
* - The CPS has no income variable that does not rely on ranges.
* - Quartiles are: <50k, 50-99.999k, 100k-159.999k, 160k+

* Numeric quartile variable (1 to 4) from the text in hhincq
generate hhincq_final = .
local q = 0
foreach ordinal in 1st 2nd 3rd 4th {
	local ++q
	replace hhincq_final = `q' if hhincq == "`ordinal' quartile"
}

label define hhincq_labels 1 "<$50k" 2 "$50k-$99k" 3 "$100k-159k" 4 "$160k+"
label values hhincq_final hhincq_labels

* Numeric income from the hhinc range text, using range mid-points.
* Listed in ascending order of income; the ranges are mutually exclusive.
generate hhinc_num = .
replace hhinc_num = 0        if hhinc == "No income or negative income"
replace hhinc_num = 3900     if hhinc == "$1-$7,799 per year"
replace hhinc_num = 13899.5  if hhinc == "$7,800-$19,999 per year"
replace hhinc_num = 24999.5  if hhinc == "$20,000-$29,999 per year"
replace hhinc_num = 34999.5  if hhinc == "$30,000-$39,999 per year"
replace hhinc_num = 44999.5  if hhinc == "$40,000-$49,999 per year"
replace hhinc_num = 54999.5  if hhinc == "$50,000-$59,999 per year"
replace hhinc_num = 64999.5  if hhinc == "$60,000-$69,999 per year"
replace hhinc_num = 74999.5  if hhinc == "$70,000-$79,999 per year"
replace hhinc_num = 84999.5  if hhinc == "$80,000-$89,999 per year"
replace hhinc_num = 94999.5  if hhinc == "$90,000-$99,999 per year"
replace hhinc_num = 104999.5 if hhinc == "$100,000-$109,999 per year"
replace hhinc_num = 114999.5 if hhinc == "$110,000-$119,999 per year"
replace hhinc_num = 124999.5 if hhinc == "$120,000-$129,999 per year"
replace hhinc_num = 144999.5 if hhinc == "$130,000-$159,999 per year"
replace hhinc_num = 179999.5 if hhinc == "$160,000-$199,999 per year"
replace hhinc_num = 224999.5 if hhinc == "$200,000-$249,999 per year"
replace hhinc_num = 250000   if hhinc == "Over $250,000 per year"
* Non-response stays missing at this point
replace hhinc_num = .        if hhinc == "I prefer not to answer"

* Mean household income among respondents at 160,000 or above, used as the
* single value for the top quartile where no range was given.
summarize hhinc_num if hhinc_num>=160000
scalar hhincmean_160_plus = r(mean)
display hhincmean_160_plus

* Fill remaining gaps from the self-reported quartile
replace hhinc_num = 25000  if hhinc_num == . & hhincq == "1st quartile"
replace hhinc_num = 75000  if hhinc_num == . & hhincq == "2nd quartile"
replace hhinc_num = 130000 if hhinc_num == . & hhincq == "3rd quartile"
replace hhinc_num = hhincmean_160_plus if hhinc_num == . & hhincq == "4th quartile"

* Bring in each respondent's cash use share from the separate dataset,
* linked by Respondent_ID (can also be calculated from payments diary data).
* keep(master match) retains every survey respondent but discards records
* found only in the cash-use file. Without it such records would stay in
* memory with no survey answers, be counted as 0 in the Figure 4 indicators
* and enter the cash-share mean computed below.
merge 1:1 Respondent_ID using "INTERNAL_ONLY\Data\Working Data\CPS_Cash_Use_Shares", keep(master match)

* NA marks a missing share. Recode it only when number was imported as a
* string: assigning "" to a numeric variable is a type mismatch that would
* stop the do-file.
capture confirm string variable number
if !_rc {
	replace number = "" if number == "NA"
}
gen cash_payments_share_number = number
destring cash_payments_share_number, replace

* Means of cash use share, household income and age, stored as scalars and
* used below to express each variable as a deviation from its mean.
summarize cash_payments_share_number, detail
scalar cash_payments_share_number_mean = r(mean)
display cash_payments_share_number_mean

* Check variables
summarize hhinc_num, detail
scalar hhinc_mean = r(mean)
display hhinc_mean

summarize age_final, detail
scalar age_mean = r(mean)
display age_mean

* Overview of every variable currently in memory
summarize

* Deviations from the mean. For presentational purposes income is divided
* by 100000 and age by 10; the cash share deviation is left unscaled.
generate hhinc_num_dev_abs  = (hhinc_num - hhinc_mean) / 100000
generate age_final_dev_abs  = (age_final - age_mean) / 10
generate cash_share_dev_abs = cash_payments_share_number - cash_payments_share_number_mean

*** Figure 4 Calculations
* One 0/1 indicator per transdetperm item: 1 when the answer is "Yes",
* 0 for any other value (including empty).
generate prevent_fraud = (transdetperm_1 == "Yes")
generate assess_tax    = (transdetperm_9 == "Yes")
generate manage_spend  = (transdetperm_6 == "Yes")
generate targeted_off  = (transdetperm_2 == "Yes")
generate anon_data     = (transdetperm_8 == "Yes")
generate link_data     = (transdetperm_7 == "Yes")

* Estimate and store the mean of each indicator
eststo clear
eststo: mean prevent_fraud assess_tax manage_spend targeted_off anon_data link_data

* Write the means and their 95% confidence intervals to a CSV file
esttab using INTERNAL_ONLY/Outputs/Figure_4_Data.csv, replace label ci level(95)
eststo clear

* Keep only variables used in the regressions
* Every name is written out in full. The earlier list also named
* hhinc_num_dev and age_final_dev, which this script never creates; they
* resolved only as abbreviations of the _abs variables kept below and would
* fail with variable abbreviation switched off.
keep Respondent_ID choosea ///
	dfee dcommercialclaim drbaclaim drbaonly dcommercialonly daustraconly ///
	drbaaustrac dcommercialaustrac ///
	hhincq_final hhinc_num age_quartile age_final weight rbabankacc_table ///
	gender hhsize state educat disab birthcountry ///
	hhinc_num_dev_abs age_final_dev_abs ///
	cash_share_dev_abs cash_payments_share_number ///
	rbaclaim_a rbaclaim_b privacy_a privacy_b privacyext_a privacyext_b

*** Check for missing values, cash share excluded as it's not in main regressions
misstable summarize
* Flag is 1 when any regression variable is missing, and missing otherwise
gen missing_indicator = 1 if choosea==.|dfee==.|drbaclaim==.|drbaonly==.| ///
	dcommercialonly==.| daustraconly==.| drbaaustrac==.| dcommercialaustrac==.| ///
	weight ==.| age_quartile ==.| hhincq_final==. | hhinc_num_dev_abs ==. | ///
	age_final_dev_abs == .

* Data quality - committed to have a consistent sample throughout
drop if missing_indicator==1

*** Save data for regressions
save "INTERNAL_ONLY\Data\Working Data\data_1_master_clean.dta",replace