# Author: Michelle Bergmann
# Research Economist, Economic Research Department, Reserve Bank of Australia
# Last modified: July 2020
# Code to estimate stage 1 models contained in RDP 'The Determinants of Mortgage Defaults in Australia'



#######################################################################################################
# 1. Load libraries
#######################################################################################################

library(tidyverse)
library(data.table)
library(survival)



#######################################################################################################
# 2. Regression specification
#######################################################################################################


### base specification

# Note: excludes SEIFA variable. Add SEIFA manually to regression estimates by adding "+SEIFA_IRSAD" in the estimating equation

# base_regressors is the right-hand side of the estimating equation, stored as
# one character string. The models below paste it after "Surv(...) ~" and pass
# the result to as.formula().
#
# The "#" lines inside the quotes are part of the string, not comments of this
# script. The script relies on them being discarded as comments when
# as.formula() parses the pasted text as R code.
#
# Further terms (e.g. "+SEIFA_IRSAD", "+cluster(SA3)") are appended on the same
# line as the last line of the string, so the string must end with a regressor
# and never with a "#" line, otherwise the appended terms would be commented out.
base_regressors<-"
# equity and housing market variables
    lag_scheduled_LVR_buckets+
    original_LVR_buckets+
    turnover_ratio+
# labour market variables
    unemployment_rate_residents_5_years_ago+
    I(unemployment_rate_residents_5_years_ago^2)+
    mining_share_employment_SA3+
# serviceability variables
    serviceability_ratio_buckets+
    change_ir_buckets+
    high_income+
    buffer_max6m+
    IO_switching+
# loan characteristics corr with income volatility
    multiple_debtors+
    self_employed_na+
# loan controls
    investment+
    loan_purpose2+
    loan_origination_channel_na+
    fixed_rate+
    low_doc+
    previous_discharge+
# regional characteristics
    remoteness+
    state+
# deal characteristics
    type_short+
    deal_type+
# time FE
    factor(report_month)"



#######################################################################################################
# 3. Base model estimation
#######################################################################################################


### Model specification
# type: cox competing risks hazard model
# depvar: entered_90_days  - entries to 90+ day arrears or foreclosure
# competing risk: full repayment
# timevar: seasoning
# standard errors: robust standard errors clustered at SA3 region level 


# all loans

# Arrears model on the full sample: base regressors plus SEIFA_IRSAD, with
# cluster(SA3) in the formula.
cox_arrears <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days) ~ ",
    base_regressors,
    " +SEIFA_IRSAD +cluster(SA3)"
  )),
  data = data
)

summary(cox_arrears)


# Hazard ratios (exponentiated coefficients) and their 95% bounds, one row per
# model term; the term name is moved from the row names into `variable`.
hazard_ratios <- summary(cox_arrears)$conf.int %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "variable") %>%
  dplyr::select(
    variable,
    exp_coef = `exp(coef)`,
    lower = `lower .95`,
    upper = `upper .95`
  )


# mining and non-mining subsets

# Same specification as the all-loans arrears model, estimated only on rows
# where MiningRegion equals "Mining".
cox_arrears_mining <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days) ~ ",
    base_regressors,
    " +SEIFA_IRSAD +cluster(SA3)"
  )),
  data = data,
  subset = (MiningRegion == "Mining")
)

# Hazard ratios and 95% bounds for the mining-region model.
hazard_ratios_mining <- summary(cox_arrears_mining)$conf.int %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "variable") %>%
  dplyr::select(
    variable,
    exp_coef = `exp(coef)`,
    lower = `lower .95`,
    upper = `upper .95`
  )


# Same specification again, estimated on rows where MiningRegion is anything
# other than "Mining".
cox_arrears_non_mining <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days) ~ ",
    base_regressors,
    " +SEIFA_IRSAD +cluster(SA3)"
  )),
  data = data,
  subset = (MiningRegion != "Mining")
)

# Hazard ratios and 95% bounds for the non-mining model.
hazard_ratios_non_mining <- summary(cox_arrears_non_mining)$conf.int %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "variable") %>%
  dplyr::select(
    variable,
    exp_coef = `exp(coef)`,
    lower = `lower .95`,
    upper = `upper .95`
  )


# negative equity subset

# Estimates the base arrears specification (with SEIFA_IRSAD and cluster(SA3))
# on loans in negative equity only, using the "(100,110]" bucket of
# lag_scheduled_LVR_buckets as the reference category for this one model.
#
# The reference category is changed by temporarily re-levelling the column in
# `data`. The column is saved first and put back exactly as it was afterwards:
# relevel() only moves the chosen level to the front, so a second relevel()
# call would not return the levels to their starting order and would reorder
# the coefficients of every model fitted later in the script.
# NOTE(review): an earlier version restored the column with
# relevel(ref = "(60,70]"), i.e. it assumed "(60,70]" was the incoming
# reference level - confirm against the data preparation step.
lvr_buckets_original <- data$lag_scheduled_LVR_buckets
data$lag_scheduled_LVR_buckets <- relevel(lvr_buckets_original, ref = "(100,110]")

# Subset: indexed scheduled LVR above 100 and a lagged LVR bucket that is not
# one of the buckets at or below 100.
# `finally` puts the original column back even if the estimation fails, so a
# failed run cannot leave `data` with the temporary reference level.
cox_arrears_neg_eq <- tryCatch(
  coxph(
    as.formula(paste(
      "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days)",
      "~", base_regressors, "+SEIFA_IRSAD", "+cluster(SA3)"
    )),
    data = data,
    subset = (indexed_scheduled_LVR > 100 &
      !(lag_scheduled_LVR_buckets %in% c(
        "(0,30]", "(30,40]", "(40,50]", "(50,60]",
        "(60,70]", "(70,80]", "(80,90]", "(90,100]"
      )))
  ),
  finally = {
    data$lag_scheduled_LVR_buckets <- lvr_buckets_original
  }
)

# The saved copy is only needed for the restore above.
rm(lvr_buckets_original)



# depvar = refinances/ early repayments

# Same regressors and timing variables as the arrears model, with `refinanced`
# as the event indicator.
# +cluster(SA3) for robust standard errors clustered at region level
cox_repaid <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,refinanced) ~ ",
    base_regressors,
    " +SEIFA_IRSAD +cluster(SA3)"
  )),
  data = data
)





#######################################################################################################
# 4. Estimates excluding SEIFA variable
#######################################################################################################


# all loans

# Arrears model on the full sample using the base regressors only (no
# SEIFA_IRSAD term), with cluster(SA3) in the formula.
cox_arrears_ex_seifa <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days) ~ ",
    base_regressors,
    " +cluster(SA3)"
  )),
  data = data
)

summary(cox_arrears_ex_seifa)


# Hazard ratios (exponentiated coefficients) and their 95% bounds, one row per
# model term; the term name is moved from the row names into `variable`.
hazard_ratios_ex_seifa <- summary(cox_arrears_ex_seifa)$conf.int %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "variable") %>%
  dplyr::select(
    variable,
    exp_coef = `exp(coef)`,
    lower = `lower .95`,
    upper = `upper .95`
  )


# mining and non-mining subsets

# Specification without SEIFA_IRSAD, estimated only on rows where MiningRegion
# equals "Mining".
cox_arrears_ex_SEIFA_mining <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days) ~ ",
    base_regressors,
    " +cluster(SA3)"
  )),
  data = data,
  subset = (MiningRegion == "Mining")
)

# Hazard ratios and 95% bounds for the mining-region model.
hazard_ratios_ex_seifa_mining <- summary(cox_arrears_ex_SEIFA_mining)$conf.int %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "variable") %>%
  dplyr::select(
    variable,
    exp_coef = `exp(coef)`,
    lower = `lower .95`,
    upper = `upper .95`
  )



# Specification without SEIFA_IRSAD, estimated on rows where MiningRegion is
# anything other than "Mining".
cox_arrears_ex_SEIFA_non_mining <- coxph(
  formula = as.formula(paste0(
    "Surv(seasoning_hazard_start,seasoning_hazard,entered_90_days) ~ ",
    base_regressors,
    " +cluster(SA3)"
  )),
  data = data,
  subset = (MiningRegion != "Mining")
)

# Hazard ratios and 95% bounds for the non-mining model.
hazard_ratios_ex_seifa_non_mining <- summary(cox_arrears_ex_SEIFA_non_mining)$conf.int %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "variable") %>%
  dplyr::select(
    variable,
    exp_coef = `exp(coef)`,
    lower = `lower .95`,
    upper = `upper .95`
  )







