####################################################################################################
# RDP 2021-03: Financial Conditions and Downside Risk to Economic Activity in Australia
####################################################################################################
# Construct a Financial Conditions Index (FCI) for Australia to be used in 'Growth-at-Risk' analysis
#
# This file is to implement necessary transformations to the 'fci_panel' data set to render it
# stationary before extracting factor(s).
#
# Luke Hartigan, 10-03-2021
####################################################################################################

# Start from an empty workspace (hidden objects included)
rm(list = ls(all.names = TRUE))

# Locations of the input data, the helper code and the output tables
d_location <- "Data/"
c_location <- "Code/methods/"
r_location <- "Results/csv/"

# Load the helper functions in the order listed; 'gar_methods.R' holds the
# project-specific functions and has to be sourced last
invisible(lapply(
    X = c("misc_methods.R", "gar_methods.R"),
    FUN = function(method_file) source(paste0(c_location, method_file))
))

# Print numbers using four significant digits
options(digits = 4)

# Tell the user which step is running
cat("Pre-processing the financial conditions dataset...\n")

####################################################################################################
# Read in the data
####################################################################################################

# Load the raw panel and its metadata; read.csv() already defaults to
# header = TRUE, sep = "," and fill = TRUE, so only the path is needed
panel <- read.csv(file = paste0(d_location, "fci_panel.csv"))
info <- read.csv(file = paste0(d_location, "fci_info.csv"), row.names = 1L)

# Remove the first column (the dates); list-style indexing keeps a data frame
panel <- panel[-1L]

# Window length for the rolling demeaning procedure: 40 quarters (i.e. 10 years)
rdx <- 40L

# One-row ID tables used to select columns of the panel:
# group membership, transformation code and log flag of every series
tgroup <- info["tgroup", , drop = FALSE]
tcode <- info["tcode", , drop = FALSE]
tlog <- info["tlog", , drop = FALSE]

####################################################################################################
# Dataset by main category
####################################################################################################

# Create a quarterly sequence to use as row names
ts_begin_str <- "1976-09-01"
ts_end_str <- "2020-09-01" # *** Manually edit this line ***
ts_qtr_seq <- seq(from = as.Date(ts_begin_str), to = as.Date(ts_end_str), by = "quarter")

# The end date above is maintained by hand, so check it against the data now.
# Without this check a stale date only surfaces later as an obscure
# "length of 'dimnames'" error when the row names are assigned.
if (length(ts_qtr_seq) != nrow(panel)) {
    stop(sprintf("Dates %s to %s span %d quarters but 'fci_panel' has %d rows; update 'ts_end_str'.",
                 ts_begin_str, ts_end_str, length(ts_qtr_seq), nrow(panel)),
         call. = FALSE)
}

# Category labels to count over; they are matched against the 'tgroup' row of 'fci_info'
group_names <- c('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I')
ngroups <- length(group_names)

# TRUE wherever a series has a non-missing observation in a given period
series_counts <- !is.na(panel)

# Results storage: one row per period, one column per category
num_series_cat <- zeros(nrow(panel), ngroups)

# For each category, count the series with data available in every period
for (i in seq_len(ngroups)) {
    num_series_cat[, i] <- rowSums(x = series_counts[, which(tgroup == group_names[i]), drop = FALSE])
}

# Give the rows and columns useful names
colnames(num_series_cat) <- group_names
rownames(num_series_cat) <- as.character(ts_qtr_seq)

####################################################################################################
# Transform the data
####################################################################################################

# Notes:
# tcode == "t1" => No difference, i.e., Level (default)
# tcode == "t2" => 1st Difference, i.e., (1 - B)y
#
# The panel is split into three groups of columns according to the 'tlog' and 'tcode'
# ID rows, each group is transformed separately, and the groups are then recombined
# in the original column order.
# NOTE(review): a series flagged tlog == TRUE with tcode == "t1" is selected by none of
# the three groups below, so the re-ordering by colnames(panel) would fail with
# "undefined columns selected" -- confirm that no such series exists in 'fci_info'.

# Levels (no log, no differencing): columns are taken from the panel unchanged
ylv <- panel[, which(tlog == FALSE & tcode == "t1"), drop = FALSE]

# First difference (no log): transform_series is applied to each column in turn
dylv <- apply(X = panel[, which(tlog == FALSE & tcode == "t2"), drop = FALSE],
              MARGIN = 2L, FUN = transform_series, take_log = FALSE, tcode = "t2")

# Log difference (compounded growth): as above but with take_log = TRUE
dyln <- apply(X = panel[, which(tlog == TRUE & tcode == "t2"), drop = FALSE],
              MARGIN = 2L, FUN = transform_series, take_log = TRUE, tcode = "t2")

# Fix the differing series lengths: the level series still have every row, so trim
# them by the difference in row counts relative to the log-differenced series.
# NOTE(review): presumably 'a' is the number of rows removed from the start and 'b'
# from the end -- confirm against trim_row in the sourced method files.
ylv <- trim_row(x = ylv, a = (nrow(ylv) - nrow(dyln)), b = 0)

# Combine data into a dataframe
paneltf <- data.frame(cbind(ylv, dylv, dyln))

# Sort to be as originally ordered in 'fci_panel'
paneltf <- paneltf[, colnames(panel), drop = FALSE]

# Demean each series with rolling_scale using a window of 'rdx' observations
# (center = TRUE, scale = FALSE, i.e. the mean is removed but no rescaling is done here).
# NOTE(review): the previous comment referred to allowing for a structural break in the
# mean in 1993:Q1 due to the introduction of inflation targeting; no break date is passed
# to rolling_scale, so confirm that rolling demeaning is the intended treatment.
paneltfs <- rolling_scale(x = paneltf, roll_len = rdx, center = TRUE, scale = FALSE)

# Scale the panel over the full sample. With center = FALSE, scale() divides each column
# by its root mean square, which equals the standard deviation only when the column has
# mean zero. The result is a matrix.
paneltfs <- scale(x = paneltfs, center = FALSE, scale = TRUE)

# Give the rows useful names
rownames(paneltfs) <- as.character(ts_qtr_seq[-1L]) # drop first period from first differencing

####################################################################################################
# Save data to an .RData file for future use and write .csv file to check for correctness
####################################################################################################

# Transformed and conditionally standardised (tfs) panel -- .RData file
# (stored in the data directory so later scripts can load it)
save(paneltfs, file = paste0(d_location, "fci_data_tfs.RData"))

# Transformed and conditionally standardised (tfs) panel -- .csv file
# col.names = NA together with row.names = TRUE writes an empty header cell above
# the row-name column, so the header lines up with the data columns
write.table(x = paneltfs, file = paste0(d_location, "fci_data_tfs.csv"),
            append = FALSE, quote = FALSE, sep = ',', row.names = TRUE, col.names = NA)

# Number of series by data category -- written to the results directory, not the data directory
write.table(x = num_series_cat, file = paste0(r_location, "fci_num_series_cat.csv"),
            append = FALSE, quote = FALSE, sep = ',', row.names = TRUE, col.names = NA)

# Report both output locations: the panel files go to 'd_location' and the
# category counts go to 'r_location'
cat(sprintf("All files written to: %s and %s\n", d_location, r_location))

# EOF
