# Census generic import
# Description: generically import a census file from the data pack csvs.
# Datapacks can be found here: https://datapacks.censusdata.abs.gov.au/datapacks/

census_sa3_generic_import <- function(table_name = "T01", desc = "Tot_persons_C16_P"){
  # Find files ---------------------------------------------------
  folder <- "./data/census-data"
  file_path <- list.files(folder, pattern= table_name, full.names=TRUE )
  
  # Import and bind ----------------------------------------------
  rawdata <- lapply(file_path, function(x) fread(x) %>% gather(., key= "description", value="value", -SA3_CODE_2016) ) %>%
    bind_rows
  
  if(length(desc)== 1 & NA %in% desc){
    output <- rawdata
  } else{
    # filter based on desc
    output <- rawdata %>% filter(description %in% desc )
  }
  
  return(output)
  
}


# Title: Generate density variable
# Creator: Calvin He
# Date Created: 08 January 2019
# Description: Generate dataframe with density of a SA3 region in it

pop_density <- function(year = 2016, sa = 3){
  
 # ASGS sa3 mapping
  core_logic_sa3_code_mapping <- abs_sa_mapping()
  # Census dwelling numbers
  x <-  paste0("Tot_persons_C", str_sub(year, -2), "_P")
  population <- census_sa3_generic_import(desc = x)
  
  # Left Join
  output <- left_join(population, core_logic_sa3_code_mapping, by = "SA3_CODE_2016")
  
  # create density variable and select relevant columns
  if(sa == 3){
    output <- output %>% mutate(population_density = value/(1000*AREA_ALBERS_SQKM)) %>% 
      select(SA3_CODE_2016, population_density)
  }else if (sa == 4){
    output <- output %>% group_by(SA4_CODE_2016) %>% 
      summarise(population_density = sum(value, na.rm =T)/sum(1000*AREA_ALBERS_SQKM,  na.rm =T))
    
  }
  
  return(output)
}


# Calculate dwelling density variable
dwelling_density <- function(year = 2016, sa = 3){
  # Import SA3 Code-Name-Area mapping
  # core_logic_sa3_code_mapping<- read.csv("Data/SA3Codes_States_Mapping.csv", stringsAsFactors = FALSE) 
  core_logic_sa3_code_mapping <- abs_sa_mapping()
  
  # find year
  x <-  paste0("C", str_sub(year, -2), "_T_T")
  # Census dwelling numbers
  dwellings <- census_sa3_generic_import(table_name = "T14", desc =x)
  
  # Left Join
  output <- left_join(dwellings, core_logic_sa3_code_mapping, by = "SA3_CODE_2016")
  
  # create density variable and select relevant columns
  if(sa == 3){
    output <- output %>% 
      mutate(dwelling_density = value/AREA_ALBERS_SQKM) %>% 
      select(SA3_CODE_2016, dwelling_density, value) 
  } else if( sa == 4){
    output <- output %>% group_by(SA4_CODE_2016) %>% 
      summarise(dwelling_density = sum(value, na.rm =T)/sum(AREA_ALBERS_SQKM,  na.rm =T)) 
  }
  
  return(output)
}

# Census dwelling stock ----------------------------------------------------------------------------------
census_dwelling_stock_sa3 <- function(year = 2016){
  # find year
  x <-  paste0("C", str_sub(year, -2), "_T_T")
  # Census dwelling numbers
  dwellings <- census_sa3_generic_import(table_name = "T14", desc =x)
  return(dwellings)
}


# Census dwelling stock interpolation -----------------------------------------------------------------
census_dwelling_stock_sa3_interpolation_fill <- function(df, type = "interpolation", years = c(2006, 2011, 2016)){
  
  # Import dwelling stock from census
  dwelling_stock <- lapply(years, function(x){
    temp <- census_dwelling_stock_sa3(year = x) %>% 
      mutate(Date = ymd(paste0(x, "-09-30")), value = parse_number(value)) %>% 
      select(-description) %>% 
      rename(dwelling_stock_census = value)
  }) %>% bind_rows
  
  
  # join with df
  date_code_dwelling <- left_join(df %>% distinct(Date, SA3_CODE_2016) %>% 
                                    filter(!is.na(SA3_CODE_2016)), dwelling_stock, by = c("SA3_CODE_2016","Date"))
  
  # Group and interpolate or carry forward and backwards
  date_code_dwelling  <- date_code_dwelling %>% group_by(SA3_CODE_2016) %>% arrange(SA3_CODE_2016, Date) %>%    
    mutate(dwelling_stock_census = case_when(type == "interpolation" ~ na.approx(dwelling_stock_census, na.rm = FALSE, rule = 2),
                                             TRUE ~ na.locf(dwelling_stock_census, na.rm = FALSE) %>% na.locf(na.rm =FALSE, fromLast = TRUE))) %>% 
    ungroup
  
  # join back 
  df_final <- left_join(df, date_code_dwelling, by = c("Date", "SA3_CODE_2016"))
  
  return(df_final)
}

# Census dwelling stock: apartments --------------------------------
census_apartment_stock_sa3 <- function(year = 2016){
  # find year
  x <-  paste0("C", str_sub(year, -2), "_F_u_a_T_Tot")
  # Census dwelling numbers
  dwellings <- census_sa3_generic_import(table_name = "T14", desc =x)
  return(dwellings)
}


# Census dwelling stock/apartments ----------------------------------------
census_apartment_dwelling_ratio_sa3 <- function(year = 2016){
  data <- left_join(  census_apartment_stock_sa3(year = 2016) %>% rename(apartment_stock = value) %>% select(-description),
                      census_dwelling_stock_sa3(year = 2016) %>% rename(dwelling_stock = value) %>% select(-description),
                      by = "SA3_CODE_2016") %>% 
    mutate(apartment_share = 100*apartment_stock/dwelling_stock)
  
  return(data)
}



# Census population -------------------------------------------------------------------------
census_population_sa3 <-  function( year = 2016){
  x <-  paste0("Tot_persons_C", str_sub(year, -2), "_P")
  population <- census_sa3_generic_import(desc = x) 
  return(population)
}

# Census median age -------------------------------------------------------------------------
census_median_age_sa3 <- function(year = 2016){
  x <-  paste0("Med_age_persns_C", year)
  median_age <- census_sa3_generic_import(table_name = "T02", desc = x ) 
  return(median_age)
}


# Census owner and mortgage concentration ---------------------------------------------------
census_owner_concentration_sa3 <- function(year = 2016){
  x <- paste0("C", str_sub(year, -2),"_OWM_Tot")
  mortgage_owners <- census_sa3_generic_import(table_name = "T18A", desc = x ) %>% mutate(description = "mortgage_owners")
  
  x <- paste0("C", str_sub(year, -2),"_T_T")
  households <- census_sa3_generic_import(table_name = "T18B", desc = x ) %>% mutate(description = "households") 
  
  x <- paste0("C", str_sub(year, -2),"_OO_Tot")
  outright_owners <- census_sa3_generic_import(table_name = "T18A", desc = x ) %>% mutate(description = "outright_owners") 
  
  x <- paste0("C", str_sub(year, -2),"_R_T_T")
  renters <- census_sa3_generic_import(table_name = "T18A", desc = x ) %>% mutate(description = "renters") 
  
  all_data <- bind_rows(mortgage_owners, households, outright_owners, renters)
  all_data_wide <- data.table::dcast(all_data, SA3_CODE_2016 ~ description, value.var = "value" ) %>% 
    mutate(mortgage_concentration = 100*mortgage_owners/households,
           owner_concentration = 100*(mortgage_owners + outright_owners)/households,
           outright_concentration = 100*(outright_owners)/households,
           renter_concentration = 100*renters/households)
  return(all_data_wide)
}

# Census working age population -----------------------------------------------
# This will take the buckets where the bucket min_age >= min_age
# and the bucket_max_age < = max_age
# i.e it looks up for min_age and down for max_age
# gender can be "P" for people; "M" for male; "F" for female
census_population_by_age_sa3 <- function(year = 2016, min_age = 15, max_age = 84, gender = "P"){
  # Set up age buckets and census code (this is hard coded by looking at T01)
  age_buckets <- tibble(lower = c(0, 5, 15, 20, 25, 35, 45 , 55 , 65, 75, 85),
                        upper = c(4, 14, 19, 24, 34, 44, 54, 64, 74, 84, Inf ),
                        census_code = case_when(is.finite(upper) ~ paste0("Age_", lower, "_", upper, "_C", str_sub(year, -2), "_", gender),
                                                TRUE ~ paste0("Age_grp_85ov_C", str_sub(year, -2), "_", gender)))
  
  # Find which one's to import
  age_buckets_selected <- age_buckets %>% 
    mutate(selected = case_when( upper <= max_age & lower >= min_age ~ TRUE,
                                 TRUE ~ FALSE)) %>% 
    filter(selected == TRUE) 
  
  # Make custom description based on selected buckets 
  custom_description <-  paste0("Age_", min(age_buckets_selected$lower), "_", max(age_buckets_selected$upper), "_C", str_sub(year, -2), "_", gender)
  
  # Import selected population
  population <- census_sa3_generic_import(table_name = "T01", desc =  age_buckets_selected$census_code ) %>% 
    group_by(SA3_CODE_2016) %>% 
    summarise(value = sum(value)) %>% mutate(description = custom_description )
  
  return(population)
  
}

# Census industry composition --------------------------------------------------------
census_industry_share_sa3 <- function(year = 2016, industries = c("Constr", "Mining","Manuf", "Finan_Insu"), gender = "P"){
  
  # import industry numbers
  x <- paste0(industries, "_C", str_sub(year, -2),"_", gender)
  industry_people <- census_sa3_generic_import(table_name = "T34", desc = x) 
  
  # import total number of people (within the gender)
  x <- paste0("Tot_C", str_sub(year, -2),"_", gender)
  total_people <- census_sa3_generic_import(table_name = "T34", desc = x) %>% rename(total = value) %>% 
    select(-description)
  
  # combine and calculate shares
  industry_composition <- left_join(industry_people, total_people, by = "SA3_CODE_2016") %>% 
    mutate(share = 100*value/total) %>% 
    group_by(SA3_CODE_2016) %>% 
    summarise(contr_mining_manu_fin_industry_share = sum(share)) %>% 
    ungroup
  
  return(industry_composition)
}

# Create cyclical industry share variable
census_cyclical_industry_sa3 <- function(year = 2016, industries = c("Constr", "Accom_food", "Retail_tde"), gender = "P"){
  
  # import industry numbers
  x <- paste0(industries, "_C", str_sub(year, -2),"_", gender)
  industry_people <- census_sa3_generic_import(table_name = "T34", desc = x) 
  
  # import total number of people (within the gender)
  x <- paste0("Tot_C", str_sub(year, -2),"_", gender)
  total_people <- census_sa3_generic_import(table_name = "T34", desc = x) %>% rename(total = value) %>% 
    select(-description)
  
  # combine and calculate shares
  industry_composition <- left_join(industry_people, total_people, by = "SA3_CODE_2016") %>% 
    mutate(share = 100*value/total) %>% 
    group_by(SA3_CODE_2016) %>% 
    summarise(cyclical_industry_share = sum(share)) %>% 
    ungroup
  
  return(industry_composition)
}


# Census: Mortgage repayments ---------------------------------------------
census_mortgage_repayments_sa3 <- function(y = 2016){
  # Import 
  data <- census_sa3_generic_import(table_name = "T25", desc = NA)
  
  # Create data
  data_mod <- data %>% 
    mutate(year =  str_extract(description, "\\d{2}") %>%  # first two digits
             {glue::glue("20{.}")}  %>% 
             parse_number ) %>% # create year
    filter( year == y ,  str_detect(description, glue::glue("C{str_sub(y, -2)}_T_")) ) %>% # extract totals
    mutate(type =  str_remove(description, glue::glue("C{str_sub(y, -2)}_T_"))) %>% 
    group_by(type) %>% 
    mutate(mid_point = case_when(type == "NS" ~ 0,
                                 type == "T" ~ 0,
                                 type == "4000ov" ~ 5000,
                                 TRUE ~ mean( c(str_extract(type, ".*_") %>% str_remove("_") %>% parse_number,
                                                str_extract(type, "_.*") %>% str_remove("_") %>% parse_number ), na.rm = T ))) %>% 
    ungroup
  
  
}


# Census: T02 ---------------------------------------------------------------
census_t02_sa3 <- function(year = 2016, type = "Med_mortg_rep_mon"){
  # Import 
  data <- census_sa3_generic_import(table_name = "T02", desc = glue::glue("{type}_C{year}")) %>% 
    select(-description)
  return(data)
}

# Median mortage repayment
census_median_mortgage_payment_sa3 <- function(year = 2016){
  # Import 
  data <- census_t02_sa3(year = year , type = "Med_mortg_rep_mon")
  return(data)
}


census_median_rent_sa3 <- function(year = 2016){
  # Import 
  data <- census_t02_sa3(year = year , type = "Med_rent_weekly")
  return(data)
}

census_median_hh_income_sa3 <- function(year = 2016){
  # Import 
  data <- census_t02_sa3(year = year , type = "Med_tot_hh_inc_wee")
  return(data)
}


census_mortgage_servicing_ratio <- function(year = 2016){
  # Import mortgage payments
  data <- left_join(census_median_mortgage_payment_sa3(year = year) %>% rename(mortgage_payment = value) %>% mutate(mortgage_payment = 12*mortgage_payment),
                    census_median_hh_income_sa3(year = year) %>% rename(income = value) %>% mutate(income = 52.1429*income),
                    by = "SA3_CODE_2016") %>% 
    mutate(mortgage_servicing_ratio = 100* mortgage_payment/income)%>% 
    select(SA3_CODE_2016,mortgage_servicing_ratio)
  return(data)
}

census_rent_servicing_ratio <- function(year = 2016){
  # Import mortgage payments
  data <- left_join(census_median_rent_sa3(year = year) %>% rename(rent_payment = value) %>% mutate(rent_payment = 52.1429*rent_payment),
                    census_median_hh_income_sa3(year = year) %>% rename(income = value)%>% mutate(income = 52.1429*income), 
                    by = "SA3_CODE_2016") %>% 
    mutate(rent_servicing_ratio = 100* rent_payment/income) %>% 
    select(SA3_CODE_2016, rent_servicing_ratio)
  return(data)
}


# Housing Costs - rent or mortgage/income ---------------------------------
census_housing_servicing_ratio <- function(year = 2016){
  
  # Import Rent
  rent <- census_rent_servicing_ratio(year = year)
  
  # Import mortgage
  mortgage <- census_mortgage_servicing_ratio(year = year)
  
  # ownership type
  ownership_type <- census_owner_concentration_sa3(year = year)
  
  # combine 
  combined_data <- left_join(ownership_type, mortgage, by = "SA3_CODE_2016") %>% 
    left_join(., rent, by = "SA3_CODE_2016") %>% 
    transmute(SA3_CODE_2016,  mortgage_servicing_ratio,  rent_servicing_ratio, 
              housing_servicing_ratio = 0 * outright_owners/100 + 
                mortgage_servicing_ratio * mortgage_concentration/100 +
                rent_servicing_ratio * renter_concentration/100)
  
  return(combined_data)
}



# Census: Income growth ---------------------------------------------------
census_income_growth <- function(start_year = 2006, end_year = 2016, type = "Med_Famly_inc_we"){
  
  # Create quosures
  var_end <- sym(paste0("income_", end_year))
  var_end <- enquo(var_end)
  
  var_start <- sym(paste0("income_", start_year))
  var_start <- enquo(var_start)
  
  
  final_var <-sym(paste0("income_growth_", start_year,"_", end_year))
  final_var <- enquo(final_var)
  
  
  # Import 
  income <- left_join(census_t02_sa3(year = end_year, type = type ) %>% rename(!!var_end := value), 
                      census_t02_sa3(year = start_year, type = type ) %>% rename(!!var_start := value)) %>% 
    transmute(SA3_CODE_2016, 
              !!final_var := 100*( ( !!var_end/!!var_start )^(1/(end_year - start_year)) - 1)  )
  
  return(income)
}






# Census: Population growth -----------------------------------------------
census_population_growth <- function(start_year = 2006, end_year = 2016){
  # Create quosures
  var_end <- sym(paste0("pop_", end_year))
  var_end <- enquo(var_end)
  
  var_start <- sym(paste0("pop_", start_year))
  var_start <- enquo(var_start)
  
  
  final_var <- sym(paste0("pop_", start_year,"_", end_year))
  final_var <- enquo(final_var)
  
  # Import data
  population <- left_join(census_population_sa3(year = end_year ) %>% rename(!!var_end := value) %>% select(-description), 
                          census_population_sa3(year = start_year ) %>% rename(!!var_start := value)  %>% select(-description) ) %>% 
    transmute(SA3_CODE_2016, 
              !!final_var := 100*( ( !!var_end/!!var_start )^(1/(end_year - start_year)) - 1)  ) %>% 
    mutate(!!final_var := case_when(!!final_var == Inf ~ NaN, TRUE ~ !!final_var) ) # remove infinities
  
  return(population)
}




# Census: historical census ---------------------------------------------------------
historical_census_import <- function(filepath = "./data/census-data/historical-census.xlsx" ){
  

  # figure out sheet names 
  sheets <- readxl::excel_sheets(filepath)[str_detect(readxl::excel_sheets(filepath), "Table")] # find sheets with name "Table" in them
  
  data <- map(sheets, ~ readxl::read_excel(filepath, . )) # import all sheets into a list
  names(data) <- sheets # name based on sheet
  
  return(data)
}

historical_census_clean <-  function(df){
  
  # determine title row
  column_title_row <- which(df[,1] == "SA4 SA3 SA2 (2016) (Enumeration)")
  colnames(df) <-  df[column_title_row  , ]
  
  # remove useless columns
  df <- map_lgl(df, ~!is_logical(.)) %>% df[.] # remove any logical columns
  
  # remove second region_code column if it exists because it's stupid
  if (sum(df[column_title_row,] == "SA4 SA3 SA2 (2016) (Enumeration)")>1){
    df <- df[,  -max(which( df[column_title_row,] == "SA4 SA3 SA2 (2016) (Enumeration)")) ]
  }
  
  
  # extract 'groups'
  groups <-  df[ column_title_row - 1, ] %>% as_vector() %>% na.omit %>% unique # ignore NA's
  
  if (length(groups) >1) { # if there are groups make new column names
    # new column names with group
    unique_cats <- colnames(df)[-1 ] %>% unique() 
    
    if (length(unique_cats) > length(colnames(df)[-1 ])/2){ # because ABS made a typo that messes up the code
      unique_cats <- colnames(df)[-1 ] %>% {.[1:(length(.)/2)]}
    }
    
    colnames(df) <- c("region_code_name", map(groups, ~ paste0(unique_cats , "_", .)) %>% unlist() %>% str_to_lower())
  } else{
    colnames(df)[1] <- c("region_code_name")
  }
  
  # remove data above title_row
  df <- df[ (column_title_row+1):nrow(df), ]
  
  # remove row if all na
  df <- df[rowSums(is.na(df)) < ncol(df) , ]
  
  # convert to numbers 
  df <- map_at(df , colnames(df)[-1 ], as.numeric) %>% bind_cols
  
  # wide to long
  df <- gather(df, key = "variable", value = "value", - region_code_name )
  
  
  return(df)
}

# import and clean this horrendous data
historical_census_import_clean <- function(){
  output <- historical_census_import() %>% 
    map(., historical_census_clean) %>% 
    bind_rows(., .id = "table") %>% 
    mutate(region_name = str_match(region_code_name, "[^0-9]+") %>% str_squish(),
           region_code = str_match(region_code_name, "[0123456789]+") %>% str_squish() %>% parse_number) %>% 
    mutate(year = case_when(str_detect(table, "199") ~ str_match(table, "199\\d"), 
                            str_detect(table, "200") ~ str_match(table, "200\\d"),
                            TRUE ~ str_match(variable, "\\d{4}$") ) %>% parse_number,
           variable = str_remove(variable, glue::glue("_{year}"))) %>% # Make Year and remove year from variable name
    mutate_if(is_character, str_to_lower)
  return(output)
  
}


historical_census_extract <- function(df, y, var){
  if (missing(df)){
    df <- historical_census_import_clean()
  } 
  
  # output
  output <- df %>% filter(year %in% y, str_detect(variable, var))
  
  return(output)
  
}


historical_census_population_density <- function(df){
  
  sa3_mapping <- abs_sa_mapping()
  
  pop <- historical_census_extract(df, 1991, "all ages_persons")
  
  output <- left_join(sa3_mapping, pop, by = c("SA3_CODE_2016" = "region_code")) %>% 
    transmute(SA3_CODE_2016, pop_density_1991 = value/(1000*AREA_ALBERS_SQKM)) # thousands per square km
  return(output)
}


historical_census_dwelling_density <- function(df){
  
  sa3_mapping <- abs_sa_mapping()
  
  dwelling <- historical_census_extract(df, 1991, "total dwelling")
  
  output <- left_join(sa3_mapping, dwelling, by = c("SA3_CODE_2016" = "region_code")) %>% 
    transmute(SA3_CODE_2016, dwelling_density_1991 = value/AREA_ALBERS_SQKM)
  return(output)
}



historical_census_lfs <- function(df){
  
  data <- df %>% filter(year == 1991, table == "table 3", variable %in% c("employed", "unemployed","not in the labour force", "total aged 15 years and over(b)" ))
  output <-  dcast(data, region_code ~ variable ,fun.aggregate= sum, value.var = "value") %>% 
    transmute(region_code,   unemp_rate_1991 = 100*unemployed/(`total aged 15 years and over(b)` - `not in the labour force`),
              emp_to_working_age_pop_1991 = 100*unemployed/(`total aged 15 years and over(b)`))
  
  return(output)
}

historical_census_income <- function(df){
  
  income <- df %>% 
    filter(year == 1991, variable %in% c("median household income (weekly)", "median individual income (weekly)" )) %>% 
    dcast(region_code ~ variable , fun.aggregate= sum,  value.var = "value") %>% 
    transmute(region_code,  hh_income_1991 = `median household income (weekly)`,
              individual_income_1991 = `median individual income (weekly)`)
  
  return(income)
  
}


