
# Generic import of one table from the ABS "Data by Region" release
# https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1410.02012-17?OpenDocument
#
# Arguments:
#   table_no   - second argument handed to import_abs_table()
#                (presumably the position of the table on the page - confirm)
#   sheet_name - forwarded to import_abs_table() as sheet_name
#   skip_rows  - forwarded to import_abs_table() as skip_rows
#   link       - page handed to import_abs_table() as its first argument
#
# Returns the imported table with its first row dropped, the first three
# columns named region_code / region_name / year, region_name moved to the
# front, and every other column coerced with as.numeric().
import_data_by_region <- function( table_no,  sheet_name , skip_rows = 6, link = "https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1410.02012-17?OpenDocument"){
  # Import raw data for the requested table / sheet
  raw_data <- import_abs_table(link, table_no, sheet_name = sheet_name, skip_rows = skip_rows)

  # Name the three leading identifier columns; the remaining names are kept.
  # Assigning into [1:3] avoids the backwards 4:ncol() sequence on a 3-column table.
  colnames(raw_data)[1:3] <- c("region_code", "region_name", "year")

  # Remove first row
  raw_data <- raw_data[-1, ]

  # Put region_name first so that "everything else" is columns 2..ncol
  raw_data <- raw_data %>% select(region_name, everything())

  # Make everything except region_name numeric.
  # lapply() always returns one element per column, whereas sapply() collapses
  # to a plain vector when only a single row is present. seq_len()[-1] is
  # empty (rather than c(2, 1)) when there is only one column.
  numeric_cols <- seq_len(ncol(raw_data))[-1]
  raw_data[numeric_cols] <- lapply(raw_data[numeric_cols], as.numeric)

  raw_data
}


# Filter the data by year and region, keep the requested variables and
# (optionally) tidy their column names.
#
# Arguments:
#   df                    - data with region_name, region_code and year
#                           columns plus every column named in keep_vars
#   keep_years            - values of year to keep
#   keep_vars             - character vector of column names to keep
#   keep_region_codes     - values of region_code to keep; NULL keeps every
#                           region code present in df
#   modify_keep_vars_name - if TRUE the kept variable columns are renamed:
#                           lower-cased, spaces -> "_", "(", ")", "." and "-"
#                           removed, "$" -> "_", "__" -> "_", "%" -> "_percent"
#
# Returns the filtered data with columns region_name, region_code, year and
# the kept variables, in that order.
filter_data_by_region <- function(df, keep_years,  keep_vars, keep_region_codes = NULL, modify_keep_vars_name = TRUE){
  # NULL means "do not restrict by region"
  if (is.null(keep_region_codes)) {
    keep_region_codes <- unique(df$region_code)
  }

  # Filter and select.
  # all_of() marks keep_vars as an external character vector, so it can never
  # be mistaken for a column of df that happens to be called "keep_vars".
  df_mod <- df %>%
    filter(year %in% keep_years, region_code %in% keep_region_codes) %>%
    select(region_name, region_code, year, all_of(keep_vars))

  if (modify_keep_vars_name) {
    # Build the tidy names; the order of the replacements matters
    # (e.g. "$" becomes "_" before doubled underscores are collapsed).
    tidy_names <- str_to_lower(keep_vars) %>%
      str_replace_all(" ", "_") %>%
      str_replace_all("\\(", "") %>%
      str_replace_all("\\)", "") %>%
      str_replace_all("\\.", "") %>%
      str_replace_all("-", "") %>%
      str_replace_all("\\$", "_") %>%
      str_replace_all("__", "_") %>%
      str_replace_all("%", "_percent")

    colnames(df_mod) <- c("region_name", "region_code", "year", tidy_names)
  }

  df_mod
}


# Wrapper: import the "Family and Community" sheet (table 11) through
# import_data_by_region() and return its result unchanged.
import_data_by_region_fam_com <- function() {
  import_data_by_region(
    table_no = 11,
    sheet_name = "Family and Community",
    skip_rows = 6,
    link = "https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1410.02012-17?OpenDocument"
  )
}

# Wrapper: import the "Income (Including Govt. Allowan" sheet (table 5)
# through import_data_by_region() and return its result unchanged.
import_data_by_region_income <- function() {
  import_data_by_region(
    table_no = 5,
    sheet_name = "Income (Including Govt. Allowan",
    skip_rows = 6,
    link = "https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1410.02012-17?OpenDocument"
  )
}





# Wrapper for the variables I want from the family and community table.
#
# Arguments:
#   df                    - data to filter; when not supplied it is imported
#                           with import_data_by_region_fam_com()
#   keep_years, keep_vars, keep_region_codes, modify_keep_vars_name
#                         - forwarded to filter_data_by_region()
#
# Returns the result of filter_data_by_region().
filter_data_by_region_fam_com <- function(df, keep_years = 2018,  keep_vars = c("Households with mortgage repayments greater than or equal to 30% of household income",
                                                                                "Households with rent payments greater than or equal to 30% of household income"), 
                                          keep_region_codes = NULL, modify_keep_vars_name = TRUE){

  # If df is missing do the generic import
  if (missing(df)) {
    df <- import_data_by_region_fam_com()
  }

  # Forward the region filter and the renaming switch as well; previously
  # they were accepted by this wrapper but silently ignored.
  filter_data_by_region(
    df,
    keep_years,
    keep_vars,
    keep_region_codes = keep_region_codes,
    modify_keep_vars_name = modify_keep_vars_name
  )
}


# Wrapper selecting the income-table variables of interest.
#
# Arguments:
#   df                    - data to filter; when not supplied it is imported
#                           with import_data_by_region_income()
#   keep_years, keep_vars, keep_region_codes, modify_keep_vars_name
#                         - forwarded to filter_data_by_region()
#
# Returns the result of filter_data_by_region() with every occurrence of
# "persons" in the column names replaced by "share".
filter_data_by_region_income <- function(df, keep_years = 2018,  keep_vars = c("Median Investment income", 
                                                                               "Investment income earners", 
                                                                               "Mean Investment income", 
                                                                               "Investment income as main source of income", "Median Employee income", 
                                                                               "Median Total income (excl. Government pensions)", 
                                                                               "Total income earners (excl. Government pensions)",
                                                                               "Persons earning $1-$499 per week"  , "Persons earning nil income", "Persons with a negative income",
                                                                               "Persons earning $500-$999 per week" ), 
                                         keep_region_codes = NULL, modify_keep_vars_name = TRUE){

  # Fall back to a fresh import when the caller gave no data
  if (missing(df)) {
    df <- import_data_by_region_income()
  }

  filtered <- filter_data_by_region(
    df,
    keep_years,
    keep_vars,
    keep_region_codes = keep_region_codes,
    modify_keep_vars_name = modify_keep_vars_name
  )

  # Final rename pass: "persons" -> "share" in the column names
  renamed <- colnames(filtered) %>% str_replace_all("persons", "share")
  colnames(filtered) <- renamed

  filtered
}





# Title: Import income data by ASGS regions
# Creator: Calvin He
# Date Created: 14 March 2019
# Description: Imports 2011-17 income data by regions


# Imports the "Income (Including Govt. Allowan" sheet (table 5) by delegating
# to import_data_by_region(), which performs the same renaming, first-row
# removal and numeric coercion this function used to duplicate.
#
# Arguments:
#   download - not used by this function; kept so that existing calls which
#              pass it keep working
#   link     - page forwarded to import_data_by_region()
#
# Returns the result of import_data_by_region().
import_data_by_region_income_asgs <- function(download = TRUE ,link = "http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1410.02012-17?OpenDocument"){
  import_data_by_region(
    table_no = 5,
    sheet_name = "Income (Including Govt. Allowan",
    skip_rows = 6,
    link = link
  )
}


# Title: Keep certain column/s of income data by region
# Creator: Calvin He
# Date Created: 14 March 2019

# Arguments:
#   df                    - data to filter; when not supplied it is imported
#                           with import_data_by_region_income_asgs()
#   keep_years            - values of year to keep
#   keep_vars             - character vector of column names to keep
#   keep_region_codes     - values of region_code to keep; NULL keeps every
#                           region code present in df
#   modify_keep_vars_name - if TRUE the kept variable columns are renamed:
#                           lower-cased, spaces -> "_", "(", ")", "." and "-"
#                           removed, "$" -> "_", "__" -> "_"
#
# Returns the filtered data with columns region_name, region_code, year and
# the kept variables, in that order.
filter_data_by_region_income_asgs <- function(df, keep_years = 2016,  keep_vars = "Median Investment income", keep_region_codes = NULL, modify_keep_vars_name = TRUE){

  # if df is missing do the generic import
  if (missing(df)) {
    df <- import_data_by_region_income_asgs()
  }

  # NULL means "do not restrict by region"
  if (is.null(keep_region_codes)) {
    keep_region_codes <- unique(df$region_code)
  }

  # Filter and select.
  # all_of() marks keep_vars as an external character vector, so it can never
  # be mistaken for a column of df that happens to be called "keep_vars".
  df_mod <- df %>%
    filter(year %in% keep_years, region_code %in% keep_region_codes) %>%
    select(region_name, region_code, year, all_of(keep_vars))

  if (modify_keep_vars_name) {
    # Build the tidy names; the order of the replacements matters
    # (e.g. "$" becomes "_" before doubled underscores are collapsed).
    tidy_names <- str_to_lower(keep_vars) %>%
      str_replace_all(" ", "_") %>%
      str_replace_all("\\(", "") %>%
      str_replace_all("\\)", "") %>%
      str_replace_all("\\.", "") %>%
      str_replace_all("-", "") %>%
      str_replace_all("\\$", "_") %>%
      str_replace_all("__", "_")

    colnames(df_mod) <- c("region_name", "region_code", "year", tidy_names)
  }

  df_mod
}

# Economy and Industry ---------------------------------------------------------

# Imports the "Economy and Industry_ASGS" sheet (table 3) and builds column
# names from the imported names combined with the first data row.
#
# Arguments:
#   download - not used by this function; kept so that existing calls which
#              pass it keep working
#   link     - page handed to import_abs_table() as its first argument
#
# Returns the table with its first two rows dropped, the first three columns
# named region_code / region_name / year, region_name moved to the front, and
# every other column coerced with as.numeric().
import_data_by_region_economy_asgs <- function(download = TRUE ,link = "http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1410.02012-17?OpenDocument"){
  # Import raw data - it's the third table
  raw_data <- import_abs_table(link, 3, sheet_name = "Economy and Industry_ASGS", skip_rows = 5)

  # Clean column titles.
  # Names containing a digit are blanked out (presumably placeholder names
  # generated for empty header cells - confirm) ...
  colnames(raw_data)[str_detect(colnames(raw_data), "\\d")] <- NA
  # ... and filled with the last non-missing name to their left
  colnames(raw_data) <- na.locf(colnames(raw_data), na.rm = FALSE) # bring names down
  # Append the value found in the first row to each name
  colnames(raw_data) <- paste(colnames(raw_data), raw_data[1,], sep = "_")
  # Strip "NA_", "NA", "&" and spaces, turn "," and "-" into "_", lower-case
  colnames(raw_data) <- colnames(raw_data) %>%
    str_replace_all("NA_|NA|&| ", "") %>%
    str_replace_all(",|-", "_") %>%
    str_to_lower()

  # Name the three leading identifier columns; the remaining names are kept.
  # Assigning into [1:3] avoids the backwards 4:ncol() sequence on a 3-column table.
  colnames(raw_data)[1:3] <- c("region_code", "region_name", "year")

  # Remove two rows
  raw_data <- raw_data[-c(1:2), ]

  # Put region_name first so that "everything else" is columns 2..ncol
  raw_data <- raw_data %>% select(region_name, everything())

  # Make everything except region_name numeric.
  # lapply() always returns one element per column, whereas sapply() collapses
  # to a plain vector when only a single row is present. seq_len()[-1] is
  # empty (rather than c(2, 1)) when there is only one column.
  numeric_cols <- seq_len(ncol(raw_data))[-1]
  raw_data[numeric_cols] <- lapply(raw_data[numeric_cols], as.numeric)

  raw_data
}


# Filter the economy and industry data by year and region and keep the
# requested variables.
#
# Arguments:
#   df                    - data to filter; when not supplied it is imported
#                           with import_data_by_region_economy_asgs()
#   keep_years            - values of year to keep
#   keep_vars             - character vector of column names to keep
#   keep_region_codes     - values of region_code to keep; NULL keeps every
#                           region code present in df
#   modify_keep_vars_name - if TRUE the kept variable columns are renamed:
#                           lower-cased, spaces -> "_", "(", ")", "." and "-"
#                           removed, "$" -> "_", "__" -> "_"
#
# Returns the filtered data with columns region_name, region_code, year and
# the kept variables, in that order.
filter_data_by_region_economy_asgs <- function(df, keep_years = 2016,  keep_vars = "meanhouseholdnetworth_meanhouseholdnetworth", keep_region_codes = NULL, modify_keep_vars_name = TRUE){

  # if df is missing do the generic import
  if (missing(df)) {
    df <- import_data_by_region_economy_asgs()
  }

  # NULL means "do not restrict by region"
  if (is.null(keep_region_codes)) {
    keep_region_codes <- unique(df$region_code)
  }

  # Filter and select.
  # all_of() marks keep_vars as an external character vector, so it can never
  # be mistaken for a column of df that happens to be called "keep_vars".
  df_mod <- df %>%
    filter(year %in% keep_years, region_code %in% keep_region_codes) %>%
    select(region_name, region_code, year, all_of(keep_vars))

  if (modify_keep_vars_name) {
    # Build the tidy names; the order of the replacements matters
    # (e.g. "$" becomes "_" before doubled underscores are collapsed).
    tidy_names <- str_to_lower(keep_vars) %>%
      str_replace_all(" ", "_") %>%
      str_replace_all("\\(", "") %>%
      str_replace_all("\\)", "") %>%
      str_replace_all("\\.", "") %>%
      str_replace_all("-", "") %>%
      str_replace_all("\\$", "_") %>%
      str_replace_all("__", "_")

    colnames(df_mod) <- c("region_name", "region_code", "year", tidy_names)
  }

  df_mod
}


