# Title: Panel regression
# Creator: Calvin He
# Date Created: 07 March 2019
# Last Modified: 9 December 2019
# Description: Fixed-effects panel regression with optional clustered standard errors, estimated with lfe::felm

#' Estimate a fixed-effects panel regression with lfe::felm
#'
#' Optionally builds the dependent variable `y` for horizon `m`, assembles a
#' `felm` formula string from the column names of the data and the settings in
#' `opts`, fits the model and attaches a tidied coefficient table.
#'
#' @param reg_data_pre Data frame. When `y_calculate` is TRUE it must contain
#'   `region_name`, `Date` and `calculation_value`; otherwise it is used as-is
#'   and must already contain `y`. Regressors are picked up by column name:
#'   every column whose name contains "mp_interaction", every column whose
#'   name contains "agg_control" (only if `opts$agg_controls` is non-empty),
#'   plus the `control_<name>` and `control_ar_<k>` columns named via `opts`.
#' @param m Horizon: number of rows by which `calculation_value` is led within
#'   each `region_name` (after sorting by `Date`). Also stored in the
#'   `horizon` column of the tidy output.
#' @param opts List of settings. Fields read here:
#'   `metric_diff`, `metric_log_diff` (choose the transformation of `y`),
#'   `fixed_effect_vars`, `group_no`, `controls`, `agg_controls` (only its
#'   length is used), `ar_lag`, `cluster_vars`, `mp_interaction_var`.
#' @param y_calculate If TRUE (default) compute `y` from `calculation_value`
#'   and drop rows where `y` is missing; if FALSE skip that step.
#' @return The object returned by `lfe::felm()` with one extra element,
#'   `tidy_output`: `tidy()` of the fit plus the columns `horizon`,
#'   `mp_shock_lag`, `interaction_var`, `interaction_var_level`, `shock_sign`.
panel_reg_lfe <- function(reg_data_pre, m, opts, y_calculate = TRUE) {

  # Step 1: create y variable based on m = horizon ----
  if (y_calculate) {
    # Resolve the scalar options once with ordinary if/else. Only the selected
    # transformation is evaluated, so log() is never applied when levels or
    # plain differences are requested.
    use_diff <- as.logical(opts$metric_diff)
    use_log_diff <- as.logical(opts$metric_log_diff)

    horizon_response <- function(x) {
      ahead <- dplyr::lead(x, m)
      if (isFALSE(use_diff)) {
        ahead # level of the series m rows ahead
      } else if (isTRUE(use_diff) && isFALSE(use_log_diff)) {
        ahead - x # simple difference
      } else {
        # log difference in percent; also the fallback when the flags are
        # missing/NA, as in the original default branch
        100 * (log(ahead) - log(x))
      }
    }

    reg_data <- reg_data_pre %>%
      group_by(region_name) %>%
      arrange(region_name, Date) %>%
      mutate(y = horizon_response(calculation_value)) %>%
      ungroup() %>%
      filter(!is.na(y))
  } else {
    reg_data <- reg_data_pre
  }

  # Step 2: create equation ----
  data_cols <- colnames(reg_data)
  interaction_terms <- grep("mp_interaction", data_cols, value = TRUE)
  agg_terms <- grep("agg_control", data_cols, value = TRUE)

  # When Date is a fixed effect the median (benchmark) group is left out of
  # the group-specific regressors.
  if ("Date" %in% opts$fixed_effect_vars) {
    median_group <- floor(median(seq_len(opts$group_no)))

    # Term names carry the shock lag as "l<digits>" and the group level as
    # "_<digits>" (this is how they are parsed for the tidy output below), so
    # match the benchmark only as a complete "_<group>" token. A bare
    # substring match would also discard e.g. lag-5 terms of every group when
    # the benchmark group is 5.
    benchmark_pattern <- paste0("_", median_group, "(?![0-9])")
    is_benchmark <- grepl(benchmark_pattern, interaction_terms, perl = TRUE)
    interaction_terms <- interaction_terms[!is_benchmark]

    # NOTE(review): the naming scheme of the agg_control columns is not
    # visible here, so the original substring match is kept unchanged. It
    # excludes any agg_control column whose name contains the benchmark
    # number anywhere - confirm this cannot hit lag indices or other digits.
    agg_terms <- agg_terms[!grepl(paste0(median_group), agg_terms)]
  }

  # Collect right-hand-side terms as a vector and join once, so an empty
  # component can never leave a dangling " + " in the formula string.
  rhs_terms <- interaction_terms

  # Add controls
  if (length(opts$controls) > 0) {
    rhs_terms <- c(rhs_terms, paste0("control_", opts$controls))
  }

  # Add agg_control
  if (length(opts$agg_controls) > 0) {
    rhs_terms <- c(rhs_terms, agg_terms)
  }

  # Add ar terms (isTRUE() also covers a missing/NULL ar_lag)
  if (isTRUE(opts$ar_lag > 0)) {
    rhs_terms <- c(rhs_terms, paste0("control_ar_", seq_len(opts$ar_lag)))
  }

  if (length(rhs_terms) == 0) {
    stop(
      "panel_reg_lfe: no regressors found (no 'mp_interaction' columns and ",
      "no controls requested)",
      call. = FALSE
    )
  }

  # Add fixed effects; felm expects an empty formula part to be written as 0
  fe_part <- if (length(opts$fixed_effect_vars) > 0) {
    paste(opts$fixed_effect_vars, collapse = " + ")
  } else {
    "0"
  }
  eqn <- paste0("y ~ ", paste(rhs_terms, collapse = " + "), " | ", fe_part)

  # Add clustered standard errors: the third formula part (instruments) is
  # unused, hence the literal 0 before the cluster variables
  if (length(opts$cluster_vars) > 0) {
    eqn <- paste0(eqn, " | 0 | ", paste(opts$cluster_vars, collapse = " + "))
  }

  # Step 3: Run regression ----
  reg_output <- lfe::felm(as.formula(eqn), data = reg_data)

  # Tidy coefficient table, annotated with the horizon and with the lag and
  # group level parsed from the term names (first "l<digits>" / "_<digits>"
  # match in each term; NA when a term has no such token)
  reg_output$tidy_output <- tidy(reg_output) %>%
    mutate(
      horizon = m,
      mp_shock_lag = as.numeric(gsub("l", "", str_extract(term, "l\\d+"))),
      interaction_var = opts$mp_interaction_var,
      interaction_var_level = as.numeric(
        gsub("_", "", str_extract(term, "_\\d+"))
      ),
      shock_sign = "both"
    )

  reg_output
}