
# Extract the name of a currency from a single IMF currency description.
#
# The description is first checked against a handful of hard-coded special
# cases. Otherwise it is normalised by a fixed sequence of text replacements,
# and the currency is taken to be the text that follows the first " is the "
# in the first sentence, up to (but excluding) the first comma.
#
# Args:
#   desc: A single description string, or NA.
#
# Returns:
#   NA if `desc` is NA; "Complex" if the first sentence contains no " is the "
#   (or for the Somali shilling special case); otherwise the currency name as
#   a string. Letter case is left as it appears in the description.
extract_currency_from_description <- function(desc) {
  # Handle a few special cases that are not handled by the process below
  if (is.na(desc)) {
    return(NA)
  }
  if (grepl("The euro is legal tender in ", desc)) {
    return("euro")
  }
  if (grepl(" San Marino adopted the euro.", desc)) {
    return("euro")
  }
  if (grepl("Through December 31, 2000, the currency of Greece was the Greek drachma.", desc)) {
    return("Greek drachma")
  }
  if (grepl("is the Zimbabwe dollar and is the sole legal tender", desc)) {
    return("Zimbabwe dollar")
  }
  if (grepl("The currency of the Federal Republic of Yugoslavia is the dinar.", desc)) {
    return("Yugoslavian dinar")
  }
  if (grepl("dollar .+ legal tender and circulates freely", desc)) {
    return("US dollar")
  }
  if (grepl("While the Somali shilling is the official currency", desc)) {
    return("Complex")
  }

  # Sometimes the IMF uses a vague term to refer to a currency, so replace
  # these with more precise terms. The replacements run in the order written;
  # each one sees the output of the previous one.
  desc_precise <- gsub(" is manat", " is the Azerbaijan manat", desc)
  desc_precise <- gsub(" the kwacha", " the Zambian kwacha", desc_precise)
  desc_precise <- gsub(" the guaraní", " the Paraguayan guaraní", desc_precise)
  desc_precise <- gsub(" the ariary", " the Malagasy ariary", desc_precise)
  desc_precise <- gsub(" the dinar", " the Serbian dinar", desc_precise)
  # The word boundary stops this from also rewriting longer words that start
  # with "sol", e.g. " the sole legal tender".
  desc_precise <- gsub(" the sol\\b", " the Peruvian sol", desc_precise)
  desc_precise <- gsub(" the baht", " the Thai baht", desc_precise)
  desc_precise <- gsub(" the dollar", " the US dollar", desc_precise)

  # Sometimes the IMF changes the name they use to refer to a currency, even
  # though the currency has not changed
  # Not to be confused with the Venezuelan Bolivar (to 2017) or bolivar soberano (from 2017)
  desc_consistent <- gsub(" the bolívar[,\\.]", " the bolívar fuerte\\.", desc_precise)
  desc_consistent <- gsub(" the Sãotoméan dobra", " the São Tomé and Príncipe dobra", desc_consistent)
  desc_consistent <- gsub(" Uganda shilling", " Ugandan shilling", desc_consistent)
  desc_consistent <- gsub(" Tanzania shilling", " Tanzanian shilling", desc_consistent)
  desc_consistent <- gsub(" convertible marka", " convertible mark", desc_consistent)

  # Add the article that the extraction below relies on
  desc_no_full_stop <- gsub(" is Swedish krona", " is the Swedish krona",
                            desc_consistent, fixed = TRUE)
  # Full stops are interpreted below as the end of a sentence, so remove full
  # stops that have other meanings. These are literal strings, hence
  # fixed = TRUE: as regular expressions each "." would match any character.
  desc_no_full_stop <- gsub("Lao P.D.R.", "Lao PDR", desc_no_full_stop, fixed = TRUE)
  desc_no_full_stop <- gsub("U.A.E. dirham", "UAE dirham", desc_no_full_stop, fixed = TRUE)
  desc_no_full_stop <- gsub("St. ", "St ", desc_no_full_stop, fixed = TRUE)
  desc_no_full_stop <- gsub("U.S. dollar", "US dollar", desc_no_full_stop, fixed = TRUE)

  # Tidy the descriptions so the currencies are reported more nicely
  desc_clean <- gsub("dollar issued by the ECCB", "dollar", desc_no_full_stop)
  desc_clean <- gsub(", issued by the ECCB", "", desc_clean)
  desc_clean <- gsub("bolívar \\“soberano,\\”", "bolívar soberano,", desc_clean)
  # Drop short parenthesised remarks (1 to 35 characters between the brackets)
  desc_clean <- gsub(" \\(.{1,35}\\)", "", desc_clean)

  # The first sentence ends at the first full stop that is followed by a space
  sentence_end <- regexpr("\\. ", desc_clean)
  if (sentence_end == -1) {
    first_sentence <- desc_clean
  } else {
    first_sentence <- substr(desc_clean, start = 1, stop = sentence_end - 1)
  }

  # Extract the part of the first sentence after " is the " but before any comma
  marker <- " is the "
  marker_start <- regexpr(marker, first_sentence)
  if (marker_start == -1) {
    return("Complex")
  }
  # Nothing beyond character 300 of the sentence is kept
  after_marker <- substr(first_sentence,
                         start = marker_start + nchar(marker),
                         stop = 300)

  comma_position <- regexpr(",", after_marker)
  if (comma_position == -1) {
    # No comma: the rest of the sentence is the currency, minus any full stop
    return(gsub("\\.", "", after_marker))
  }
  substr(after_marker, start = 1, stop = comma_position - 1)
}

# Import one variable from an AREAER spreadsheet as a zoo object with one row
# per year and one column per country.
#
# Args:
#   path: Path to the spreadsheet. It is read with read.xlsx() from row 2 and
#     must provide the columns Category, Year, Status, IFS.Code, Country and
#     Description.
#   country_facts: Data frame with columns `country` and `ifs_code`. It is
#     used to label columns by country name; columns whose label is not in
#     country_facts$country are dropped from the result.
#   variable: One of "classification", "anchor" or "currency".
#
# Returns:
#   A zoo object covering 1999-2020, with leading and trailing years in which
#   every country is NA trimmed off, plus a column `Taiwan Province Of China`.
#   Cell values are NA (no rows for that country and year),
#   "Not in any category" (no row with Status "yes"), or the extracted value.
#
# Raises:
#   An error if `variable` is not one of the three choices, if a country-year
#   has more than one "yes" category for classification/anchor, or if a
#   country-year has no "Currency" row when variable is "currency".
import_EAER <- function(path, country_facts, variable) {
  variable <- match.arg(variable, c("classification", "anchor", "currency"))
  years <- 1999:2020

  # Import raw data and drop the rows whose Category is one of the two
  # headings below (presumably section-heading rows rather than categories a
  # country can belong to -- TODO confirm against the spreadsheet)
  EAER_raw <- read.xlsx(path, startRow = 2, colNames = TRUE, rowNames = FALSE)
  EAER_filtered <- filter(EAER_raw, Category != "Classification" & Category != "Exchange rate anchor")

  # Define a function that extracts the chosen variable for a specified year,
  # given the rows belonging to one IFS code
  extract_value_for_country_year <- function(Year, rows_for_country) {
    # Extract data for the chosen country and year
    chosen_rows <- rows_for_country[rows_for_country$Year == Year, ]
    if (nrow(chosen_rows) == 0) {
      return(NA)
    }

    # Identify the categories that the country belongs to in that year
    row_with_value <- chosen_rows[!is.na(chosen_rows$Status) & chosen_rows$Status == "yes", ]
    if (nrow(row_with_value) == 0) {
      return("Not in any category")
    }

    # If the user wants the classification or exchange rate anchor, we extract
    # the category for which status is "yes"
    if (variable %in% c("classification", "anchor")) {
      if (nrow(row_with_value) > 1) {
        stop(paste0("IFS.Code ", row_with_value$IFS.Code[1], " in ", Year, " has multiple values"))
      }
      return(row_with_value$Category)
    }

    # Otherwise the user wants the currency.
    # If the country lacks a currency, return an error as I need to investigate
    # this further. The code is read from the rows themselves because no
    # IFS.Code variable is in scope inside this function.
    if (!("Currency" %in% row_with_value$Category)) {
      stop(paste0("IFS.Code ", row_with_value$IFS.Code[1], " has no currency in year ", Year))
    }
    row_with_currency <- row_with_value[row_with_value$Category == "Currency", ]

    # One exception is Estonia in 2010 and 2011, whose currency changed but
    # whose description stayed the same
    if (row_with_currency$Country == "Estonia" && Year == 2010) {
      return("Estonian Kroon")
    }
    if (row_with_currency$Country == "Estonia" && Year == 2011) {
      return("Euro")
    }

    # Extract the currency from the description
    currency <- str_to_title(extract_currency_from_description(row_with_currency$Description))

    # Another exception are countries that use the West African CFA franc
    # (BCEAO) or the Central African CFA franc (BEAC). These currencies have
    # the same description in AREAER, but are distinct currencies, albeit ones
    # that have always been at parity.
    # %in% rather than == so that an NA currency is passed through as NA.
    if (currency %in% "Cfa Franc") {
      if (row_with_currency$IFS.Code %in% c(638, 748, 662, 654, 678, 692, 722, 742)) {
        return("Cfa Franc Bceao")
      } else if (row_with_currency$IFS.Code %in% c(622, 626, 628, 642, 646, 634)) {
        return("Cfa Franc Beac")
      }
    }
    currency
  }

  # Extract the chosen variable for all countries
  IFS.Codes <- unique(EAER_raw$IFS.Code)
  extract_value_for_country <- function(IFS.Code) {
    rows_for_country <- EAER_filtered[EAER_filtered$IFS.Code == IFS.Code, ]
    values <- unlist(map(years, extract_value_for_country_year, rows_for_country = rows_for_country))
    zooreg(values, order.by = years)
  }
  zoo_list <- map(IFS.Codes, extract_value_for_country)
  names(zoo_list) <- IFS.Codes

  # Make a single zoo object containing the chosen variable for all IFS.Codes,
  # dropping leading and trailing years in which every column is NA
  zoo_merged <- na.trim(do.call(merge.zoo, zoo_list), sides = "both", is.na = "all")

  # Label the columns by country names rather than IFS.Codes; codes with no
  # match in country_facts keep the code as their label
  extract_country_name <- function(IFS.Code) {
    country_name <- country_facts$country[country_facts$ifs_code == IFS.Code]
    if (length(country_name) == 0) {
      return(IFS.Code)
    }
    country_name
  }
  colnames(zoo_merged) <- unlist(map(colnames(zoo_merged), extract_country_name))

  # Add Taiwan's currency, as Taiwan is absent from the AREAER dataset
  # NOTE(review): this column is filled with "New Taiwan Dollar" whatever
  # `variable` is, including "classification" and "anchor" -- confirm that is
  # intended.
  Taiwan <- zooreg(rep("New Taiwan Dollar", nrow(zoo_merged)), order.by = index(zoo_merged))
  known_countries <- colnames(zoo_merged) %in% country_facts$country
  merge.zoo(zoo_merged[, known_countries], `Taiwan Province Of China` = Taiwan)
}