# Star Wars at Central Banks Script - User-defined functions for p-curve method
# Adapted from Simonshohn et al (2014) p-curve app 4.06, accessed on 20 November 2019
# Last updated: 26 November 2019

# 1. Function for calculating non-centrality parameters (NCP) for chi-squared distributions
#    for a given level of power (this is necessary for the second p-curve test). Note
#    all of our test statistics are z-scores, which Simonshohn et al converts to 
#    chi-squared scores to rationalise the amount of code they used). We do the same.

# Args: 
#   df = degrees of freedom of the chi-squared test
#   power = statistical power

# Returns:
#   a vector of real number corresponding to the ncp of a chi-squared distribution given df and power

get_chi_ncp <- function(df, power) {
  xc <- qchisq(p = 0.95, df = df)
  error <- function(ncp_est, power, x, df) {pchisq(x, df = df, ncp = ncp_est) - (1 - power)}
  ncp_root_est <- uniroot(f = error, interval = c(0, 1000), x = xc, df = df, power = power)["root"]
  return(ncp_root_est)
}

# 2. Function that makes numbers look like a percentage (e.g. it converts 0.06 t 6%)

# Args: 
#   x = some atomic vector with a numerical type
#   digits = how many digits to retain after the decimal point
#   format = species how the numbers are written (f means numbers are written as xxx.xxx
#            where the x's are any digits)

# Returns:
#   a character vector with percentage like strings e.g. "6%"

get_percent <- function(x, digits = 0, format = "f", ...)   {
  paste(formatC(100 * x, format = format, digits = digits, ...), "%", sep = "")
}

# 3. Function for bounding p-values that are very close to zero or one to avoid errors

# Args: 
#   p = vector of p-values

# Returns:
#   a vector of p-values with bounded values

pbound <- function(p) {
  pmin(pmax(p, 2.2e-16), 1-2.2e-16)
}

# 4. Function for calculating the % of p-values that are expected to be smaller
#    than a given alpha out of all the p-values submitted for p-curve analysis,
#    and if the statsitical power is 33% (this is for the second p-curve test).

# Args
#   pc = the alpha or p-critical threshold chosen

# Returns:
#   

prop33 <- function(pc, ncparam = ncp33) {
  prop <- ifelse(pvalues < 0.05, 1 - pchisq(qchisq(1 - pc, df = 1), df = 1, ncp = ncparam), NA)
  prop
}

# 5. Function for conducting the Stouffer test for a vector of pp-values (meta hypothesis test)

# Args:
#   pp = numerical vector containing pp-values

# Returns:
#

stouffer <- function(pp) {
  sum(qnorm(pp), na.rm = TRUE)/sqrt(sum(!is.na(pp)))
}

# 6. Function for conducting the Fisher test for a vector of pp-values (meta hypothesis test)

# Args:
#   pp = numerical vector containing pp-values

# Returns:
#

fisher <- function(pp) {
  -2*sum(log(pp),na.rm = TRUE)
}

# 7. Function for constructing p-curve data by placing all p-values below some threshold, alpha, in five equal-sized buckets and createing data  
#    values for the 'no effect' line, i.e. the uniform distribution of p-values that should be observed if the null hypothesis for each test were true

# Args:
#   p = vector containing p-values that you want to include in the pcurve
#   alpha = the range of values that you want to p-curve over (the standard range is 0-0.05 per the original paper). Alpha can be 0.01, 0.025 or 0.05.

# Returns: a dataframe with p-values that have been summarised by grouping into bins
#

get_pcurve_data <- function(p, alpha = 0.05) {
  binned_data <- if (alpha == 0.05) {mutate(as.data.frame(p), bins = case_when(pvalues <= 0.01 ~ 0.005,
                                                                               pvalues <= 0.02 ~ 0.015,
                                                                               pvalues <= 0.03 ~ 0.025,
                                                                               pvalues <= 0.04 ~ 0.035,
                                                                               pvalues <= 0.05 ~ 0.045,
                                                                               pvalues > 0.05 ~ NA_real_
  ))} else if (alpha == 0.01) {mutate(as.data.frame(p), bins = case_when(pvalues <= 0.002 ~ 0.001,
                                                                         pvalues <= 0.004 ~ 0.003,
                                                                         pvalues <= 0.006 ~ 0.005,
                                                                         pvalues <= 0.008 ~ 0.007,
                                                                         pvalues <= 0.01 ~ 0.009,
                                                                         pvalues > 0.01 ~ NA_real_
  ))} else {stop("Please choose a value for alpha that is in the following set: (0.01, 0.025, 0.05)")}
  binned_data <- binned_data %>% 
    filter(!is.na(bins)) %>%
    mutate(bins = factor(bins, levels = (if (alpha == 0.05) {c(0.005, 0.015, 0.025, 0.035, 0.045)}
                         else if (alpha == 0.01) {c(0.001, 0.003, 0.005, 0.007, 0.009)}
                         else {stop("Please choose a value for alpha that is in the following set: (0.01, 0.025, 0.05)")}))) %>% 
    group_by(bins, .drop = FALSE) %>%
    summarise(freq = n()) %>%
    mutate(`Observed p-curve` = freq/sum(freq)*100,
           `Null of no effect` = 20)
  binned_data
}  

# 8. Function for constructing pp-values

# Args:
#   p = numerical vector containing p-values
#   alpha = the range of values that you want to p-curve over (the standard range is 0-0.05 per the original paper). Alpha can be 0.01, 0.025 or 0.05.

# Returns: a numerical vector containing pp-values that can feed into the Stouffer or Fisher test
#

get_pp_values <- function(p, alpha = 0.05) {
  pp <- as.numeric(ifelse(p < alpha, (1/alpha)*p, NA)) %>%
    pbound()
}

