###################################################################################################
# Quantile Spacing Methods
#
# Codes adapted from R codes written by Larry Schmidt
# Link: https//sites.google.com/site/lawrencedwschmidt/qreg_spacing_code
# Luke Hartigan, 2020
##################################################################################################

# Load required libraries
if (!require("quantreg")) {
    stop("\'qreg_spacing_methods.R\' requires the \'quantreg\' package!\n", call. = FALSE)
}

# Compute the p-value for a two-sided test statistic
two_sided_pvalue <- function(x)
{
    return (2.0 * pnorm(abs(x), lower.tail = FALSE))
}

# Stationary resampling function with average block length equal to
# the inverse of the mean of a geometric distribution
# Conversion of Python code originally written by Josh Hemann:
# https://gist.github.com/josef-pkt/2279868
stationary_resample <- function(x, bloc_len)
{
    # Do some preliminary checks
    if (!is.matrix(x)) { x <- as.matrix(x) }

    # Compute some useful values
    nobs <- dim(x)[1L]
    if (nobs < bloc_len) {
        stop(sprintf("stationary_resample(): \'bloc_len\' (%d) larger than dimension of x (%d).\n",
                     bloc_len, nobs), call. = FALSE)
    }
    prob <- 1.0 / as.double(bloc_len)
    wrapped_x <- rbind(x, x)

    # Draw all random variates in one go for speed
    choices <- sample.int(n = nobs, replace = TRUE)
    alpha <- runif(n = nobs, min = 0.0, max = 1.0)

    # Setup storage for stationary bootstrap indices
    idx <- numeric(length = nobs)

    # The first index value
    idx[1L] <- choices[1L]

    # Generate the remaining index values
    for (i in 2L:nobs) {
        ifelse(test = prob < alpha[i],
               yes = idx[i] <- idx[i - 1L] + 1L,
               no = idx[i] <- choices[i])
    }

    resampled_x <- wrapped_x[idx, , drop = FALSE]

    # Return the resampled indices
    return (resampled_x)

}

# Compute `rho' to be used to compute the R^{1} metric
rho <- function(x, tau = 0.5)
{
    return (x * (tau - (x < 0.0)))
}

# Winsorization function for predicting the actual qauntiles
# Handles extreme tail values which can occur when using exp()
# Source: https://www.r-bloggers.com/2011/06/winsorization/
winsor <- function(x, multiple = 3, na.rm = FALSE)
{
    # Preliminary checks
    if (length(multiple) != 1L || multiple <= 0) {
        stop("winsor(): \'multiple\' value not supported\n", call. = FALSE)
    }

    mx <- median(x, na.rm = na.rm)
    y <- x - mx
    sx <- mad(x = y, center = 0.0, na.rm = na.rm) * multiple
    y[y > sx] <- sx
    y[y < -sx] <- -sx
    return (y + mx)
}

# Function to compute quantile regression spacings
qreg_spacing <- function(y, x, alpha, jstar, trunc_opt = TRUE, small = 1.0E-3)
{
    # Preliminary settings
    if(!is.matrix(y)) { y <- as.matrix(y) }
    if(!is.matrix(x)) { x <- as.matrix(x) }

    # Get some useful values
    nobs <- dim(x)[1L]
    nvar <- dim(x)[2L]
    np <- length(alpha)

    # jstar must be strictly less than the number of quantiles provided in alpha
    if (jstar < np) {
        tau <- alpha[jstar]
    } else {
        stop(sprintf("qreg_spacing(): \'jstar\' index value (%d) not appropriate given length of alpha (%d)!\n",
             jstar, np), call. = FALSE)
    }

    # Check response vector and design matrix have the same number of observations
    if (dim(y)[1L] != nobs) {
        stop("qreg_spacing(): Dimensions of \'y\' and \'x\' not equal!\n", call. = FALSE)
    }

    # Check if colnames is NULL and if so set to "xi" for i = 1,...,nvar
    if (is.null(colnames(x))) {
        x_names <- paste0('x', seq_len(nvar))
    } else {
        x_names <- colnames(x)
        if (x_names[1L] == "") {
            x_names[1L] <- "intercept"
        }
    }

    # Set aside results storage space
    r1_metric <- matrix(data = NA_real_, nrow = 1L, ncol = np)
    rownames(r1_metric) <- "R1 metric"
    colnames(r1_metric) <- as.character(alpha)

    coefficients <- matrix(data = NA_real_, nrow = nvar, ncol = np)
    rownames(coefficients) <- x_names
    colnames(coefficients) <- as.character(alpha)

    # Estimate the initial model to get starting coefficients and ehat0
    init_model <- rq.fit.fnb(x = x, y = y, tau = tau)

    # Extract the coefficients and residuals
    coefficients[, jstar] <- init_model$coefficients
    ehat0 <- init_model$residuals

    # Compute the R^{1} metric
    v1 <- sum(rho(x = ehat0, tau = tau))
    v0 <- sum(rho(x = (rq.fit.fnb(x = matrix(data = 1.0, nrow = nobs, ncol = 1L),
              y = y, tau = tau)$residuals), tau = tau))
    r1_metric[jstar] <- 1.0 - (v1 / v0)

    # Remove `init_model' as not needed anymore
    rm(init_model)

    # Copy ehat0 so we can reuse it
    ehat_t <- ehat0

    # Estimate the upper quantile spacings
    for (j in (jstar + 1L):np) {

        # Determine the quantile to estimate
        tau_t <- (alpha[j] - alpha[j - 1L]) / (1.0 - alpha[j - 1L])

        # Adjust data for truncation or removal
        if (trunc_opt) {
            # Indices of residuals greater than zero
            idx <- which(ehat_t > 0.0)
            nidx <- length(idx)
            x_t <- x[idx, , drop = FALSE]
            y_t <- log(pmax(ehat_t[idx], small))
        } else {
            # Indices of residuals greater than 'small'
            idx <- which(ehat_t > small)
            nidx <- length(idx)
            x_t <- x[idx, , drop = FALSE]
            y_t <- log(ehat_t[idx])
        }

        # Run quantile regression
        j_model <- rq.fit.fnb(x = x_t, y = y_t, tau = tau_t)

        # Extract the coefficients
        coefficients[, j] <- j_model$coefficients

        # Compute the R^{1} metric
        v1 <- sum(rho(x = j_model$residuals, tau = tau_t))
        v0 <- sum(rho(x = (rq.fit.fnb(x = matrix(data = 1.0, nrow = nidx, ncol = 1L),
                  y = y_t, tau = tau_t)$residuals), tau = tau_t))
        r1_metric[j] <- 1.0 - (v1 / v0)

        # Update residuals for the next iteration
        ehat_t <- ehat_t - exp(x %*% j_model$coefficients)

    }

    # Reset ehat_t
    ehat_t <- ehat0

    # Estimate the lower quantile spacings
    for (j in (jstar - 1L):1L) {

        # Determine the quantile to estimate
        tau_t <- (alpha[j + 1L] - alpha[j]) / alpha[j + 1L]

        # Adjust data for truncation or removal
        if (trunc_opt) {
            # Indices of residuals less than zero
            idx <- which(-ehat_t > 0.0)
            nidx <- length(idx)
            x_t <- x[idx, , drop = FALSE]
            y_t <- log(pmax(-ehat_t[idx], small))
        } else {
            # Indices of residuals less than 'small'
            idx <- which(-ehat_t > small)
            nidx <- length(idx)
            x_t <- x[idx, , drop = FALSE]
            y_t <- log(-ehat_t[idx])
        }

        # Run quantile regression
        j_model <- rq.fit.fnb(x = x_t, y = y_t, tau = tau_t)

        # Extract the coefficients
        coefficients[, j] <- j_model$coefficients

        # Compute the R^{1} metric
        v1 <- sum(rho(x = j_model$residuals, tau = tau_t))
        v0 <- sum(rho(x = (rq.fit.fnb(x = matrix(data = 1.0, nrow = nidx, ncol = 1L),
                  y = y_t, tau = tau_t)$residuals), tau = tau_t))
        r1_metric[j] <- 1.0 - (v1 / v0)

        # Update residuals for the next iteration
        ehat_t <- ehat_t + exp(x %*% j_model$coefficients)

    }

    # Collect results as a list object
    results <- list()
    results$coefficients <- coefficients
    results$r1_metric <- r1_metric
    results$y <- y
    results$x <- x
    results$alpha <- alpha
    results$jstar <- jstar
    results$trunc_opt <- trunc_opt
    results$small <- small
    results$call <- match.call()

    # Return results with class "qsp"
    results$call <- match.call()
    class(results) <- "qsp"
    return (results)

}

# Accessor methods for "qsp"
coef.qsp <- function(object, ...)
{
    return (object$coefficients)
}

# Print method for an object of class "qsp"
print.qsp <- function(x, digits = max(3L, getOption("digits") - 3L),
         print.gap = 3L, right = TRUE, ...)
{
    # Print the results
    cat("\nCall: ", paste(deparse(x$call), sep = '\n', collapse = '\n'), '\n', sep = "")
    cat(sprintf("\nDependent variable treatment: %s (small = %g)\n",
        ifelse(test = x$trunc_opt, yes = "Truncation", no = "Removal"), x$small))
    cat("\nCoefficients:\n")
    print.default(x$coefficients, digits = digits, print.gap = print.gap, right = right, ...)
    cat('\n')
}

# Predict quantiles for an object of class "qsp"
predict.qsp <- function(object, newx = NULL, multiple = 3)
{
    # Check if we have an object of class "qsp"
    if (!identical(class(object), "qsp")) {
        stop("predict.qsp(): \'object\' not of class \'qsp\'!\n", call. = FALSE)
    }

    # Are we computing fitted or predicted quantiles?
    if (is.null(newx)) {
        x <- object$x
    } else {
        x <- as.matrix(newx)
    }

    # Compute some useful values
    coeff <- object$coefficients
    jstar <- object$jstar
    nobs <- dim(x)[1L]
    np <- dim(coeff)[2L]

    # Results storage
    quantiles <- matrix(data = NA_real_, nrow = nobs, ncol = np)
    colnames(quantiles) <- colnames(coeff)

    # Compute the jstar quantile
    ehat0 <- x %*% coeff[, jstar]
    quantiles[, jstar] <- ehat0

    # Copy ehat0 so we can reuse it
    ehat_t <- ehat0

    # Compute the upper quantiles
    for (j in (jstar + 1L):np) {
        ehat_t <- ehat_t + winsor(x = exp(x %*% coeff[, j]), multiple = multiple)
        quantiles[, j] <- ehat_t
    }

    # Reset ehat_t
    ehat_t <- ehat0

    # Compute the lower quantiles
    for (j in (jstar - 1L):1L) {
        ehat_t <- ehat_t - winsor(x = exp(x %*% coeff[, j]), multiple = multiple)
        quantiles[, j] <- ehat_t
    }

    # Return the quantiles
    return (quantiles)

}

# Estimate the quantile spacing standard errors matrix by weighted bootstrap
qsp_bs_stderr <- function(y, x, alpha, jstar, trunc_opt = TRUE, small = 1.0E-3,
                          bl = sqrt(length(y)), nrep = 1000L, verbose = TRUE, seed = 12041948L)
{
    # Preliminary settings
    if(!is.matrix(y)) { y <- as.matrix(y) }
    if(!is.matrix(x)) { x <- as.matrix(x) }

    # Compute a few useful values
    set.seed(seed = seed)
    nobs <- dim(x)[1L]
    nvar <- dim(x)[2L]
    np <- length(alpha)
    tau <- alpha[jstar]

    # Check if colnames is NULL and if so set to "xi" for i = 1,...,nvar
    if (is.null(colnames(x))) {
        x_names <- paste0('x', seq_len(nvar))
    } else {
        x_names <- colnames(x)
    }

    # Establish results storage
    coef_array <- array(data = NA_real_, dim = c(nrep, np, nvar))
    coef_se <- matrix(data = NA_real_, nrow = nvar, ncol = np)
    rownames(coef_se) <- x_names
    colnames(coef_se) <- as.character(alpha)

    if (verbose) {
        cat('\n')
        cat("Starting stationary bootstrap procedure...")
        cat('\n')
    }

    # Run stationary bootstrap procedure
    yx <- cbind(y, x)
    for (j in seq_len(nrep)) {

        # Generate a stationary resample of the data
        yx_t  <- stationary_resample(x = yx, bloc_len = bl)

        # Estimate the quantile spacings
        b_model <- qreg_spacing(y = yx_t[,1L], x = yx_t[,-1L, drop = FALSE], alpha = alpha,
                                jstar = jstar, trunc_opt = trunc_opt, small = small)

        # Extract the coefficients and save them
        coef_t <- b_model$coefficients

        for (i in seq_len(nvar)) {
            coef_array[j, ,i] <- coef_t[i, ]
        }

        # Print progress if requested
        if (verbose) {
            cat(sprintf("Progress: -- %3.0f%% completed -- \b \r",
                        (as.double(j) / nrep) * 100.0))
        }

    }

    # Compute the standard errors as the standard deviation of the weighted bootstrap
    for (k in seq_len(nvar)) {
        coef_se[k, ] <- apply(X = coef_array[, , k], MARGIN = 2L, FUN = sd, na.rm = TRUE)
    }

    if (verbose) {
        cat('\n')
        cat("...stationary bootrap procedure completed.")
        cat('\n')
    }

    # Return standard errors
    return (coef_se)

}

# Summary method for class "qsp"
summary.qsp <- function(object, bl = sqrt(length(object$y)), nrep = 1000L,
                        verbose = TRUE, seed = 12041948L, ci_size = 0.05)
{
    # Check if we have an object of class "qsp"
    if (!identical(class(object), "qsp")) {
        stop("summary.qsp(): \'object\' not of class \'qsp\'!\n", call. = FALSE)
    }

    # Extract the estimated coefficients
    coeff <- object$coefficients

    # Estimate the coefficient standard errors by weighted bootstrap
    coef_se <- qsp_bs_stderr(y = object$y, x = object$x, alpha = object$alpha,
                             jstar = object$jstar, trunc_opt = object$trunc_opt,
                             small = object$small, bl = bl, nrep = nrep,
                             verbose = verbose, seed = seed)

    # Compute (two-sided) t-statistics and p-values
    tstat <- coeff / coef_se
    pval <- two_sided_pvalue(x = tstat)

    # Compute (1 - ci_size)% confidence intervals
    zs <- qnorm(p = ci_size / 2.0, lower.tail= FALSE)
    width <- (1.0 - ci_size) * 100.0
    lb_ci <- coeff - zs * coef_se
    ub_ci <- coeff + zs * coef_se

    # Rearrange results into a more logical format: coefs by quantile
    nvar <- dim(coeff)[1L]
    np <- dim(coeff)[2L]
    output <- matrix(data = NA_real_, nrow = (nvar * np), ncol = 6L)
    rownames(output) <- paste(rownames(coeff), rep(x = colnames(coeff),
                              each = nvar), sep = '_')
    colnames(output) <- c("Estimate", "Std. Error", "t-statistic", "p-value",
                          sprintf("-%2.0f%% C.I.", width), sprintf("+%2.0f%% C.I.", width))

    for (i in seq_len(np)) {
        bn <- ((i * nvar) - (nvar - 1L))
        en <-  (i * nvar)
        output[bn:en, ] <- cbind(coeff[, i], coef_se[, i], tstat[, i],
                                 pval[, i], lb_ci[, i], ub_ci[, i])
    }

    # Collect results as a list object
    results <- list()
    results$output <- output
    results$trunc_opt <- object$trunc_opt

    # Return results with class "summary.qps"
    class(results) <- paste("summary", class(object), sep = '.')
    return (results)

}

# Print method for class "summary.qsp"
print.summary.qsp <- function(x, digits = max(3L, getOption("digits") - 3L),
                              print.gap = 3L, right = TRUE, ...)
{
    # Print the results
    cat(sprintf("\nDependent variable treatment: %s (small = %g)\n",
        ifelse(test = x$trunc_opt, yes = "Truncation", no = "Removal"), x$small))
    cat("\nCoefficients:\n")
    print.default(x$output, digits = digits, print.gap = print.gap, right = right, ...)
    cat('\n')
}

# EOF
