% This program estimates a joint nominal and real term structure model for
% Australia for the RDP Hambur and Finlay (2018) "Affine Endeavour:
% Estimating a Joint Model of the Nominal and Real Term Structures of
% Interest Rates for Australia"
% The methodology is an extension of "A New Perspective on Gaussian Dynamic
% Term Structure Models" by Joslin, Singleton and Zhu (JSZ)

% The first part of the code estimates the model by Maximum Likelihood under
% the assumption that some portfolios of zero-coupon bonds are priced without
% error (i.e. conditional on the observed factors).
% The second part estimates the model without this assumption using the Kalman
% filter, taking the previous parameters as starting points for
% optimisation.

% Code adapted from code for JSZ

% Code requires the Matlab optimisation and statistical toolboxes (latter only
% used to get PCs, so could be avoided with user-written code)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Details on the model and parameter vectors are included in the
%sub-functions

%Important objects

%Output
% - fit_y_all(_r) is the fitted version of the nominal (real) yields using ML.
% Maturities by column, incremented by one month
%  - fit_yrf is the fitted value of the expected nominal short rate
% Maturities by column, incremented by one month
%  - fit_yrf_r is expected inflation
% Maturities by column, incremented by one month, start in second column
% Variables with the _KF suffix are the Kalman-filter versions
% cP are 'observed factors'. cP_filtered_KF are filtered factors

% ------------------------------------------------------------------------
% Session setup and user switches.
% NOTE: this wipes the workspace, closes all figures and resets the MATLAB
% path to its default before adding the JSZ library folder.
% ------------------------------------------------------------------------
clear all
close all
restoredefaultpath
addpath(genpath('jsz_library'))
clc

% Set folder for data files.
% Leave empty to read the CSV files from the current folder, or set to the
% folder that holds them, e.g. 'C:\data\' (include the trailing separator).
% (Previously this line was "data_loc = ;", which is a syntax error and
% stopped the whole file from being parsed.)
data_loc = ''; % Need to input data location here

%Do Kalman filter estimate? 1=yes, 0=no (can run straight to Kalman filter,
%but will automatically pick last set of optimal parameters, which may not
%be the best set if running multiple sets of iterations)
Kalman=0;

%Only use nearest rates for Kalman filter (0=no, 1=yes)
Cut_r=1;

%Calibrate survey errors (1) or allow to be estimated (0)?
%NOTE: when a switch is set to 1 the matching standard-deviation variable
%(s_infl_std, s_infl_stdQ, s_infl_stdLR, s_cash_std) must be supplied by
%the user; the code that would load them is commented out further down.
cal_infl=0;
cal_inflq=0;
cal_inflLR=0;
cal_cash=0;

% Load data
% Every CSV is read as a purely numeric matrix from the folder data_loc.
% fullfile is used to build the paths so that data_loc works with or
% without a trailing file separator (plain string concatenation would
% silently glue the folder name onto the file name).
DataYields        = csvread(fullfile(data_loc, 'Some Yields m.csv'));
DataYields_r      = csvread(fullfile(data_loc, 'Some Yields m15 - real adj.csv'));
DataSurveysInfl   = csvread(fullfile(data_loc, 'year_end_inflation.csv'));
%DataSurveysInfl_std=csvread(fullfile(data_loc, 'year_end_inflation_std.csv'));
DataSurveysInflQ  = csvread(fullfile(data_loc, 'inflation_q.csv'));
%DataSurveysInflQ_std=csvread(fullfile(data_loc, 'inflation_q_std.csv'));
DataSurveysCash   = csvread(fullfile(data_loc, 'Cash forecasts - m.csv'));
%DataSurveysCashSTD=csvread(fullfile(data_loc, 'Cash forecasts - m_std.csv'));
%DataSurveys10y=csvread(fullfile(data_loc, '10y forecasts - m.csv'));
DataSurveysInflLR = csvread(fullfile(data_loc, 'LR_inflation.csv'));
%DataSurveysInflLR_std=csvread(fullfile(data_loc, 'LR_inflation_std.csv'));
DataCloseR        = csvread(fullfile(data_loc, 'Closest_yield.csv'));

dt = 1/12; %Change in time between observations e.g. 1/12 is monthly data


% Split the real-yield matrix into its parts. The set of real maturities is
% kept fixed here because principal components are taken over it; parts of
% the real curve can be dropped later in the KF once initial estimates exist.
%   row 1, columns 2..end     -> maturities (mats_r)
%   rows 2..end, columns 2..end -> the yields themselves (yields_r)
% The first column is skipped (presumably a date column - confirm against
% the CSV layout).
[t_r, q_r] = size(DataYields_r);

yields_r       = DataYields_r(2:end, 2:end);
mats_r         = DataYields_r(1, 2:end);
mats_periods_r = round(mats_r ./ dt);   % maturities expressed in periods of length dt

%Reshape survey data
% Each survey matrix is split the same way: row 1 (from column 2) holds the
% forecast horizons, rows 2..end (from column 2) hold the survey values.
[t_infl , q_infl] = size(DataSurveysInfl);
[t_inflLR , q_inflLR] = size(DataSurveysInflLR);
% The quarterly-inflation dimensions are needed below (s_infl_horQ, s_inflQ);
% this line was commented out, which made the script stop with an
% undefined-variable error.
[t_inflQ , q_inflQ] = size(DataSurveysInflQ);
[t_cash , q_cash] = size(DataSurveysCash);
%[t_10y , q_10y] = size(DataSurveys10y);

s_infl_hor=DataSurveysInfl(1,2:q_infl);
s_infl=DataSurveysInfl(2:t_infl,2:q_infl);
% s_infl_std=DataSurveysInfl_std(2,:);
s_infl_horQ=DataSurveysInflQ(1,2:q_inflQ);
s_inflQ=DataSurveysInflQ(2:t_inflQ,2:q_inflQ);
%s_infl_stdQ=DataSurveysInflQ_std(2,:);
s_cash=DataSurveysCash(2:t_cash,2:q_cash);
s_cash_hor=DataSurveysCash(1,2:q_cash);
%s_cash_std=DataSurveysCashSTD(2,:);
%s_10y=DataSurveys10y(2:t_10y,2:q_10y);
%s_10y_hor=DataSurveys10y(1,2:q_10y);
s_inflLR_hor=DataSurveysInflLR(1,2:q_inflLR);
s_inflLR=DataSurveysInflLR(2:t_inflLR,2:q_inflLR);
%s_infl_stdLR=DataSurveysInflLR_std(2,:);

% NOTE(review): unlike the other inputs, every column of this file is kept
% (only the first row is dropped) - confirm the file has no leading
% date/label column.
find_r=DataCloseR(2:end,:); % Pick maturity closest to active bond 

% When a survey error is estimated rather than calibrated (switch == 0) its
% standard deviation starts out empty. When a switch is 1 the matching
% *_std variable has to be supplied by the user (see the commented-out
% lines above), otherwise it is undefined further down.
if cal_infl==0, s_infl_std=[]; end
if cal_inflq==0, s_infl_stdQ=[]; end
if cal_inflLR==0, s_infl_stdLR=[]; end
if cal_cash==0, s_cash_std=[]; end
    




%Reshape and format nominal yield data 
% Same layout as the real-yield file: row 1 (from column 2) holds the
% maturities, rows 2..end (from column 2) hold the yields. These
% definitions were missing, so yields, mats, t and q (all used further
% down, e.g. ones(t-1,1) and fit_y_all_obs(:,1:q-1)) were undefined.
[t , q] = size(DataYields);

mats=DataYields(1,2:q);
mats_periods=round(mats/dt);
yields=DataYields(2:t,2:q);

% Nominal and real yields side by side: (t-1) x ((q-1)+(q_r-1)).
% Both blocks must cover the same observation dates (same number of rows).
yields_all=[yields, yields_r];

% Candidate survey combinations, one per row (the paper uses row 2).
% Column layout, as consumed by the estimation call further down:
%    1-3  : cash-rate survey (values, horizons, std)
%    4-5  : not populated in any of the sets below
%    6-8  : quarterly inflation survey (values, horizons, std)
%    9-11 : long-run inflation survey (values, horizons, std)
% cell(4,11) initialises every entry to [], so only the populated columns
% need to be assigned; row 1 (no surveys at all) is left untouched.
Surveys = cell(4, 11);

% Set 2: cash + quarterly inflation + long-run inflation
Surveys(2, 1:3)  = {s_cash, s_cash_hor, s_cash_std};
Surveys(2, 6:8)  = {s_inflQ, s_infl_horQ, s_infl_stdQ};
Surveys(2, 9:11) = {s_inflLR, s_inflLR_hor, s_infl_stdLR};

% Set 3: quarterly inflation + long-run inflation
Surveys(3, 6:8)  = {s_inflQ, s_infl_horQ, s_infl_stdQ};
Surveys(3, 9:11) = {s_inflLR, s_inflLR_hor, s_infl_stdLR};

% Set 4: cash + long-run inflation
Surveys(4, 1:3)  = {s_cash, s_cash_hor, s_cash_std};
Surveys(4, 9:11) = {s_inflLR, s_inflLR_hor, s_infl_stdLR};

% Numbers of NOMINAL factors to loop over (set up to run different combinations if wanted)
Factors = 3;

% Numbers of 'real' factors to loop over
FactorsR = 3;

% Bootstrap instead of surveys (0=no, 1=yes; if so use survey set 1 or 3 -
% inflation seems helpful to bed down the level of inflation)
Boot = 0;

% ------------------------------------------------------------------------
% Maximum-likelihood estimation.
% Loops over: nominal factor count (fac), real factor count (facR), survey
% set (sur) and repeated estimation runs (iter). For every run the whole
% workspace is saved to a .mat file whose name encodes the settings.
% ------------------------------------------------------------------------
for fac = 1:1 %Loop for nominal factors; widen the range to use more entries of Factors
    for facR = 1:1 %Loop for real factors; widen the range to use more entries of FactorsR

        %Choose factor structure
        N_j = 0; %Number of joint factors (zero if next ones are not zero - model seems harder to solve with these)
        N_n = Factors(fac); %Number of nominal factors (zero if joint is not zero)
        N_r = FactorsR(facR); %Number of real factors, orthogonal to nominal factors (zero if joint is not zero)

        if N_n==0
            % Joint factors: principal components of all yields together
            N=N_j;
            W = pcacov(cov(yields_all)); %Construct principal components as 'portfolios' of bonds
            W = W(:,1:N)';  % N*J
            cP = yields_all*W'; % T*N - these are the 'observed' portfolios
        else
            % Nominal factors: principal components of the nominal yields
            W_nom = pcacov(cov(yields)); %Construct principal components as 'portfolios' of nominal bonds
            W_nom = W_nom(:,1:N_n)';  % N_n*q_n
            cP_nom = yields*W_nom'; % T*N_n - these are the 'observed' portfolios for nominal bonds

            % Real factors: principal components of the part of the real
            % yields that is not explained by the nominal factors
            temp.Z = [ones(t-1,1), cP_nom ].'; % create vector of regressors for regression of real yields on nominal factors
            temp.A = yields_r'*temp.Z.'*inv(temp.Z*temp.Z.'); % get coefficient matrix
            temp.residuals = yields_r - (temp.A*temp.Z).'; % Construct residuals, which are orthogonal to nominal factors
            W_real_resid = pcacov(cov(temp.residuals)); %Construct principal components as 'portfolios' of residuals
            W_real_resid = W_real_resid(:,1:N_r)';  % N_r*q_r
            cP_real=temp.residuals*W_real_resid';   % T*N_r - these are the 'observed' portfolios for real bonds

            % Express the real factors as portfolios of ALL yields
            temp.Z1= [ones(t-1,1), yields_all ].'; % Regress all yields on real factors to get loadings
            temp.A1 = cP_real'*temp.Z1.'*inv(temp.Z1*temp.Z1');
            W_real=temp.A1(:,2:end); % N_r*q real loadings (intercept column dropped)
            W=[W_nom, zeros(N_n,q_r-1);W_real]; % N*q All loadings
            cP= [cP_nom, cP_real]; % T*N all factors
            N=N_n+N_r;
        end

        for sur = 2:2 %Loop for which surveys to include. See above

            for iter= 1:4 %Loop for iteration if want to check stability of estimates/ to get a few estimates to run the KF over in case allowing for 'errors' interacts with preferred starting place

                %% Estimate the model by ML.
                %help sample_estimation_fun
                VERBOSE = true;

                %Optimisation program. sample_estimation_fun_r_survey_two_step_all allows
                %for non-fixed P. sample_estimation_fun_r_survey_fix_iter is fixed P which must use if want bootstrap (best
                %not include cash forecasts, inflation forecasts seem to help bed down level of
                %inflation).
                [llks, AcP, BcP, AX, BX, kinfQ, K0P_cP, K1P_cP, sigma_e_n, sigma_e_r, K0Q_cP, K1Q_cP, rho0_cP, rho1_cP, cP, llkP, llkQ,  K0Q_X, K1Q_X, rho0_X_r, rho1_X_r, Sigma_cP, ...
                    rho0_cP_r, rho1_cP_r] = ...
                    sample_estimation_fun_r_survey_two_step_all(yields_all,W,yields,yields_r , mats, mats_r, dt, VERBOSE, s_infl, s_infl_hor, s_infl_std, Surveys{sur,6}, Surveys{sur,7}, Surveys{sur,8}, Surveys{sur,9}, Surveys{sur,10}, Surveys{sur,11}, Surveys{sur,1}, Surveys{sur,2}, Surveys{sur,3},  Surveys{sur,4},Surveys{sur,5}, Boot, N_n, N_r);

                %sample_estimation_fun_r_survey_fix_iter(yields_all,W,yields,yields_r , mats, mats_r, dt, VERBOSE, s_infl, s_infl_hor, s_infl_std, [], [], [], Surveys{sur,1}, Surveys{sur,2}, Surveys{sur,3},  Surveys{sur,4},Boot(boot));

                %[llks, AcP, BcP, AX, BX, kinfQ, K0P_cP, K1P_cP, sigma_e, K0Q_cP, K1Q_cP, rho0_cP, rho1_cP, cP, llkP, llkQ,  K0Q_X, K1Q_X, rho0_X, rho1_X] = ...
                %        sample_estimation_fun_survey(W, yields, mats, surveys_cash, s_cash_hor, surveys_10, s_10_hor, dt, VERBOSE);

                % Now with parameters get fitted values

                %Calculate fitted values of yields
                fit_y_all_obs=ones(t-1,1)*AcP + cP*BcP; % (t-1)*q, model-implied yields for observed
                fit_y=fit_y_all_obs(:,1:q-1); % Just nominal (first q-1 columns)
                fit_y_r=fit_y_all_obs(:,q:end); % Just real (remaining columns)

                %All horizon fitted yields (1 to 120 periods)
                [BcP_all, AcP_all]=gaussianDiscreteYieldLoadingsRecurrence(round((1:1:120)), K0Q_cP, K1Q_cP, Sigma_cP, rho0_cP*dt, rho1_cP*dt, dt);
                [BcP_all_r, AcP_all_r]=gaussianDiscreteYieldLoadingsRecurrence_real(round((1:1:120)), K0Q_cP, K1Q_cP, Sigma_cP, rho0_cP_r*dt, rho1_cP_r*dt, rho0_cP*dt, rho1_cP*dt, dt);

                fit_y_all=ones(t-1,1)*AcP_all + cP*BcP_all;
                fit_y_all_r=ones(t-1,1)*AcP_all_r + cP*BcP_all_r;

                %Calculate fitted values of future expected short rates
                fit_yrf=fit_short(cP,rho0_cP,rho1_cP,K0P_cP,K1P_cP,(1:1:120),N,t-1); %nominal rates
                fit_yrf_r=fit_short(cP,rho0_cP_r,rho1_cP_r,K0P_cP,K1P_cP,(1:1:121),N,t_r-1);%inflation

                % Get steady state implied rates
                ss_cP=-K0P_cP'/K1P_cP'; %steady state implied factors
                ss_i=rho0_cP+ss_cP*rho1_cP; %steady state implied nominal short-rate
                ss_pi=rho0_cP_r+ss_cP*rho1_cP_r;  %steady state implied inflation
                ss_r=ss_i-ss_pi;%steady state implied real short-rate

                % Save output - change name structure to suit.
                % The bootstrap flag in the name is the variable Boot; the
                % previous lower-case "boot" was undefined and made the
                % script fail here, after the estimation had already run.
                % (strcat drops trailing blanks of character inputs, so
                % the resulting file name is more compact than the
                % literals suggest.)
                name = strcat( num2str(N_n), 'nomEx' , num2str(N_r), ' ADJ factors 93 demean extra - ', num2str(Boot), ' surveys ', num2str(sur), ' iteration ', num2str(iter));
                save(name)

            end
        end
    end
end

%% Estimate model by KF - not using assumption of perfectly priced portfolio of bonds
% Runs only when Kalman == 1. Starts from whatever ML parameters are
% currently in the workspace (i.e. the last run of the loop above), stacks
% them into one parameter vector, re-optimises the KF likelihood with
% fmincon and then recomputes fitted yields, expected short rates and
% steady-state rates from the re-optimised parameters (all with suffix _KF).
%
% Layout of the parameter vector X0 / X (M = N*(N+1)/2):
%   1 .. N                          diag(K1Q_X)
%   N+1                             kinfQ
%   N+2 .. N+1+M                    lower-triangular Cholesky factor of Sigma_cP
%   N+M+2                           rho0_X_r
%   N+M+3 .. 2N+M+2                 rho1_X_r
%   2N+M+3 .. 2N+M+N^2+2            K1P_cP (column-stacked)
%   2N+M+N^2+3, +4                  sigma_e_n, sigma_e_r
%   2N+M+N^2+5 .. 2N+M+N^2+8        s_infl_std, s_infl_stdQ, s_infl_stdLR, s_cash_std
% (the index arithmetic below assumes each survey std is a scalar)
if Kalman == 1

    % Indicator/selection matrix for real yields passed to the KF: either
    % the "closest yield" data or all ones (use every real yield)
    if Cut_r==1
        find_r_use=find_r;
    else
        find_r_use=ones(t-1,q_r-1);
    end
        
    %Take lower triangular cholesky decomposition of Sigma to cut down on
    %number of parameters
    L0=chol(Sigma_cP, 'lower');
    inds = find(tril(ones(N)));
    cholSigma_cP = L0(inds);
    
    % If not calibrating, get out the estimated standard deviations for
    % survey errors
    if cal_cash == 0
        [llk_cash, s_cash_std]=llk_fun_cash(yields_all, W, K1P_cP, K0P_cP, rho0_cP, rho1_cP, dt, s_cash, s_cash_hor, s_cash_std);
    end
    if cal_infl == 0
        [llk_infl, s_infl_std]= llk_fun_infl_direct(yields_all, W, K1P_cP, K0P_cP, rho0_cP_r, rho1_cP_r, dt, s_infl, s_infl_hor, s_infl_std);
    end
    if cal_inflLR == 0
        [llk_inflLR, s_infl_stdLR] = llk_fun_infl_LR(yields_all, W, K1P_cP, K0P_cP, rho0_cP_r, rho1_cP_r, dt, s_inflLR, s_inflLR_hor, s_infl_stdLR);
    end
    if cal_inflq == 0
        [llk_inflQ,s_infl_stdQ]  = llk_fun_infl_q(yields_all, W, K1P_cP, K0P_cP, rho0_cP_r, rho1_cP_r, dt, s_inflQ, s_infl_horQ, s_infl_stdQ);
    end
    
    
    %Define function to maximise in KF
    % (wrapper that fixes the data inputs and leaves only the parameters free)
    llk_fun_kf = @(K1Q_X_I, kinfQ_I, cholSigma_cP_I, rho0_X_r_I, rho1_X_r_I, K1P_cP_I, sigma_e_n_I, sigma_e_r_I, s_infl_std_I, s_infl_stdQ_I, s_infl_stdLR_I, s_cash_std_I) ...
        LLK_KF0(yields_all, yields, yields_r, W, K1Q_X_I, kinfQ_I, cholSigma_cP_I, rho0_X_r_I, rho1_X_r_I, mats, mats_r, dt, [], K1P_cP_I, ... 
        sigma_e_n_I, sigma_e_r_I, s_infl, s_infl_hor, s_infl_std_I, s_inflQ, s_infl_horQ, s_infl_stdQ_I,s_inflLR, s_inflLR_hor, s_infl_stdLR_I, s_cash, s_cash_hor,s_cash_std_I,find_r_use);
    
    %Vector of parameter inputs (see layout at the top of this section)
    X0=[ diag(K1Q_X); kinfQ; cholSigma_cP; rho0_X_r; rho1_X_r; vec(K1P_cP); sigma_e_n; sigma_e_r; s_infl_std'; s_infl_stdQ'; s_infl_stdLR'; s_cash_std'];
    
    % Set up bounds for the parameters
    % Linear inequality A*X <= B: A has a single 1 in position N, so the
    % constraint acts on the last entry of diag(K1Q_X) only.
    % NOTE(review): as coded this reads X(N) <= 0.95 - confirm it matches
    % the intent described in the comment on B below.
    A = [zeros(1,N-1),1, zeros(1,8+N+N*(N+1)/2+N^2)];
    B = .95; % Most negative eigenvalue  is greater than negative of this
    Aeq=[];
    Beq=[];
    UB=[0.5.*ones(N,1);inf*ones(1+N*(N+1)/2+1+N,1)]; % Bounds for eigenvalues (equivalent to ones used in first step) and upper bound Sigma_cP
    LB=[-2.*ones(N,1); -inf];
    % Bounds for Sigma_cP (positions of the diagonal / off-diagonal entries
    % of the Cholesky factor inside X; LB and UB grow as they are indexed)
    A0 = ones(N);
    inds_diag    = find(ismember(find(tril(A0)), find(diag(diag(A0))))) + N+1;
    inds_offdiag = find(~ismember(find(tril(A0)), find(diag(diag(A0))))) + N+1;
    LB(inds_diag) = 1e-7;  % Avoid getting singular Sigma_cP, should be positive to be identified
    LB(inds_offdiag) = -inf;
    LB(N+1+N*(N+1)/2+1:N+1+N*(N+1)/2+1+N+N^2)=-inf; % rho0_X_r, rho1_X_r and K1P_cP unbounded below
    % Upper bounds for K1P_cP to ensure stationarity
    inds_diag_k = find(eye(N) == 1)+N+1+N*(N+1)/2+1+N;
    inds_offdiag_k = find(eye(N) ~= 1)+N+1+N*(N+1)/2+1+N;
    UB(inds_diag_k) = -1e-7;
    UB(inds_offdiag_k) = inf;
     % Bounds for sigmas to make positive (last six entries of X)
     UB(N+1+N*(N+1)/2+1+N+N^2+1:8+2*N+N*(N+1)/2+N^2) = inf;
     LB(N+1+N*(N+1)/2+1+N+N^2+1:8+2*N+N*(N+1)/2+N^2)=1e-7;
     
     cons=@nonlconE_r_kf; %Constraint to ensure stationarity under P - could require real eigenvalues too (next line)
 %   cons=@nonlconE_r_kf_imag;     
    TypX=X0+0.0001; % typical parameter magnitudes passed to the optimiser via 'TypicalX'
    
    options = optimset('Display','iter','Diagnostics','on','TolX',1e-8,'TolFun',1e-8,'TypicalX', TypX, 'MaxFunEvals', 10000);
    
    % Do optimisation 3 times to reset hessian. If ends due to too many
    % iterations, re-run the optimisation loop below or increase
    % MaxFunEvals in options
    X=X0;
    for i=1:3
        % The anonymous function unpacks Z according to the parameter
        % layout and each pass restarts from the previous optimum X
        [X, llk_KF, exit_flag,~,~,~,hessian] = fmincon(@(Z) llk_fun_kf(diag(Z(1:N)),Z(N+1), Z(N+2:N+1+N*(N+1)/2),Z(N+N*(N+1)/2+2), Z(N+N*(N+1)/2+3:2*N+N*(N+1)/2+2),reshape(Z(2*N+N*(N+1)/2+3:2*N+N*(N+1)/2+N^2+2),[N,N]),Z(2*N+N*(N+1)/2+N^2+3), Z(2*N+N*(N+1)/2+N^2+4),Z(2*N+N*(N+1)/2+N^2+5),Z(2*N+N*(N+1)/2+N^2+6),Z(2*N+N*(N+1)/2+N^2+7),Z(2*N+N*(N+1)/2+N^2+8)),X,A,B,Aeq,Beq,LB,UB,cons,options);
        fprintf('Likelihood on step %d: %10.10g\tparameters:',i,llk_KF)
        fprintf('%3.3g\t',X)
        fprintf('\n')
    end
    
    %Take optimised parameters
    K1Q_X_KF=diag(X(1:N));
    kinfQ_KF=X(N+1);
    %Reshape chol sigma: fill the lower triangle, then rebuild Sigma = L*L'
    inds = find(tril(ones(N)));
    L(inds) = X(N+2:N+N*(N+1)/2+1);
    L = reshape(L, [N,N]);
    Sigma_cP_KF = L*L';
    
    rho0_X_r_KF=X(N+N*(N+1)/2+2);
    rho1_X_r_KF=X(N+N*(N+1)/2+3:2*N+N*(N+1)/2+2);
    K1P_cP_KF=reshape(X(2*N+N*(N+1)/2+3:2*N+N*(N+1)/2+N^2+2),[N,N]);
    K0P_cP_KF= mean(cP)'-(K1P_cP_KF+eye(N))*mean(cP)'; %Forces steady-state cP to observed mean of factors. Note uses mean of observed as this  is what other parameters optimised on (very little difference in filtered and observed cp means)
    sigma_e_n_KF=X(2*N+N*(N+1)/2+N^2+3);
    sigma_e_r_KF=X(2*N+N*(N+1)/2+N^2+4);
    s_infl_std_KF=X(2*N+N*(N+1)/2+N^2+5);
    s_infl_stdQ_KF=X(2*N+N*(N+1)/2+N^2+6);
    s_infl_stdLR_KF=X(2*N+N*(N+1)/2+N^2+7);
    s_cash_std_KF=X(2*N+N*(N+1)/2+N^2+8);
    
    
    %Re-do to get rest of output after re-optimisation
    [llks_KF, AcP_KF, BcP_KF, AX_KF, BX_KF, K0Q_cP_KF, K1Q_cP_KF, rho0_cP_KF, rho1_cP_KF, rho0_cP_r_KF, rho1_cP_r_KF, cP_KF, yields_filtered_KF, cP_filtered_KF] = ...
        jszLLK_KF_r_start(yields_all, yields, yields_r, W, K1Q_X_KF, kinfQ_KF, Sigma_cP_KF, rho0_X_r_KF, rho1_X_r_KF, mats, mats_r, dt, K0P_cP_KF, K1P_cP_KF, sigma_e_n_KF, sigma_e_r_KF, s_infl, s_infl_hor, s_infl_std_KF, s_inflQ, s_infl_horQ, s_infl_stdQ_KF,s_inflLR, s_inflLR_hor, s_infl_stdLR_KF, s_cash, s_cash_hor,s_cash_std_KF,find_r_use);
    
    
    % Fitted yields at the observed maturities, using the filtered factors
    fit_y_all_obs_KF=ones(t-1,1)*AcP_KF + cP_filtered_KF*BcP_KF; % (t-1)*q, model-implied yields
    fit_y_KF=fit_y_all_obs_KF(:,1:q-1); % nominal columns
    fit_y_r_KF=fit_y_all_obs_KF(:,q:end); % real columns

    %Loading for all fitted yields (1 to 120 periods)
    [BcP_all_KF, AcP_all_KF]=gaussianDiscreteYieldLoadingsRecurrence(round((1:1:120)), K0Q_cP_KF, K1Q_cP_KF, Sigma_cP_KF, rho0_cP_KF*dt, rho1_cP_KF*dt, dt);
    [BcP_all_r_KF, AcP_all_r_KF]=gaussianDiscreteYieldLoadingsRecurrence_real(round((1:1:120)), K0Q_cP_KF, K1Q_cP_KF, Sigma_cP_KF, rho0_cP_r_KF*dt, rho1_cP_r_KF*dt, rho0_cP_KF*dt, rho1_cP_KF*dt, dt);

    fit_y_all_KF=ones(t-1,1)*AcP_all_KF + cP_filtered_KF*BcP_all_KF;
    fit_y_all_r_KF=ones(t-1,1)*AcP_all_r_KF + cP_filtered_KF*BcP_all_r_KF;

    %Calculate fitted values of future expected short rates
    fit_yrf_KF=fit_short(cP_filtered_KF,rho0_cP_KF,rho1_cP_KF,K0P_cP_KF,K1P_cP_KF,(1:1:120),N,t-1); %nominal rates
    fit_yrf_r_KF=fit_short(cP_filtered_KF,rho0_cP_r_KF,rho1_cP_r_KF,K0P_cP_KF,K1P_cP_KF,(1:1:121),N,t_r-1);%inflation

    % Steady-state implied rates from the re-optimised parameters
    ss_cP_KF=-K0P_cP_KF'/K1P_cP_KF'; %steady state implied factors
    ss_i_KF=rho0_cP_KF+ss_cP_KF*rho1_cP_KF; %steady state implied nominal short-rate
    ss_pi_KF=rho0_cP_r_KF+ss_cP_KF*rho1_cP_r_KF;  %steady state implied inflation
    ss_r_KF=ss_i_KF-ss_pi_KF;%steady state implied real short-rate
    
end
