%% RUN_data
%
% This script estimates the core/periphery structures of an array of
% networks and reproduces Figures 2,3,8-11 in Brassil and Nodari (2018).
%
% =========================================================
% AUTHOR AND INSTITUTION INFO:
% Written by Anthony Brassil and Gabriela Nodari
% Reserve Bank of Australia
% This version: January 2018
% ==========================================================

% Reset the session before running: wipe the workspace, close any open
% figure windows and clear the command window.
clear all
close all
clc

%% Parameters
Iter   = 5e3; % Number of random starting points for optimisation algorithm
Vstart = 1;   % Index of the first estimator to run
Vend   = 4;   % Index of the last estimator to run

% Flag switches the paper-specific steps on (1) or off (0).
% Flag=1: intended for the case where data.mat and ID.mat hold the dataset
% used in Brassil and Nodari (2018). The script then replicates the figures
% and statistics in the paper, relying on features of that dataset (e.g.
% which banks are the major banks) that do not carry over to other
% datasets.
% Flag=0: use this setting for any other dataset, or for data at a
% frequency other than quarterly.
Flag = 0;

%% Load data
% Load the data file, which must contain two variables:
% 1. banks_status_mtx is a TxN matrix. Element (t,i) contains the status of
% node i in network t. If any edges are connected to node i in network t,
% the node (bank) is active and element (t,i) equals one. If node i has no
% edges in network t but has edges in some other network s~=t, element
% (t,i) equals zero. If node i has no edges in any network, it is never
% active and element (t,i) equals NaN for all t.
%
% 2. loans_aggreg contains five columns: 
% The first column is an ID number that is the same for all edges in 
% network t (e.g. the ID is the date for quarter t). The second column 
% contains the ID of the lending node, the third column contains the ID of 
% the borrowing node. If network t has an edge from node k to node m, then 
% there must be a row in loans_aggreg with the ID of network t in the first 
% column, the ID of node k in the second column, and the ID of node m in 
% the third column. Each element in the fourth column equals the number of 
% loans going along that edge in network t. Each element in the fifth
% column equals the total value of loans going along that edge in network
% t.

% IMPORTANT:
% 1. Node IDs must be positive integers. To minimise the size of adjacency
% matrices, produce node IDs starting with 1 and with increments of 1.
% 2. In loans_aggreg, the edges for network t must all be in rows above
% the edges in network t+1 (for all t). In other words, the loans_aggreg
% matrix must be sorted so that the edges for network 1 come first, the
% edges for network 2 come second, and so on. To explain why:
% 2a. If the networks are not in the correct order, they will not align 
% with banks_status_mtx. 
% 2b. If there are some edges for network t, then some edges for network 
% t+1, then some edges for network t again (i.e. the edges are not grouped 
% together), then the adjacency matrices will not be properly constructed.

% Bring only the two required variables into the workspace.
load data.mat banks_status_mtx loans_aggreg

%% Construct adjacency matrices
% getNetwork (defined elsewhere) builds the adjacency matrices from the
% edge list and the node-status matrix. The second dimension of adj_ALL is
% used below as the number of networks.
adj_ALL = getNetwork(loans_aggreg, banks_status_mtx);

%% Estimate core/periphery structure
% Preallocate the outputs of coreEst. The cell arrays have one row per
% estimator and one column per network; the core-size matrices have one row
% per network and one column per estimator.
nNet = size(adj_ALL,2);
CPvecGlob     = cell(Vend,nNet);
CPvecAllGlob  = cell(Vend,nNet);
coreSize_min  = zeros(nNet,Vend);
coreSize_max  = zeros(nNet,Vend);

% Run each requested estimator over all networks, printing progress first.
for est = Vstart:Vend
    disp(['Version: ',num2str(est),'/',num2str(Vend)])
    [CPvecGlob(est,:), CPvecAllGlob(est,:), ...
        coreSize_min(:,est), coreSize_max(:,est)] ...
        = coreEst(banks_status_mtx, adj_ALL, est, Iter);
end
% Remove the temporaries so they are neither visible to the plotting
% scripts nor written to the output file.
clear est nNet

%% Plot Figure 8
plot_figure_8

%% Select core/periphery used in paper
% This section and the two that follow use row 3 of CPvecGlob/CPvecAllGlob,
% i.e. the output of estimator 3 (the density-based estimator). Stop with a
% clear message if that estimator was excluded through Vstart/Vend, rather
% than failing on an out-of-range index (Vend<3) or passing never-filled
% empty cells to DensIndex (Vstart>3).
if Vstart > 3 || Vend < 3
    error('RUN_data:densityEstimatorNotRun', ...
        ['Estimator 3 (density-based) was not run (Vstart=%d, Vend=%d). ', ...
        'It is required to construct the density index.'], Vstart, Vend);
end

% For the density-based estimator, there are two quarters where two
% error-minimising CP splits were found. This is discussed in the paper.
% The code below selects the CP split used for figures 9 and 10.
% The network indices (14 and 43) and the selection rules are specific to
% the dataset used in the paper, hence the Flag==1 condition.
if Flag == 1
    % Network 14: keep the row(s) of the candidate splits whose second
    % column equals 0.
    tmp = CPvecAllGlob{3,14};
    CPvecGlob{3,14} = tmp(tmp(:,2)==0,:);
    % Network 43: keep the row(s) of the candidate splits whose first
    % column equals 1.
    tmp2 = CPvecAllGlob{3,43};
    CPvecGlob{3,43} = tmp2(tmp2(:,1)==1,:);
    clear tmp tmp2
end

%% Construct density index
% DensIndex (defined elsewhere) receives the estimator-3 splits for all
% networks, the node-status matrix and the adjacency matrices.
Dens = DensIndex(CPvecGlob(3,:),banks_status_mtx,adj_ALL);

%% Plot figure 10
plot_figure_10

%% Plot figure 11
% Figure 11 is only produced when replicating the paper (Flag=1), because
% it needs the major-bank indicator stored in ID.mat.
if Flag == 1
    % Loan values along each edge, with one page per network
    Vol_array = getVols(loans_aggreg, banks_status_mtx);

    % Load the bank identifiers; the indicator maj is used below.
    load('ID.mat')

    % Volume series, one row per network, expressed in billions:
    %   column 1 - rows of Vol_array with maj==1, columns with maj==0
    %   column 2 - rows of Vol_array with maj==0, columns with maj==1
    Vol = zeros(size(banks_status_mtx,1), 2);
    for t = 1:size(Vol,1)
        Vol(t,:) = [sum(sum(Vol_array(maj==1,maj'==0,t),1),2), ...
                    sum(sum(Vol_array(maj==0,maj'==1,t),1),2)] ./ 1e9;
    end

    plot_figure_11
    clear('Vol_array','t')
end

%% Plot Figure 9
% When Flag=1, plot_figure_9 relies on the contents of ID.mat, which were
% loaded in the Figure 11 section above.
plot_figure_9

% Drop the ID.mat variables once the last figure that needs them is drawn.
if Flag == 1, clear('maj','foreign'), end

%% Plot figures 2 and 3
% getDegs (defined elsewhere) returns the degree-related outputs that are
% left in the workspace for plot_figures_2_3.
[FDeg, FDegin, FDegout, FDegDistin, FDegDistout, ...
    logprobDatain, logprobDataout] = getDegs(adj_ALL, banks_status_mtx);
plot_figures_2_3

%% Save output
% The file name carries the number of networks (rows of banks_status_mtx).
% Calling save with no variable list writes every workspace variable.
filename = strcat('Core_', num2str(size(banks_status_mtx,1)), '.mat');
save(filename, '-v7.3')
    