'''
Builds price, cost, demand and other indexes from 'analytical_database.xlsx'.
'''
import pandas as pd
import numpy as np
from scipy.stats import zscore
import time
import datetime
import matplotlib.pyplot as plt
# Reading events meta data, reading database table for index building
df_events = pd.read_csv('Events/events.csv')
df = pd.read_excel('analytical_database.xlsx', parse_dates = ['EventTime'])
df['Quarters'] = df['EventTime'].dt.to_period("Q")
df['HalfYear'] = pd.to_datetime(df['EventTime'].apply(lambda x: datetime.date(x.year, 6, 30) if x.month <7
else datetime.date(x.year, 12, 31)))
### dictionary quarterly and half yearly counts
df_dictionary = df[['EventTime', 'Quarters', 'HalfYear',
'price_aggregate', 'price_aggregate_neg', 'demand_aggregate', 'demand_aggregate_neg',
'cost_aggregate', 'cost_aggregate_neg', 'labour costs','labour costs_neg',
'margin_aggregate', 'margin_aggregate_neg',
'outlook_aggregate', 'outlook_aggregate_neg',
'total_words']]
df_dictionary_q = df_dictionary.sort_values(by = 'Quarters').reset_index()\
.drop(columns = 'index').groupby('Quarters', sort=False).sum() # quarterly
df_dictionary_h = df_dictionary.sort_values(by = 'HalfYear').reset_index()\
.drop(columns = 'index').groupby('HalfYear', sort=False).sum() # half-yearly
### zeroshot quarterly and half yearly counts
df_0shot = df[['EventTime', 'Quarters', 'HalfYear',
'zsl_costs_aggregate', 'zsl_costs_aggregate_neg',
'zsl_price_aggregate','zsl_price_aggregate_neg',
'zsl_consumer demand increase', 'zsl_consumer demand decrease',
'zsl_labour cost increase', 'zsl_labour cost decrease',
'zsl_business margin decrease','zsl_business margin increase',
'zsl_para_count']]
df_0shot_q = df_0shot.sort_values(by = 'Quarters').reset_index()\
.drop(columns = 'index').groupby('Quarters', sort=False).sum() # quarterly
df_0shot_h = df_0shot.sort_values(by = 'HalfYear').reset_index()\
.drop(columns = 'index').groupby('HalfYear', sort=False).sum() # half-yearly
def dictionary_index_df (dataframe, topics = ['cost_aggregate', 'demand_aggregate', 'price_aggregate',
'margin_aggregate', 'outlook_aggregate', 'labour costs']):
'''
constructs dictionary indexes from dataframe that contains
quarterly or half-yearly dictionary keyword counts
'''
res = pd.DataFrame()
for topic in topics:
topic_neg = topic+'_neg'
ind = zscore((dataframe[topic]-dataframe[topic_neg])/dataframe['total_words'])
res[topic] = ind
res.index = dataframe.index
return res
def zeroshot_index_df (dataframe):
'''
constructs zero-shot indexes from dataframe that contains
quarterly or half-yearly zero-shot paragraph counts
'''
res0 = pd.DataFrame()
res0['0shot_cost_index'] = zscore((dataframe['zsl_costs_aggregate']-\
dataframe['zsl_costs_aggregate_neg'])\
/dataframe['zsl_para_count'])
res0['0shot_price_index'] = zscore((dataframe['zsl_price_aggregate']-\
dataframe['zsl_price_aggregate_neg'])\
/dataframe['zsl_para_count'])
res0['0shot_demand_index'] = zscore((dataframe['zsl_consumer demand increase']-\
dataframe['zsl_consumer demand decrease'])\
/dataframe['zsl_para_count'])
res0['0shot_labour_index'] = zscore((dataframe['zsl_labour cost increase']-\
dataframe['zsl_labour cost decrease'])\
/dataframe['zsl_para_count'])
res0['0shot_margin_index'] = zscore((dataframe['zsl_business margin increase']-\
dataframe['zsl_business margin decrease'])\
/dataframe['zsl_para_count'])
res0.index = dataframe.index
return res0
fig, axes = plt.subplots(nrows=2, ncols=2)
fig.set_size_inches(12, 8)
dictionary_index_df(df_dictionary_q).plot(ax = axes[0,0], title = 'Dictionary Index, Quarterly', legend = False)
dictionary_index_df(df_dictionary_h).plot(ax = axes[0,1], title = 'Dictionary Index, Half-Year', legend = False)
zeroshot_index_df (df_0shot_q).plot(ax = axes[1,0], title = 'Zero-shot Index, Quarterly', legend = False)
zeroshot_index_df (df_0shot_h).plot(ax = axes[1,1], title = 'Zero-shot Index, Half-Year', legend = False)
plt.tight_layout()
plt.show()
# Dictionary
df_dict = df[['EventTime','price_aggregate', 'demand_aggregate','cost_aggregate',
'cost_aggregate_neg', 'price_aggregate_neg', 'demand_aggregate_neg', 'total_words']]
df_dict['YearMonth'] = df_dict['EventTime'].dt.to_period('M')
df_dict = df_dict.sort_values(by = 'YearMonth').reset_index().drop(columns = 'index')
df_dict_rolling_sum = df_dict.groupby('YearMonth', sort=False).sum().rolling(13, center= True).sum().dropna()
res = pd.DataFrame()
for topic in ['cost_aggregate', 'demand_aggregate', 'price_aggregate']:
topic_neg = topic+'_neg'
ind = zscore((df_dict_rolling_sum[topic]-df_dict_rolling_sum[topic_neg])/df_dict_rolling_sum['total_words'])
res[topic] = ind
res.index = df_dict_rolling_sum.index
res.plot()
plt.show()
df_0shot = df[['EventTime','zsl_costs_aggregate', 'zsl_costs_aggregate_neg',
'zsl_price_aggregate','zsl_price_aggregate_neg',
'zsl_consumer demand increase', 'zsl_consumer demand decrease', 'zsl_para_count']]
df_0shot['YearMonth'] = df_0shot['EventTime'].dt.to_period('M')
df_0shot = df_0shot.sort_values(by = 'YearMonth').reset_index().drop(columns = 'index')
df_0shot_rolling_sum = df_0shot.groupby('YearMonth', sort=False).sum().rolling(13, center= True).sum().dropna()
res0 = pd.DataFrame()
res0['zsl_cost_index'] = zscore((df_0shot_rolling_sum['zsl_costs_aggregate']-\
df_0shot_rolling_sum['zsl_costs_aggregate_neg'])\
/df_0shot_rolling_sum['zsl_para_count'])
res0['zsl_price_index'] = zscore((df_0shot_rolling_sum['zsl_price_aggregate']-\
df_0shot_rolling_sum['zsl_price_aggregate_neg'])\
/df_0shot_rolling_sum['zsl_para_count'])
res0['zsl_demand_index'] = zscore((df_0shot_rolling_sum['zsl_consumer demand increase']-\
df_0shot_rolling_sum['zsl_consumer demand decrease'])\
/df_0shot_rolling_sum['zsl_para_count'])
res0.index = df_0shot_rolling_sum.index
res0.plot()
plt.show()