'''
This notebook automates the following processes:
1. Discover new earnings call transcripts from ASX-listed companies using the Refinitiv API
2. Download new transcripts by iterating through the list of new transcript ids AND
   the list of transcripts that previously failed to download
'''
import rba
from rba.sandpit import SandpitConnection
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
import functools as ft
import numpy as np
import datetime
from tqdm.notebook import tqdm
from RefinitivConnection import RefinitivConnection
from RefinitivEarningsFetcher import RefinitivEarningsFetcher
from RefinitivEarningsParser import RefinitivEarningsParser
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import yaml
import requests  # used to build the HTTP session passed to RefinitivConnection
with open('creds.yml', 'r') as file:
    creds = yaml.safe_load(file)
session = requests.Session()  # assumed: RefinitivConnection accepts a requests.Session for its HTTP calls
rconn = RefinitivConnection(creds['RKD_USERNAME'], creds['RKD_PASSWORD'], creds['RKD_APP_ID'], session)
#### Creating class instances
earnings_fetcher = RefinitivEarningsFetcher(rconn)
earnings_parser = RefinitivEarningsParser(rconn)
old_events = pd.read_csv('Events/events.csv') # load existing events list
ric_list = list(set(old_events['RIC']))
TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
max_time = pd.to_datetime(old_events['EventTime']).max() # last event time
start = (max_time - datetime.timedelta(days = 180)).strftime(TIME_FORMAT) # setting start time to be 6 months into the past
new_events = earnings_fetcher.fetch_events(RICs = ric_list, start = start,
                                           listings_path = 'Listings/asx_listings.csv') # fetching new events
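# Sanity check on the fetch before merging (illustrative print, not part of the fetcher's API)
print(f'Fetched {len(new_events)} events since {start}')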
events = pd.concat([old_events, new_events], ignore_index=True) # merging new events into old
events = events.drop_duplicates(subset='EventId') # dropping duplicates
events = events[~events['TranscriptId'].isna()] # drop NAN transcriptIds
mypath = './Transcripts'
transcript_list = [f for f in listdir(mypath) if isfile(join(mypath, f))] # list of downloaded transcript files
transcript_ids = {f.replace('.txt', '') for f in transcript_list} # strip the .txt extension to get transcript ids
to_load = [i for i in events.TranscriptId if i not in transcript_ids] # ids not yet downloaded
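# Step 2 of the docstring also retries transcripts that failed on earlier runs.
# A minimal sketch, assuming failed ids are persisted one per line in 'Events/failed_to_load.txt'
# (a hypothetical file name; it is written after the download loop below):
if os.path.exists('Events/failed_to_load.txt'):
    with open('Events/failed_to_load.txt') as f:
        previously_failed = [line.strip() for line in f if line.strip()]
    to_load = list(set(to_load) | set(previously_failed))  # merge and de-duplicate ids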
############ Downloading New Transcripts ##################
failed_to_load = []
downloaded = []
for i in tqdm(list(reversed(to_load))):
    text = earnings_parser.get_transcript_xml(i)
    if text is None:  # parser returns None when the download fails
        failed_to_load.append(i)
    else:
        print('Download success: ', i)
        with open(f'./Transcripts/{i}.txt', 'w', encoding='utf-8') as text_file:
            text_file.write(text)
        downloaded.append(i)
    print(f'Failed Count: {len(failed_to_load)}',
          f'Success Count: {len(downloaded)}', end = '\r')
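# Persist this run's failures so the next run can retry them (same assumed file as above)
with open('Events/failed_to_load.txt', 'w') as f:
    f.write('\n'.join(failed_to_load))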
events = events.drop(columns = 'Unnamed: 0', errors = 'ignore') # drop the stray index column if present
events.to_csv('Events/events.csv', index = False) # write without the index so 'Unnamed: 0' is not re-created