class RefinitivResultsFetcher:
    """Search Refinitiv (RKD) filings and download the matching documents.

    The fetcher wraps an already-authenticated connection object
    (``rconn``).  From this class' point of view the connection must expose:

    * ``headers`` -- a mapping containing ``X-Trkd-Auth-ApplicationID`` and
      ``X-Trkd-Auth-Token``;
    * ``sendRequest(url, message)`` -- returns an object with a ``json()``
      method;
    * ``session.proxies`` -- a mapping of URL scheme to proxy URL (only read
      when documents are actually downloaded).
    """

    # Endpoint used for every submission search issued by this class.
    results_url = 'https://api.rkd.refinitiv.com/api/FilingsSearch2/FilingsSearch2.svc/REST/FilingsSearch2_1/SearchSubmissions_1'

    def __init__(self, rconn):
        """Store the connection and build the cookie header for downloads.

        Args:
            rconn: Authenticated connection object (see the class docstring
                for the attributes that are used).

        Raises:
            KeyError: If either authentication header is missing on ``rconn``.
        """
        self.rconn = rconn
        app_id = self.rconn.headers['X-Trkd-Auth-ApplicationID']
        token = self.rconn.headers['X-Trkd-Auth-Token']
        # The document download request authenticates through a cookie built
        # from the same credentials the connection carries as headers.
        self.headers = {'Cookie': f'RkdAppId={app_id}; RkdToken={token};'}

    @staticmethod
    def get_documents_url(doc):
        """Return the retrieval URL for the document with id ``doc``."""
        return f'https://api.rkd.refinitiv.com/api/FilingsRetrieval3/FilingsRetrieval3.svc/docs/{doc}/'

    @staticmethod
    def get_results_msg(perm_ids=None, form_names=None, row_count=0):
        """Build the request payload for a submission search.

        Args:
            perm_ids: List of ``OAPermID`` values to search for.  Defaults to
                an empty list.
            form_names: List of form names to filter on.  Defaults to the
                four interim/preliminary result form names below.
            row_count: Value sent as ``rowCount``.

        Returns:
            A new dict ready to be passed to ``rconn.sendRequest``.  Results
            are requested from row 0, sorted by release date, descending.
        """
        if perm_ids is None:
            perm_ids = []
        if form_names is None:
            form_names = [
                "InterimResults",
                "PreliminaryResults",
                "PrelimQ2",
                "InterimH1",
            ]
        return {
            "SearchSubmissions_Request_1": {
                "IDOptions": {
                    "OAPermIDs": {
                        "OAPermID": perm_ids,
                    },
                },
                "SECOptions": {
                    "FormNames": {
                        "FormName": form_names,
                    },
                },
                "ResponseOptions": {
                    "startRow": 0,
                    "rowCount": row_count,
                    "submissionSortOrder": "releaseDate",
                    "sortDirection": "Desc",
                },
            },
        }

    def get_max_rows(self, perm_ids, form_names=None):
        """Return the ``totalHit`` value reported for the given search.

        The search is sent with the default ``row_count`` of 0, so only the
        hit count of the response is used.
        """
        results_msg = self.get_results_msg(perm_ids, form_names)
        response = self.rconn.sendRequest(self.results_url, results_msg)
        return response.json()['SearchSubmissions_Response_1']['totalHit']

    def get_results_df(self, perm_ids, form_names=None):
        """Fetch all matching submissions and return them as a DataFrame.

        Two requests are made: one to learn the total hit count and one that
        asks for exactly that many rows.

        Returns:
            A ``pandas.DataFrame`` with one row per submission, built from the
            first ``submissionInfo`` entry of each result.  The frame is empty
            when the response carries no submissions.
        """
        max_rows = self.get_max_rows(perm_ids, form_names)
        results_msg = self.get_results_msg(perm_ids, form_names, max_rows)
        response = self.rconn.sendRequest(self.results_url, results_msg)
        payload = response.json()['SearchSubmissions_Response_1']
        # Tolerate a response without any submissions instead of raising
        # KeyError on the missing list.
        results = payload.get('submissionStatusAndInfo') or []
        entries = [result['submissionInfo'][0] for result in results]
        # json_normalize already returns a DataFrame; no extra wrapping needed.
        return pd.json_normalize(entries)

    def _get_proxies(self):
        """Return the session's http/https proxies, each under its own scheme."""
        session_proxies = self.rconn.session.proxies
        return {
            scheme: session_proxies[scheme]
            for scheme in ('http', 'https')
            if scheme in session_proxies
        }

    def write_pdfs_to_path(self, df=None, doc_ids=None, pdf_path='Results'):
        """Download documents as ``<pdf_path>/<doc id>.pdf``.

        Documents that already have a ``.pdf`` file in ``pdf_path`` are
        skipped, so the method can be re-run to resume a download.

        Args:
            df: Optional DataFrame with ``fileType`` and ``commonID`` columns.
                When given, the ids of its ``pdf`` rows are used and
                ``doc_ids`` is ignored.
            doc_ids: Optional iterable of document ids, used when ``df`` is
                ``None``.
            pdf_path: Target directory; created if it does not exist.

        Returns:
            List of document ids (as strings) whose download did not return
            HTTP 200.  Empty when everything succeeded or nothing was
            requested.
        """
        if df is not None:
            pdf_rows = df[df['fileType'] == 'pdf']
            doc_ids = pdf_rows['commonID'].unique().tolist()
        elif doc_ids is None:
            return []

        # listdir fails on a missing directory, so make sure it exists first.
        os.makedirs(pdf_path, exist_ok=True)
        # Only real ``.pdf`` files count as already downloaded; strip the
        # extension properly instead of blindly cutting four characters.
        processed_docs = {
            os.path.splitext(name)[0]
            for name in os.listdir(pdf_path)
            if name.lower().endswith('.pdf')
        }
        # Ids are compared with file names, so normalise them to strings.
        pending = {str(doc) for doc in doc_ids} - processed_docs
        bad_docs = []
        if not pending:
            return bad_docs

        proxies = self._get_proxies()
        for doc in tqdm(pending):
            response = requests.get(
                url=self.get_documents_url(doc),
                headers=self.headers,
                proxies=proxies,
            )
            if response.status_code != 200:
                bad_docs.append(doc)
                print(f'{response.status_code} bad count: {len(bad_docs)}')
                continue
            with open(os.path.join(pdf_path, f'{doc}.pdf'), 'wb') as file:
                file.write(response.content)
        return bad_docs