In [ ]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import requests
import os
import time
import json
In [ ]:
class RefinitivResultsFetcher:
    """Search company-results filings and download the matching PDF documents.

    The class wraps two HTTP endpoints under ``api.rkd.refinitiv.com``: a
    ``FilingsSearch2`` submission search (``results_url``) and a
    ``FilingsRetrieval3`` document download (``get_documents_url``).

    The connection object passed to the constructor must expose:

    * ``headers`` -- a mapping holding ``X-Trkd-Auth-ApplicationID`` and
      ``X-Trkd-Auth-Token``;
    * ``sendRequest(url, message)`` -- returns a response with a ``json()``
      method;
    * ``session.proxies`` -- a mapping of URL scheme to proxy URL (only read
      when documents are actually downloaded).
    """

    results_url = 'https://api.rkd.refinitiv.com/api/FilingsSearch2/FilingsSearch2.svc/REST/FilingsSearch2_1/SearchSubmissions_1'

    def __init__(self, rconn):
        """Store the connection and build the cookie header used for downloads.

        Args:
            rconn: Connection object (see the class docstring for the
                attributes it must provide).

        Raises:
            KeyError: If either authentication header is missing from
                ``rconn.headers``.
        """
        self.rconn = rconn
        self.headers = {}

        app_id = self.rconn.headers['X-Trkd-Auth-ApplicationID']
        token = self.rconn.headers['X-Trkd-Auth-Token']

        # Document downloads go through plain ``requests.get`` rather than
        # ``rconn.sendRequest``, so the credentials are carried as a cookie.
        self.headers['Cookie'] = f'RkdAppId={app_id}; RkdToken={token};'

    @staticmethod
    def get_documents_url(doc):
        """Return the retrieval URL for the document identified by ``doc``."""
        return f'https://api.rkd.refinitiv.com/api/FilingsRetrieval3/FilingsRetrieval3.svc/docs/{doc}/'

    @staticmethod
    def get_results_msg(perm_ids=None, form_names=None, row_count=0):
        """Build the JSON-serialisable body of a submission search request.

        Args:
            perm_ids: Identifiers placed under ``OAPermID``; defaults to an
                empty list.
            form_names: Form names placed under ``FormName``; defaults to
                the four interim/preliminary results form names.
            row_count: Value of ``rowCount`` in the response options.

        Returns:
            dict: The request message, sorted by ``releaseDate`` descending
            and starting at row 0.
        """
        if perm_ids is None:
            perm_ids = []

        if form_names is None:
            form_names = [
                "InterimResults",
                "PreliminaryResults",
                "PrelimQ2",
                "InterimH1"
            ]

        results_msg = {
            "SearchSubmissions_Request_1": {
                "IDOptions": {
                    "OAPermIDs": {
                        "OAPermID": perm_ids
                    }
                },
                "SECOptions": {
                    "FormNames": {
                        "FormName": form_names
                    }
                },
                "ResponseOptions": {
                    "startRow": 0,
                    "rowCount": row_count,
                    "submissionSortOrder": "releaseDate",
                    "sortDirection": "Desc"
                }
            }
        }

        return results_msg

    def get_max_rows(self, perm_ids, form_names=None):
        """Return the ``totalHit`` value reported for a search.

        The request is sent with ``rowCount`` 0 (the default of
        ``get_results_msg``), so only the hit count is of interest.

        Args:
            perm_ids: Identifiers to search for.
            form_names: Optional form names; see ``get_results_msg``.
        """
        results_msg = self.get_results_msg(perm_ids, form_names)
        response = self.rconn.sendRequest(self.results_url, results_msg)
        return response.json()['SearchSubmissions_Response_1']['totalHit']

    def get_results_df(self, perm_ids, form_names=None):
        """Fetch every matching submission and return them as a DataFrame.

        Two requests are made: one to learn the total hit count and one
        asking for that many rows. The first ``submissionInfo`` entry of each
        result is flattened with ``pandas.json_normalize``.

        Args:
            perm_ids: Identifiers to search for.
            form_names: Optional form names; see ``get_results_msg``.

        Returns:
            pandas.DataFrame: One row per submission; empty when the response
            carries no submissions.
        """
        max_rows = self.get_max_rows(perm_ids, form_names)
        results_msg = self.get_results_msg(perm_ids, form_names, max_rows)
        response = self.rconn.sendRequest(self.results_url, results_msg)

        payload = response.json()['SearchSubmissions_Response_1']
        # NOTE(review): presumably the service can omit this key when nothing
        # matches -- treat a missing/empty value as "no results" instead of
        # raising KeyError. Confirm against the API documentation.
        results = payload.get('submissionStatusAndInfo') or []
        entries = [result['submissionInfo'][0] for result in results]

        # json_normalize already returns a DataFrame; no extra wrapping needed.
        return pd.json_normalize(entries)

    def write_pdfs_to_path(self, df=None, doc_ids=None, pdf_path='Results'):
        """Download documents as ``<doc_id>.pdf`` files into ``pdf_path``.

        Documents that already have a ``.pdf`` file in ``pdf_path`` are
        skipped, so the call can be repeated to resume a partial download.

        Args:
            df: Optional DataFrame with ``fileType`` and ``commonID`` columns.
                When given, the unique ``commonID`` values of the rows whose
                ``fileType`` is ``'pdf'`` are downloaded and ``doc_ids`` is
                ignored.
            doc_ids: Optional iterable of document ids, used when ``df`` is
                ``None``.
            pdf_path: Output directory; created if it does not exist.

        Returns:
            list: Ids of the documents whose download did not return HTTP
            200 (empty when everything succeeded or nothing was requested).
        """
        bad_docs = []

        if df is not None:
            df = df[df['fileType'] == 'pdf']
            doc_ids = df['commonID'].unique().tolist()
        elif doc_ids is None:
            return bad_docs

        # A fresh run has no output directory yet; listdir would raise.
        os.makedirs(pdf_path, exist_ok=True)

        # Only real PDF files count as "already downloaded"; stripping four
        # characters from every name would also match unrelated files.
        processed_docs = {
            os.path.splitext(name)[0]
            for name in os.listdir(pdf_path)
            if name.lower().endswith('.pdf')
        }

        pending = set(doc_ids) - processed_docs
        if not pending:
            return bad_docs

        # Reuse the session's proxies, each scheme mapped to its own entry
        # (they were previously crossed over). Schemes the session does not
        # configure are simply left out.
        session_proxies = self.rconn.session.proxies
        proxies = {
            scheme: session_proxies[scheme]
            for scheme in ('http', 'https')
            if scheme in session_proxies
        }

        for doc in tqdm(pending):
            response = requests.get(
                url=self.get_documents_url(doc),
                headers=self.headers,
                proxies=proxies,
            )

            if response.status_code != 200:
                bad_docs.append(doc)
                print(f'{response.status_code} bad count: {len(bad_docs)}')
                continue

            with open(os.path.join(pdf_path, f'{doc}.pdf'), 'wb') as file:
                file.write(response.content)

        return bad_docs
In [ ]: