In [ ]:
import pandas as pd
from tqdm import tqdm
In [ ]:
# NOTE(review): BAD_FIELDS is not referenced by any code visible in this
# notebook -- presumably a set of field names to exclude; confirm or remove.
BAD_FIELDS = {'Gics', 'GicsLeaf', 'GicsName'}
# Page size sent as Paging.Top in every search request. It is also the
# threshold above which clean_listing_responses reports a truncated result set.
MAX_RESULTS = 10000
# GicsCode values queried one request at a time by get_historical_listings so
# that each individual request stays within MAX_RESULTS.
# Presumably these are the 8-digit GICS sub-industry codes -- TODO confirm the
# list is complete and current.
GICS_STRINGS = [
    '10101020',
    '10102010',
    '10102020',
    '10102030',
    '10102040',
    '10102050',
    '15101010',
    '15101020',
    '15101030',
    '15101040',
    '15101050',
    '15102010',
    '15103010',
    '15103020',
    '15104010',
    '15104020',
    '15104025',
    '15104030',
    '15104040',
    '15104045',
    '15104050',
    '15105010',
    '15105020',
    '20101010',
    '20102010',
    '20103010',
    '20104010',
    '20104020',
    '20105010',
    '20106010',
    '20106015',
    '20106020',
    '20107010',
    '20201010',
    '20201050',
    '20201060',
    '20201070',
    '20201080',
    '20202010',
    '20202020',
    '20301010',
    '20302010',
    '20303010',
    '20304010',
    '20304020',
    '20305010',
    '20305020',
    '20305030',
    '25101010',
    '25101020',
    '25102010',
    '25102020',
    '25201010',
    '25201020',
    '25201030',
    '25201040',
    '25201050',
    '25202010',
    '25203010',
    '25203020',
    '25203030',
    '25301010',
    '25301020',
    '25301030',
    '25301040',
    '25302010',
    '25302020',
    '25501010',
    '25502020',
    '25503010',
    '25503020',
    '25504010',
    '25504020',
    '25504030',
    '25504040',
    '25504050',
    '25504060',
    '30101010',
    '30101020',
    '30101030',
    '30101040',
    '30201010',
    '30201020',
    '30201030',
    '30202010',
    '30202030',
    '30203010',
    '30301010',
    '30302010',
    '35101010',
    '35101020',
    '35102010',
    '35102015',
    '35102020',
    '35102030',
    '35103010',
    '35201010',
    '35202010',
    '35203010',
    '40101010',
    '40101015',
    '40102010',
    '40201020',
    '40201030',
    '40201040',
    '40202010',
    '40203010',
    '40203020',
    '40203030',
    '40203040',
    '40204010',
    '40301010',
    '40301020',
    '40301030',
    '40301040',
    '40301050',
    '45102010',
    '45102020',
    '45102030',
    '45103010',
    '45103020',
    '45201020',
    '45202030',
    '45203010',
    '45203015',
    '45203020',
    '45203030',
    '45301010',
    '45301020',
    '50101010',
    '50101020',
    '50102010',
    '50201010',
    '50201020',
    '50201030',
    '50201040',
    '50202010',
    '50202020',
    '50203010',
    '55101010',
    '55102010',
    '55103010',
    '55104010',
    '55105010',
    '55105020',
    '60101010',
    '60101020',
    '60101030',
    '60101040',
    '60101050',
    '60101060',
    '60101070',
    '60101080',
    '60102010',
    '60102020',
    '60102030',
    '60102040'
]
In [ ]:
 
In [ ]:
# Joins GICs labels to listings, Sector/Industry etc.
def add_GICs(listings_df, gics_labels_path):
    """Left-join the GICS label table onto a listings frame.

    Parameters
    ----------
    listings_df : pandas.DataFrame
        Listings with a ``GicsCode`` column. The column is cast to float for
        the join; the caller's frame is left unmodified.
    gics_labels_path : str, path-like or file-like
        CSV readable by ``pd.read_csv``; it must contain a ``GicsCode``
        column, which is the join key.

    Returns
    -------
    pandas.DataFrame
        A new frame with every row of ``listings_df`` (left join), a float
        ``GicsCode`` column, and the label columns from the CSV. Rows whose
        code has no match in the CSV get NaN in the label columns.

    Raises
    ------
    KeyError
        If ``listings_df`` has no ``GicsCode`` column.
    """
    gics_labels = pd.read_csv(gics_labels_path)

    # assign() returns a new frame, so the cast never writes into the caller's
    # DataFrame (which may be a filtered slice of another frame).
    listings_with_float_code = listings_df.assign(
        GicsCode=listings_df['GicsCode'].astype(float)
    )

    return listings_with_float_code.merge(gics_labels, how='left', on='GicsCode')
In [ ]:
class RefinitivListingFetcher:
    """Fetch ASX ordinary-equity listings through the Refinitiv Search2 REST endpoint.

    The connection object passed to the constructor only needs a
    ``sendRequest(url, msg)`` method whose return value has a ``.json()``
    method; nothing else about it is used here.
    """

    url = 'https://api.rkd.refinitiv.com/api/Search2/Search2.svc/REST/Search2_1/Search_1'

    # Filter clause and property list shared by every request this class builds.
    _BASE_FILTER = "ExchangeCode eq 'ASX' and AssetType eq 'EQUITY' and AssetCategoryName eq 'Equity/Ordinary'"
    _RESPONSE_PROPERTIES = "CommonName,IssuerCommonName,TickerSymbol,PrimaryRIC,RIC,PermID,IssuerOAPermID,OrganisationStatus,ListingStatusName,GicsCode"

    def __init__(self, rconn) -> None:
        self.rconn = rconn

    def _fetch(self, request_msg):
        """Send one search request and return the decoded JSON body."""
        return self.rconn.sendRequest(self.url, request_msg).json()

    @staticmethod
    def _build_request_msg(extra_filter):
        """Build a search request whose filter is the shared base clause AND ``extra_filter``."""
        return {
            "Search_Request_1": {
                "Collection": "EquityQuotes",
                "Paging": {
                    "Top": MAX_RESULTS,
                    "Skip": 0
                },
                "Filter": f"{RefinitivListingFetcher._BASE_FILTER} and {extra_filter}",
                "ResponseProperties": RefinitivListingFetcher._RESPONSE_PROPERTIES,
                "UnentitledAccess": True
            }
        }

    def update_listings(self, old_path, gics_labels_path, new_path=None):
        """Append currently listed securities that are missing from an existing listings file.

        Fetches the listings whose status is 'Listed', keeps those whose
        ``RIC`` is not already in the CSV at ``old_path``, joins GICS labels
        onto them with ``add_GICs`` and appends them to the old listings.

        Parameters
        ----------
        old_path : CSV (anything ``pd.read_csv`` accepts) with a ``RIC`` column.
        gics_labels_path : CSV of GICS labels, forwarded to ``add_GICs``.
        new_path : optional destination; when given, the combined listings
            are written there with ``DataFrame.to_csv``.

        Returns
        -------
        pandas.DataFrame
            Old listings followed by the newly found ones. If the search
            returns no rows, the old listings are returned unchanged.

        NOTE(review): ``to_csv`` is called with its defaults, so the row index
        is written as an extra unnamed column; re-reading that file as
        ``old_path`` will surface it as a data column. Confirm whether
        ``index=False`` is wanted before changing the file format.
        """
        response = self._fetch(self.get_update_request_msg())
        clean_responses = self.clean_listing_responses([response], labels=['Currently listed'])

        curr_listings = pd.DataFrame(clean_responses)
        old_listings = pd.read_csv(old_path)

        if curr_listings.empty:
            # No rows came back, so the frame has no 'RIC' column to compare on.
            all_listings = old_listings
        else:
            is_new = ~curr_listings['RIC'].isin(old_listings['RIC'])
            # .copy() detaches the filtered rows from curr_listings so the
            # downstream join never operates on a view of another frame.
            new_listings = add_GICs(curr_listings[is_new].copy(), gics_labels_path)
            all_listings = pd.concat([old_listings, new_listings], ignore_index=True)

        if new_path is not None:
            all_listings.to_csv(new_path)

        return all_listings

    def get_historical_listings(self, gics_labels_path, save_path=None):
        """Fetch every listing, one request per GICS code plus one for a null code.

        A single request returns at most ``MAX_RESULTS`` rows, so the query is
        split by the codes in ``GICS_STRINGS``; listings without a GICS code
        are fetched by ``get_null_gics_response``.

        Parameters
        ----------
        gics_labels_path : CSV of GICS labels, forwarded to ``add_GICs``.
        save_path : optional destination; when given, the result is written
            there with ``DataFrame.to_csv`` (default options, index included).

        Returns
        -------
        pandas.DataFrame
            All fetched listings with the GICS label columns joined on.
        """
        responses = []
        labels = []

        for gics in tqdm(GICS_STRINGS):
            responses.append(self._fetch(self.get_gics_request_msg(gics)))
            labels.append(f'GICS: {gics}')

        responses.append(self.get_null_gics_response())
        labels.append('GICS: null')

        clean_responses = self.clean_listing_responses(responses, labels=labels)
        listings = add_GICs(pd.DataFrame(clean_responses), gics_labels_path)

        if save_path is not None:
            listings.to_csv(save_path)

        return listings

    @staticmethod
    def get_update_request_msg():
        """Return the request for listings whose ListingStatusName is 'Listed'."""
        return RefinitivListingFetcher._build_request_msg("ListingStatusName eq 'Listed'")

    @staticmethod
    def get_gics_request_msg(gics):
        """Return the request for listings whose GicsCode equals ``gics``.

        ``gics`` is interpolated into the filter string as-is, so it is
        expected to be one of the trusted codes from ``GICS_STRINGS``.
        """
        return RefinitivListingFetcher._build_request_msg(f"GicsCode eq '{gics}'")

    def get_null_gics_response(self):
        """Fetch and return the decoded response for listings whose GicsCode is null."""
        return self._fetch(self._build_request_msg("(GicsCode eq null)"))

    @staticmethod
    def clean_listing_responses(responses, labels=None):
        """Flatten raw search responses into a list of ``{property name: value}`` dicts.

        Parameters
        ----------
        responses : iterable of decoded response bodies. Each must contain
            ``Search_Response_1.ResultsHeader`` with ``TotalResults`` and
            ``Results``; bodies with results must also contain
            ``Search_Response_1.Results.Result``.
        labels : optional sequence parallel to ``responses``; ``labels[i]``
            names response ``i`` in the truncation warning. When omitted, the
            warning falls back to ``GICS: <GICS_STRINGS[i]>`` if that index
            exists, otherwise to ``Response <i>``.

        Returns
        -------
        list of dict
            One dict per result, in response order. Responses reporting zero
            total or zero returned results are skipped.
        """
        clean_responses = []

        for i, response in enumerate(responses):
            body = response['Search_Response_1']
            total_results = body['ResultsHeader']['TotalResults']
            returned_results = body['ResultsHeader']['Results']

            if total_results == 0 or returned_results == 0:
                continue

            if total_results > MAX_RESULTS:
                # The response index is not always a valid GICS_STRINGS index
                # (the null-GICS response is appended after the per-code ones),
                # so resolve the label defensively.
                if labels is not None and i < len(labels):
                    label = labels[i]
                elif i < len(GICS_STRINGS):
                    label = f'GICS: {GICS_STRINGS[i]}'
                else:
                    label = f'Response {i}'
                print(f'{label} only returned {returned_results}/{total_results} results')

            # Each result carries a list of {'name': ..., 'Value': ...} pairs.
            for result in body['Results']['Result']:
                clean_responses.append(
                    {kvpair['name']: kvpair['Value'] for kvpair in result["Property"]}
                )

        return clean_responses