#!/usr/bin/env python

# Example of exporting registrations, members, and transactions with batched
# results. A limited number of results are returned in each response. It can
# vary based on the type, but is generally around 1000 records.

# install dependencies with: python -m pip install -r requirements.txt
import argparse
import json
import time
import random

import jwt
import requests

parser = argparse.ArgumentParser()
parser.add_argument('--site-id', type=int, required=True)
parser.add_argument('--client-id', required=True,
                    help='client id for site. Probably the same as the certificate filename basename')
parser.add_argument('--pem-file', required=True, help='filename for certificate key in PEM format')
parser.add_argument('--type', required=True,
                    choices=['registrations-2', 'members-2', 'transactions-2', 'accountingCodes'],
                    help='type of records to export')
parser.add_argument('--domain', default='leagueapps.io')
parser.add_argument('--auth', default='https://auth.leagueapps.io')
parser.add_argument('--last-updated', type=int, default=0)
parser.add_argument('--last-id', type=int, default=0)
parser.add_argument('--additional-params', nargs='*',
                    help='additional query parameters in format key=value')
args = parser.parse_args()

# --auth has a default, so the fallback only applies when an empty value is
# passed explicitly (e.g. --auth '').
if args.auth:
    print("using auth server {}".format(args.auth))
    auth_host = args.auth
else:
    auth_host = 'https://auth.leagueapps.io'


def request_access_token(auth_host_url, client_id, pem_file, timeout=10):
    """Request an OAuth 2 access token using a signed JWT assertion.

    Reads the private key from ``pem_file``, signs a short-lived (300 second)
    RS256 JWT whose issuer and subject are ``client_id``, and posts it to
    ``<auth_host_url>/v2/auth/token`` with the JWT bearer grant type.

    Args:
        auth_host_url: Base URL of the auth server, without a trailing slash.
        client_id: Client id used for the ``iss`` and ``sub`` claims.
        pem_file: Path of the PEM file holding the signing key.
        timeout: Seconds to wait for the auth server before giving up.

    Returns:
        The ``access_token`` string from the JSON response, or ``None`` when
        the request times out or the server answers with a non-200 status.
    """
    with open(pem_file, 'r') as f:
        key = f.read()

    now = int(time.time())
    auth_url = '{}/v2/auth/token'.format(auth_host_url)
    claims = {
        # NOTE(review): the audience is fixed to the production token URL even
        # when auth_host_url points elsewhere -- confirm that is what the auth
        # server expects.
        'aud': 'https://auth.leagueapps.io/v2/auth/token',
        'iss': client_id,
        'sub': client_id,
        'iat': now,
        'exp': now + 300
    }
    assertion = jwt.encode(claims, key, algorithm='RS256')

    try:
        # Without a timeout requests waits indefinitely on an unresponsive
        # server, which would hang the whole export.
        resp = requests.post(auth_url,
                             data={'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
                                   'assertion': assertion},
                             timeout=timeout)
    except requests.exceptions.Timeout:
        print('failed to get access_token: request to {} timed out'.format(auth_url))
        return None

    if resp.status_code == 200:
        return resp.json()['access_token']

    print('failed to get access_token: ({}) {}'.format(resp.status_code, resp.text))
    return None


def exponential_backoff(attempts_so_far, slot_time=1.0, max_slots=0):
    """Calculate the number of seconds to sleep before the next retry.

    A slot number is drawn uniformly from ``0 .. 2 ** attempts_so_far - 1``
    and multiplied by ``slot_time`` to get the total sleep time.

    Args:
        attempts_so_far: Number of attempts already made; used as the exponent.
        slot_time: Length of a single slot in seconds.
        max_slots: When greater than 0, caps ``attempts_so_far`` at this value
            so the sleep time has an upper bound of
            ``(2 ** max_slots - 1) * slot_time``.

    Returns:
        The sleep time in seconds (may be 0).
    """
    if max_slots > 0:
        attempts_so_far = min(attempts_so_far, max_slots)
    return random.randint(0, 2 ** attempts_so_far - 1) * slot_time


site_id = args.site_id
record_type = args.type
domain = args.domain
sub_domain = 'admin'

if record_type == 'accountingCodes':
    # accountingCodes endpoint doesn't have /export/ in it
    path = 'v2/sites/{}/{}'.format(site_id, record_type)
else:
    path = 'v2/sites/{}/export/{}'.format(site_id, record_type)

if domain == 'lapps-local.io':
    # for local testing the Google ESP isn't HTTPS
    url = 'http://{}.{}:8082/{}'.format(sub_domain, domain, path)
else:
    url = 'https://{}.{}/{}'.format(sub_domain, domain, path)

# Initialize the last-updated and last-id query parameters to be used between
# requests. These should be updated after processing each batch of responses
# to get more results.
last_updated = args.last_updated
last_id = args.last_id
supports_last_values = record_type != 'accountingCodes'
access_token = None
batch_count = 0

# Maximum number of retries for a request
max_attempts = 5
attempts = 0
combined_data = []

# Parse the additional query parameters passed via command line once, up
# front: they do not change between requests, so there is no reason to
# re-parse them (and repeat the warnings) for every batch. Parameters that
# would override the paging parameters, or that repeat an earlier key, are
# ignored; entries without '=' are skipped.
extra_params = {}
for param in args.additional_params or []:
    if '=' not in param:
        continue
    key, value = param.split('=', 1)
    if key in ('last-updated', 'last-id') or key in extra_params:
        print(f"Warning: ignoring parameter '{key}' from additional-params (already set)")
    else:
        extra_params[key] = value

while attempts < max_attempts:
    attempts += 1

    # Get an access_token if necessary
    if access_token is None:
        print('requesting access token: {} {}'.format(args.client_id, args.pem_file))
        access_token = request_access_token(auth_host, args.client_id, args.pem_file)
        if access_token is None:
            break
        # The token is a bearer credential, so its value is deliberately not
        # echoed to stdout where it could end up in logs.
        print('access token received')

    params = {'last-updated': last_updated, 'last-id': last_id}
    params.update(extra_params)

    # set the access token in the request header
    headers = {'authorization': 'Bearer {}'.format(access_token)}

    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.exceptions.Timeout:
        wait_seconds = exponential_backoff(attempts, 1.42, 5)
        print('retry in {} seconds due to timeout'.format(wait_seconds))
        time.sleep(wait_seconds)
        continue

    # access_token is invalid, clear so next pass through the loop will get a new one
    if response.status_code == 401:
        print('error({}): {}'.format(response.status_code, response.text))
        access_token = None
        # immediately retry since it should get a new access token
        continue

    # Request can be retried, sleep before retrying
    if response.status_code == 429 or response.status_code >= 500:
        # sleep an exponential back-off amount of time
        wait_seconds = exponential_backoff(attempts, 1.42, 5)
        print('retry in {} on error status ({}): {}'.format(wait_seconds, response.status_code, response.reason))
        time.sleep(wait_seconds)
        continue

    # error on request that can't be retried
    if response.status_code != 200:
        print('unexpected error ({}): {}'.format(response.status_code, response.reason))
        # reasonably some sort of coding error and retry is likely to fail
        break

    # get the actual response JSON data
    records = json.loads(response.text)

    # Filter out the first record if it has same id AND lastUpdated as previous batch to avoid duplicates
    if supports_last_values and last_id > 0 and last_updated > 0 and len(records) > 0:
        first_record = records[0]
        if first_record.get('id') == last_id and first_record.get('lastUpdated') == last_updated:
            records = records[1:]  # Remove first record, keep the rest

    # No more records, exit.
    if not records:
        print('done.')
        break

    batch_count += 1

    # successful request, reset retry attempts
    attempts = 0

    # process the result records and do useful things with them
    print('processing batch {}, {} records'.format(batch_count, len(records)))
    combined_data.extend(records)

    if not supports_last_values:
        break  # accountingCodes endpoint is not paginated, so no need to loop

    # track last_updated and last_id of the final record so the next request
    # will fetch more records
    last_record = records[-1]
    last_updated = last_record.get('lastUpdated')
    last_id = last_record.get('id')
else:
    # Only reached when the loop condition fails, i.e. every allowed attempt
    # was used up without a break; whatever was collected is still written.
    print('giving up after {} attempts'.format(max_attempts))

# The context manager closes the file even if serialization or the write fails.
with open("records.json", "w") as print_file:
    json.dump(combined_data, print_file)