{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "VkJIsfmNZ4OB" }, "source": [ "## XBRL US API - Full text search of reports\n", "This sample Python code completes a full-text search of SEC reports in the XBRL US Public Filings Database and returns results from 2018 to the present.\n", "\n", "**The document endpoint is a benefit of XBRL US Membership - join XBRL US for comprehensive access - https://xbrl.us/join.**\n", "### Authenticate for access token\n", "XBRL US limits records returned for a query to improve efficiency; this script loops to collect all data from the Public Filings Database for a query.\n", "Run the cell below, then type your XBRL US Web account email, account password, Client ID, and secret (get these from https://xbrl.us/access-token), pressing the Enter key on the keyboard after each entry." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "colab": {}, "id": "", "outputId": "", "tags": [ "hide-cell" ] }, "outputs": [], "source": [ "# @title\n", "import os, re, sys, json\n", "import requests\n", "import pandas as pd\n", "from IPython.display import display, HTML\n", "import numpy as np\n", "import getpass\n", "from datetime import datetime\n", "import urllib\n", "from urllib.parse import urlencode\n", "\n", "\n", "class tokenInfoClass:\n", " access_token = None\n", " refresh_token = None\n", " email = None\n", " username = None\n", " client_id = None\n", " client_secret = None\n", " url = 'https://api.xbrl.us/oauth2/token'\n", " headers = {\"Content-Type\": \"application/x-www-form-urlencoded\"}\n", "\n", "def refresh(info):\n", " refresh_auth = {\n", " 'client_id': info.client_id,\n", " 'client_secret' : info.client_secret,\n", " 'grant_type' : 'refresh_token',\n", " 'platform' : 'ipynb',\n", " 'refresh_token' : info.refresh_token\n", " }\n", " refreshres = requests.post(info.url, data=refresh_auth, headers=info.headers)\n", " refresh_json = refreshres.json()\n", " info.access_token = refresh_json.get('access_token')\n", " info.refresh_token = refresh_json.get('refresh_token')\n", " print('Your access token(%s) is refreshed for 60 minutes. If it expires again, run this cell to generate a new token and continue to use the query cells below.' % (info.access_token))\n", " return info\n", "\n", "tokenInfo = tokenInfoClass()\n", "\n", "# Helper to prompt only if value is missing\n", "def prompt_if_missing(value, prompt_text, secret=False):\n", " if value:\n", " return value\n", " if secret:\n", " return getpass.getpass(prompt=prompt_text)\n", " return input(prompt_text)\n", "\n", "# Load credentials (if .json exists)\n", "creds = {}\n", "if os.path.exists('creds.json'):\n", " try:\n", " with open('creds.json', 'r') as f:\n", " creds = json.load(f)\n", " print(\"Loaded .json\")\n", " except Exception as e:\n", " print(\"Warning: failed to read from .json:\", e)\n", "\n", "if creds:\n", " # Try nested prod object first\n", " selected = None\n", " if isinstance(creds.get('prod'), dict):\n", " selected = creds['prod']\n", " \n", " # Next, try prod-prefixed keys\n", " if not selected:\n", " selected = {}\n", " keys = ['email', 'password', 'client_id', 'client_secret']\n", " for k in keys:\n", " prefixed_key = 'prod' + k\n", " if creds.get(prefixed_key):\n", " selected[k] = creds.get(prefixed_key)\n", " # fall back to top-level key if prod variant not found\n", " elif creds.get(k):\n", " selected[k] = creds.get(k)\n", " \n", " # Verify we have all required keys\n", " if not all(selected.get(k) for k in ('email', 'password', 'client_id', 'client_secret')):\n", " # Fill in missing values from prompts\n", " selected = {\n", " 'email': selected.get('email'),\n", " 'password': selected.get('password'),\n", " 'client_id': selected.get('client_id'),\n", " 'client_secret': selected.get('client_secret')\n", " }\n", " \n", " # Assign values, prompting for any missing ones\n", " tokenInfo.email = prompt_if_missing(selected.get('email'), 'Enter your XBRL US Web account email: ')\n", " tokenInfo.password = prompt_if_missing(selected.get('password'), 'Password: ', secret=True)\n", " tokenInfo.client_id = prompt_if_missing(selected.get('client_id'), 'Client ID: ', secret=True)\n", " tokenInfo.client_secret = prompt_if_missing(selected.get('client_secret'), 'Secret: ', secret=True)\n", "\n", " print('Using credentials from .json as available.')\n", "else:\n", " # No creds.json — prompt the user\n", " tokenInfo.email = input('Enter your XBRL US Web account email: ')\n", " tokenInfo.password = getpass.getpass(prompt='Password: ')\n", " tokenInfo.client_id = getpass.getpass(prompt='Client ID: ')\n", " tokenInfo.client_secret = getpass.getpass(prompt='Secret: ')\n", "\n", "body_auth = {'username' : tokenInfo.email,\n", " 'client_id': tokenInfo.client_id,\n", " 'client_secret' : tokenInfo.client_secret,\n", " 'password' : tokenInfo.password,\n", " 'grant_type' : 'password',\n", " 'platform' : 'ipynb' }\n", "\n", "#print(body_auth)\n", "\n", "payload = urlencode(body_auth)\n", "res = requests.request(\"POST\", tokenInfo.url, data=payload, headers=tokenInfo.headers)\n", "auth_json = res.json()\n", "\n", "if 'error' in auth_json:\n", " print(\"\\n\\nThere was a problem generating the access token: %s Run the first cell again and enter the credentials.\" % (auth_json.get('error_description', auth_json)))\n", "else:\n", " tokenInfo.access_token = auth_json.get('access_token')\n", " tokenInfo.refresh_token = auth_json.get('refresh_token')\n", " if tokenInfo.access_token and tokenInfo.refresh_token:\n", " print (\"\\n\\nYour access token expires in 60 minutes. After it expires, it should be regenerated automatically. If not, run the cell rerun the first query cell. \\n\\nFor now, skip ahead to the section 'Make a Query'.\")\n", " else:\n", " print(\"\\n\\nAuthentication completed but tokens were not returned. Response: {}\".format(auth_json))\n", "\n", "#print(vars(tokenInfo))\n", "if tokenInfo.access_token and tokenInfo.refresh_token:\n", " print('\\n\\naccess token: ' + tokenInfo.access_token + ' refresh token: ' + tokenInfo.refresh_token)\n", "else:\n", " print('\\n\\nNo access token was generated. Check the messages above for errors.')" ] }, { "cell_type": "markdown", "metadata": { "id": "" }, "source": [ "### Make a query\n", "After the access token confirmation appears above, you can modify the query below, then use the **_Cell >> Run_** menu option from the cell **immediately below this text** to run the entire query for results.\n", "\n", "The sample results include data from 2018 to the present for all SEC reports, and may take several minutes to recreate. **The document endpoint is a benefit of XBRL US Membership** - see https://xbrl.us/membership to get started.\n", " \n", "Refer to XBRL API documentation at https://xbrlus.github.io/xbrl-api/#/document/getDocumentInfo for details on how to use the `document.text-search` and other parameters of the endpoint, as well as other endpoints for the XBRL API." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "" }, "id": "", "outputId": "" }, "outputs": [], "source": [ "# Define the parameters of the query. This query returns all\n", "# SEC reports where 'Microsoft' and 'internet of things'\n", "# appear within 300 words of each other.\n", "\n", "endpoint = 'document'\n", "\n", "text_search = [\"Microsoft NEAR/300 \\\"internet of things\\\"\"\n", " ]\n", "\n", "report_source = [\"SEC\"\n", " ]\n", "\n", "# Define data fields to return (multi-sort based on order)\n", "\n", "fields = [ # this is the list of the characteristics of the data being returned by the query\n", " 'entity.name.sort(ASC)',\n", " 'dts.id.sort(DESC)',\n", " 'document.type',\n", " 'document.uri',\n", " 'document.example'\n", " ]\n", "\n", "# Set unique rows as True of False (True drops any duplicate rows)\n", "unique = True\n", "\n", "# Limit the number of rows displayed by the notebook (does not impact the data frame)\n", "rows_to_display = 10 # Set as '' to display all rows in the notebook\n", "\n", "# Below is the list of what's being queried using the search endpoint.\n", "\n", "params = {\n", " 'document.text-search': ','.join(text_search),\n", " 'report.source-name': ','.join(report_source),\n", " 'fields': ','.join(fields)\n", " }\n", "\n", "print('\\n\\nNext click the run button (Colab) or in the gray code cell below, then click the Run button above to execute the query for results.\\n\\n')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "colab": { "base_uri": "" }, "id": "", "outputId": "", "tags": [ "hide-cell" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "On Mon Nov 18 16:20:57 2024 info@xbrl.us (client ID: 69e1257c ...) started the query and\n", "up to 5000 records are found so far ...\n", " - this set contained fewer than the 5000 possible, only 258 records.\n", "\n", "At Mon Nov 18 16:21:10 2024, the query finished with 258 rows returned in 0:00:12.859922 for \n", "https://api.xbrl.us/api/v1/document/search?unique&document.text-search=Microsoft+NEAR/300+\"internet+of+things\"&report.source-name=SEC&fields=entity.name.sort(ASC),dts.id.sort(DESC),document.type,document.uri,document.example\n" ] }, { "data": { "text/html": [ "
| \n", " | entity.name | \n", "dts.id | \n", "document.type | \n", "document.uri | \n", "document.example | \n", "
|---|---|---|---|---|---|
| 0 | \n", "ABB LTD | \n", "183503 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1091587/000110465917015892/a16-22210_620f.htm | \n", "... electric motors account for <b>nearly</b> two‑thirds of the ... connects motors with the <b>Internet</b> of <b>Things</b> (IoT). The Drives and ... -reaching strategic partnership with <b>Microsoft</b> to develop next‑generation ... to decrease by approximately $<b>300</b> million due to additional ... | \n", "
| 1 | \n", "ACCENTURE HOLDINGS PLC | \n", "267928 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1647339/000164733917000019/acnholdings831201710k.htm | \n", "... Amazon Web Services, Apple, Google, <b>Microsoft,</b> Oracle, Pegasystems, salesforce.com, SAP, ... intelligence, augmented reality, automation, blockchain, <b>Internet</b> of <b>Things,</b> quantum computing and as ... meet our needs in the <b>near</b> future. ITEM 3. LEGAL ... | \n", "
| 2 | \n", "Accenture plc | \n", "347226 | \n", "inline | \n", "http://www.sec.gov/Archives/edgar/data/1467373/000146737319000339/acn831201910k.htm | \n", "... Web Services, Google, <b>Microsoft,</b> Oracle, Pegasystems, Salesforce, ... automation, blockchain, <b>Internet</b> of <b>Things,</b> quantum computing ... needs in the <b>near</b> future. ITEM 3 ... noncontrolling interests ( 159 ) <b>300</b> ( 2,075 ) Cash flow ... | \n", "
| 3 | \n", "Accenture plc | \n", "304571 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1467373/000146737318000318/acn831201810k.htm | \n", "... Services, Apple, Google, <b>Microsoft,</b> Oracle, Pegasystems, Salesforce, ... automation, blockchain, <b>Internet</b> of <b>Things,</b> quantum computing ... needs in the <b>near</b> future. 22 Table ... attributable to noncontrolling interests <b>300</b> (2,075 ) (4,740 ) ? ... | \n", "
| 4 | \n", "Accenture plc | \n", "267957 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1467373/000146737317000430/acn831201710k.htm | \n", "... Amazon Web Services, Apple, Google, <b>Microsoft,</b> Oracle, Pegasystems, salesforce.com, SAP, ... intelligence, augmented reality, automation, blockchain, <b>Internet</b> of <b>Things,</b> quantum computing and as ... meet our needs in the <b>near</b> future. ITEM 3. LEGAL ... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 253 | \n", "UBI Blockchain Internet LTD-DE | \n", "265427 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1500242/000149315217010221/forms-1a.htm | \n", "... the blockchain technology and <b>internet</b> of <b>things</b> promote industrial information and ... research and development, including IBM, <b>Microsoft,</b> Intel, Blockstream and Thompson ... and advancement of blockchian, <b>internet</b> of <b>things,</b> and technological innovation platform ... | \n", "
| 254 | \n", "UBI Blockchain Internet LTD-DE | \n", "257745 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1500242/000149315217007856/form10-q.htm | \n", "... of blockchain technology and <b>internet</b> of <b>things</b>. Blockchain technology-based applications ... and development, including IBM, <b>Microsoft,</b> Intel, Blockstream and Thompson ... and advancement of blockchain, <b>internet</b> of <b>things,</b> and technological innovation platform ... | \n", "
| 255 | \n", "UBI Blockchain Internet LTD-DE | \n", "190593 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1500242/000149315217007534/forms-1a.htm | \n", "... the blockchain technology and <b>internet</b> of <b>things</b> promote industrial information and ... research and development, including IBM, <b>Microsoft,</b> Intel, Blockstream and Thompson ... and advancement of blockchian, <b>internet</b> of <b>things,</b> and technological innovation platform ... | \n", "
| 256 | \n", "UBI Blockchain Internet LTD-DE | \n", "190488 | \n", "report | \n", "http://www.sec.gov/Archives/edgar/data/1500242/000149315217007324/form10qa.htm | \n", "... of blockchain technology and <b>internet</b> of <b>things</b>. Blockchain technology-based applications ... and development, including IBM, <b>Microsoft,</b> Intel, Blockstream and Thompson ... and advancement of blockchain, <b>internet</b> of <b>things,</b> and technological innovation platform ... | \n", "
| 257 | \n", "UNIVERSAL ELECTRONICS INC. | \n", "445815 | \n", "inline | \n", "http://www.sec.gov/Archives/edgar/data/101984/000010198421000009/ueic-20201231.htm | \n", "... console manufacturers such as <b>Microsoft</b> Corporation on its ... Service, a managed <b>Internet</b> of <b>Things</b> (\"IoT\") service which ... knowledge library that includes <b>nearly</b> 13,000 brands comprising ... 100,000 100,000 75,000 $ <b>300</b> million 75,000 75,000 75,000 ... | \n", "