{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "wrds_compustat1.ipynb",
"provenance": [],
"collapsed_sections": [],
"mount_file_id": "1WSHWgICu6eul3zMit8E6pjfC68ce_a5b",
"authorship_tag": "ABX9TyMmRq9I+S1LbuDJu3QN2IXs",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
" "
]
},
{
"cell_type": "markdown",
"source": [
"This code is benefited from the publicly available code by QINGYI (FREDA) SONG DRECHSLER.
\n",
"\n",
"Reference: Drechsler, Qingyi (Freda) S., 2022, Python Programs for Empirical Finance, https://www.fredasongdrechsler.com\n"
],
"metadata": {
"id": "--sTFhudmsBs"
}
},
{
"cell_type": "code",
"metadata": {
"collapsed": true,
"id": "zgpZF6Vo11y5"
},
"source": [
"import pandas as pd \n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"plt.style.use('fivethirtyeight')\n",
"\n",
"%matplotlib inline\n",
"%config InlineBackend.figure_format = 'retina'"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y2C0OgijlDzv",
"outputId": "2d25a1da-619a-4d37-edb7-f97663da0b9d"
},
"source": [
"!pip install wrds"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting wrds\n",
" Downloading wrds-3.1.1-py3-none-any.whl (12 kB)\n",
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from wrds) (1.4.37)\n",
"Collecting psycopg2-binary\n",
" Downloading psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
"\u001b[K |████████████████████████████████| 3.0 MB 11.8 MB/s \n",
"\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from wrds) (1.3.5)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from wrds) (1.21.6)\n",
"Collecting mock\n",
" Downloading mock-4.0.3-py3-none-any.whl (28 kB)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->wrds) (2022.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->wrds) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->wrds) (1.15.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->wrds) (4.11.4)\n",
"Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->wrds) (1.1.2)\n",
"Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy->wrds) (4.1.1)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy->wrds) (3.8.0)\n",
"Installing collected packages: psycopg2-binary, mock, wrds\n",
"Successfully installed mock-4.0.3 psycopg2-binary-2.9.3 wrds-3.1.1\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "TlUrRmTnlFD_"
},
"source": [
"import wrds\n",
"\n",
"# Now I am connecting to WRDS site - you must have a WRDS account user id and password\n",
"conn=wrds.Connection()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"help(wrds)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eneGwr9g3tDo",
"outputId": "eeb95eb5-604d-44d1-fb49-5c0ae7371ef7"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on package wrds:\n",
"\n",
"NAME\n",
" wrds\n",
"\n",
"DESCRIPTION\n",
" WRDS Python Data Access Library\n",
" ==============================\n",
" \n",
" WRDS-Py is a library for extracting data from WRDS data sources and getting it into Pandas.\n",
" \n",
" >>> import wrds\n",
" >>> db = wrds.Connection()\n",
" >>> db.list_libraries()\n",
" ['aha', 'aha_sample', 'ahasamp', 'audit', 'audit_audit_comp', ...]\n",
" >>> db.list_tables(library='crsp')\n",
" ['acti', 'asia', 'asib', 'asic', 'asio', 'asix', 'bmdebt', 'bmheader', ...]\n",
" >>> data = db.raw_sql('SELECT * FROM crsp.stocknames', index_col='permno')\n",
" >>> data.head()\n",
" permco namedt nameenddt cusip ncusip ticker permno\n",
" 10000.0 7952.0 1986-01-07 1987-06-11 68391610 68391610 OMFGA\n",
" 10001.0 7953.0 1986-01-09 1993-11-21 36720410 39040610 GFGC\n",
" 10001.0 7953.0 1993-11-22 2008-02-04 36720410 29274A10 EWST\n",
" 10001.0 7953.0 2008-02-05 2009-08-03 36720410 29274A20 EWST\n",
" 10001.0 7953.0 2009-08-04 2009-12-17 36720410 29269V10 EGAS\n",
" ...\n",
"\n",
"PACKAGE CONTENTS\n",
" sql\n",
" test\n",
"\n",
"DATA\n",
" __copyright__ = '2017-2021 Wharton Research Data Services'\n",
" __title__ = 'wrds-py'\n",
"\n",
"VERSION\n",
" 3.1.1\n",
"\n",
"AUTHOR\n",
" Wharton Research Data Services\n",
"\n",
"FILE\n",
" /usr/local/lib/python3.7/dist-packages/wrds/__init__.py\n",
"\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#listing all dtabases or libraries in WRDS\n",
"conn.list_libraries()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pf-sgx9YwW3W",
"outputId": "3714db2e-b6fd-4e40-b302-26901b7e7103"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['aha',\n",
" 'aha_sample',\n",
" 'ahasamp',\n",
" 'audit',\n",
" 'audit_audit_comp',\n",
" 'audit_common',\n",
" 'audit_corp_legal',\n",
" 'auditsmp',\n",
" 'auditsmp_all',\n",
" 'bank',\n",
" 'blab',\n",
" 'block',\n",
" 'block_all',\n",
" 'boardex',\n",
" 'boardex_trial',\n",
" 'boardsmp',\n",
" 'bvd',\n",
" 'bvdsamp',\n",
" 'calcbench_trial',\n",
" 'calcbnch',\n",
" 'cboe',\n",
" 'cboe_all',\n",
" 'centris',\n",
" 'ciq',\n",
" 'ciqsamp',\n",
" 'ciqsamp_capstrct',\n",
" 'ciqsamp_common',\n",
" 'ciqsamp_keydev',\n",
" 'ciqsamp_pplintel',\n",
" 'ciqsamp_transcripts',\n",
" 'cisdm',\n",
" 'cisdmsmp',\n",
" 'clrvt',\n",
" 'clrvtsmp',\n",
" 'columnar',\n",
" 'comp',\n",
" 'comp_bank',\n",
" 'comp_bank_daily',\n",
" 'comp_execucomp',\n",
" 'comp_na_annual_all',\n",
" 'comp_na_daily_all',\n",
" 'comp_na_monthly_all',\n",
" 'comp_segments_hist',\n",
" 'comp_segments_hist_daily',\n",
" 'compa',\n",
" 'compb',\n",
" 'compbd',\n",
" 'compdcur',\n",
" 'compg',\n",
" 'compgd',\n",
" 'comph',\n",
" 'compm',\n",
" 'compmcur',\n",
" 'compnad',\n",
" 'compsamp',\n",
" 'compsamp_snapshot',\n",
" 'compseg',\n",
" 'compsegd',\n",
" 'compsnap',\n",
" 'comscore',\n",
" 'contrib',\n",
" 'contrib_ceo_turnover',\n",
" 'contrib_char_returns',\n",
" 'contrib_general',\n",
" 'contrib_intangible_value',\n",
" 'contrib_kpss',\n",
" 'crsp',\n",
" 'crsp_a_ccm',\n",
" 'crsp_a_indexes',\n",
" 'crsp_a_stock',\n",
" 'crsp_q_indexhist',\n",
" 'crspa',\n",
" 'crspm',\n",
" 'crspq',\n",
" 'crspsamp',\n",
" 'csmar',\n",
" 'cusipm',\n",
" 'custom_jl',\n",
" 'dealscan',\n",
" 'djones',\n",
" 'djones_all',\n",
" 'dmef',\n",
" 'dmef_all',\n",
" 'doe',\n",
" 'doe_all',\n",
" 'emdb',\n",
" 'estimize',\n",
" 'etfg',\n",
" 'etfg_samp',\n",
" 'etfgsamp',\n",
" 'eureka',\n",
" 'eurekahedge_sample',\n",
" 'eursamp',\n",
" 'eventus',\n",
" 'evts',\n",
" 'execcomp',\n",
" 'factsamp_all',\n",
" 'factsamp_revere',\n",
" 'factset',\n",
" 'ff',\n",
" 'ff_all',\n",
" 'fisd',\n",
" 'fisdsamp',\n",
" 'fjc',\n",
" 'fjc_linking',\n",
" 'fjc_litigation',\n",
" 'frb',\n",
" 'frb_all',\n",
" 'fssamp',\n",
" 'ftse',\n",
" 'ftsesamp',\n",
" 'ginsight',\n",
" 'gmi',\n",
" 'govpx',\n",
" 'govpxsmp',\n",
" 'hbase',\n",
" 'hbase_sample',\n",
" 'hbsamp',\n",
" 'hfr',\n",
" 'hfrsamp',\n",
" 'hfrsamp_hfrdb',\n",
" 'ibes',\n",
" 'ibescorp',\n",
" 'ibeskpi',\n",
" 'ifgr',\n",
" 'ifgrsamp',\n",
" 'ims',\n",
" 'ims_obp_trial',\n",
" 'imssamp',\n",
" 'infogroupsamp_business',\n",
" 'infogroupsamp_residential',\n",
" 'iri',\n",
" 'iss',\n",
" 'issm',\n",
" 'kld',\n",
" 'ktmine',\n",
" 'ktmine_patents_samp',\n",
" 'ktsamp',\n",
" 'levin',\n",
" 'lspd',\n",
" 'lspdsamp',\n",
" 'lvnsamp',\n",
" 'lvnsamp_all',\n",
" 'macrofin',\n",
" 'macrofin_comm_trade',\n",
" 'markit',\n",
" 'mfl',\n",
" 'midas',\n",
" 'mrktsamp',\n",
" 'mrktsamp_cds',\n",
" 'mrktsamp_cdx',\n",
" 'mrktsamp_msf',\n",
" 'msfanly',\n",
" 'msfinst',\n",
" 'msrb',\n",
" 'msrb_all',\n",
" 'msrbsamp',\n",
" 'nastraq',\n",
" 'omtrial',\n",
" 'optionm',\n",
" 'optionmsamp_europe',\n",
" 'optionmsamp_us',\n",
" 'otc',\n",
" 'otc_endofday',\n",
" 'pacap',\n",
" 'phlx',\n",
" 'phlx_all',\n",
" 'pitchbk',\n",
" 'pitchsmp',\n",
" 'ppublica',\n",
" 'ppubsamp',\n",
" 'ppubsamp_d4d',\n",
" 'preqin',\n",
" 'preqsamp',\n",
" 'preqsamp_all',\n",
" 'public',\n",
" 'public_all',\n",
" 'pwt',\n",
" 'pwt_all',\n",
" 'ravenpack_trial',\n",
" 'rent',\n",
" 'reprisk',\n",
" 'reprisk_sample',\n",
" 'repsamp',\n",
" 'revere',\n",
" 'risk',\n",
" 'risksamp',\n",
" 'risksamp_all',\n",
" 'rpa',\n",
" 'rpna',\n",
" 'rpnasamp',\n",
" 'rq_all',\n",
" 'sdc',\n",
" 'sdcsamp',\n",
" 'secsamp',\n",
" 'secsamp_all',\n",
" 'snapsamp',\n",
" 'snl',\n",
" 'snlsamp',\n",
" 'snlsamp_fig',\n",
" 'sprat',\n",
" 'sustain',\n",
" 'sustainalyticssamp_all',\n",
" 'sustsamp',\n",
" 'taq',\n",
" 'taqmsamp',\n",
" 'taqmsamp_all',\n",
" 'taqmsec',\n",
" 'taqsamp',\n",
" 'tass',\n",
" 'tfn',\n",
" 'totalq',\n",
" 'totalq_all',\n",
" 'toyo',\n",
" 'toyosamp',\n",
" 'toyosamp_all',\n",
" 'tr_dealscan',\n",
" 'tr_ds',\n",
" 'trace',\n",
" 'trace_enhanced',\n",
" 'trace_standard',\n",
" 'trcstsmp',\n",
" 'trdssamp',\n",
" 'trdstrm',\n",
" 'tresg',\n",
" 'tresgsmp',\n",
" 'trown',\n",
" 'trsamp',\n",
" 'trsamp_ds_eq',\n",
" 'trsamp_dscom',\n",
" 'trsamp_dsecon',\n",
" 'trsamp_dsfut',\n",
" 'trsdcgs',\n",
" 'trucost',\n",
" 'trws',\n",
" 'twoiq',\n",
" 'twoiq_samp',\n",
" 'twoiqsmp',\n",
" 'wappsamp',\n",
" 'wcai',\n",
" 'wind',\n",
" 'windsmp',\n",
" 'wqa',\n",
" 'wrds_lib_internal',\n",
" 'wrdsapps',\n",
" 'wrdsapps_backtest_basic',\n",
" 'wrdsapps_backtest_plus',\n",
" 'wrdsapps_eushort',\n",
" 'wrdsapps_evtstudy_int_ginsight',\n",
" 'wrdsapps_evtstudy_lr',\n",
" 'wrdsapps_finratio',\n",
" 'wrdsapps_finratio_ccm',\n",
" 'wrdsapps_link_comp_eushort',\n",
" 'wrdsapps_link_crsp_bond',\n",
" 'wrdsapps_link_crsp_factset',\n",
" 'wrdsapps_link_crsp_taq',\n",
" 'wrdsapps_link_datastream_wscope',\n",
" 'wrdsapps_link_supplychain',\n",
" 'wrdsapps_patents',\n",
" 'wrdsapps_subsidiary',\n",
" 'wrdsappssamp_all',\n",
" 'wrdsrpts',\n",
" 'wrdsrpts_rep_usage',\n",
" 'wrdssec',\n",
" 'wrdssec_midas',\n",
" 'zacks',\n",
" 'zacksamp',\n",
" 'zacksamp_all']"
]
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"source": [
"help(conn.list_libraries)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "W4hTpFGCo7DT",
"outputId": "4d4f2cb1-6d70-434b-c2c8-9e12d75d96d8"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on method list_libraries in module wrds.sql:\n",
"\n",
"list_libraries() method of wrds.sql.Connection instance\n",
" Return all the libraries (schemas) the user can access.\n",
" \n",
" :rtype: list\n",
" \n",
" Usage::\n",
" >>> db.list_libraries()\n",
" ['aha', 'audit', 'block', 'boardex', ...]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"help(conn.list_tables)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fvyCEW4voyMr",
"outputId": "48da471d-8539-4213-e28b-9b47588b6d73"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on method list_tables in module wrds.sql:\n",
"\n",
"list_tables(library) method of wrds.sql.Connection instance\n",
" Returns a list of all the views/tables/foreign tables within a schema.\n",
" \n",
" :param library: Postgres schema name.\n",
" \n",
" :rtype: list\n",
" \n",
" Usage::\n",
" >>> db.list_tables('wrdssec')\n",
" ['wciklink_gvkey', 'dforms', 'wciklink_cusip', 'wrds_forms', ...]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"conn.list_tables(library='comp')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dM7RPF23ZUJg",
"outputId": "38dab0f2-98e7-48cc-919f-bf218bd5b035"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['aco_amda',\n",
" 'aco_imda',\n",
" 'aco_indfnta',\n",
" 'aco_indfntq',\n",
" 'aco_indfntytd',\n",
" 'aco_indsta',\n",
" 'aco_indstq',\n",
" 'aco_indstytd',\n",
" 'aco_notesa',\n",
" 'aco_notesq',\n",
" 'aco_notessa',\n",
" 'aco_notesytd',\n",
" 'aco_pnfnda',\n",
" 'aco_pnfndq',\n",
" 'aco_pnfndytd',\n",
" 'aco_pnfnta',\n",
" 'aco_pnfntq',\n",
" 'aco_pnfntytd',\n",
" 'aco_transa',\n",
" 'aco_transq',\n",
" 'aco_transsa',\n",
" 'aco_transytd',\n",
" 'adsprate',\n",
" 'asec_amda',\n",
" 'asec_imda',\n",
" 'asec_notesa',\n",
" 'asec_notesq',\n",
" 'asec_transa',\n",
" 'asec_transq',\n",
" 'bank_aacctchg',\n",
" 'bank_adesind',\n",
" 'bank_afnd1',\n",
" 'bank_afnd2',\n",
" 'bank_afnddc1',\n",
" 'bank_afnddc2',\n",
" 'bank_afntind',\n",
" 'bank_funda',\n",
" 'bank_funda_fncd',\n",
" 'bank_fundq',\n",
" 'bank_fundq_fncd',\n",
" 'bank_iacctchg',\n",
" 'bank_idesind',\n",
" 'bank_ifndq',\n",
" 'bank_ifndytd',\n",
" 'bank_ifntq',\n",
" 'bank_ifntytd',\n",
" 'bank_names',\n",
" 'bank_namesq',\n",
" 'chars',\n",
" 'co_aacctchg',\n",
" 'co_aaudit',\n",
" 'co_adesind',\n",
" 'co_adjfact',\n",
" 'co_afnd1',\n",
" 'co_afnd2',\n",
" 'co_afnddc1',\n",
" 'co_afnddc2',\n",
" 'co_afntind1',\n",
" 'co_afntind2',\n",
" 'co_ainvval',\n",
" 'co_amkt',\n",
" 'co_busdescl',\n",
" 'co_cotype',\n",
" 'co_filedate',\n",
" 'co_fortune',\n",
" 'co_hgic',\n",
" 'co_iacctchg',\n",
" 'co_iaudit',\n",
" 'co_idesind',\n",
" 'co_ifndq',\n",
" 'co_ifndsa',\n",
" 'co_ifndytd',\n",
" 'co_ifntq',\n",
" 'co_ifntsa',\n",
" 'co_ifntytd',\n",
" 'co_imkt',\n",
" 'co_industry',\n",
" 'co_ipcd',\n",
" 'co_mthly',\n",
" 'co_offtitl',\n",
" 'company',\n",
" 'currency',\n",
" 'dd_group',\n",
" 'dd_group_xref',\n",
" 'dd_item',\n",
" 'dd_package',\n",
" 'ecind_desc',\n",
" 'ecind_mth',\n",
" 'exrt_dly',\n",
" 'exrt_mth',\n",
" 'filings',\n",
" 'funda',\n",
" 'funda_fncd',\n",
" 'fundq',\n",
" 'fundq_fncd',\n",
" 'g_chars',\n",
" 'g_co_aaudit',\n",
" 'g_co_adesind',\n",
" 'g_co_afnd1',\n",
" 'g_co_afnd2',\n",
" 'g_co_afnddc1',\n",
" 'g_co_afnddc2',\n",
" 'g_co_afntind1',\n",
" 'g_co_afntind2',\n",
" 'g_co_ainvval',\n",
" 'g_co_gsuppl',\n",
" 'g_co_hgic',\n",
" 'g_co_iaudit',\n",
" 'g_co_idesind',\n",
" 'g_co_ifndq',\n",
" 'g_co_ifndsa',\n",
" 'g_co_ifndytd',\n",
" 'g_co_ifntq',\n",
" 'g_co_ifntsa',\n",
" 'g_co_ifntytd',\n",
" 'g_co_industry',\n",
" 'g_co_ipcd',\n",
" 'g_co_offtitl',\n",
" 'g_company',\n",
" 'g_currency',\n",
" 'g_ecind_desc',\n",
" 'g_ecind_mth',\n",
" 'g_exrt_dly',\n",
" 'g_exrt_mth',\n",
" 'g_funda',\n",
" 'g_funda_fncd',\n",
" 'g_fundq',\n",
" 'g_fundq_fncd',\n",
" 'g_idx_daily',\n",
" 'g_idx_index',\n",
" 'g_idx_mth',\n",
" 'g_idxcst_his',\n",
" 'g_names',\n",
" 'g_names_ix',\n",
" 'g_names_ix_cst',\n",
" 'g_namesq',\n",
" 'g_sec_adesind',\n",
" 'g_sec_adjfact',\n",
" 'g_sec_afnd',\n",
" 'g_sec_afnddc',\n",
" 'g_sec_afnt',\n",
" 'g_sec_divid',\n",
" 'g_sec_dprc',\n",
" 'g_sec_dtrt',\n",
" 'g_sec_history',\n",
" 'g_sec_idesind',\n",
" 'g_sec_ifnd',\n",
" 'g_sec_ifnt',\n",
" 'g_sec_split',\n",
" 'g_secd',\n",
" 'g_secnamesd',\n",
" 'g_security',\n",
" 'g_sedolgvkey',\n",
" 'g_tmptable_pkg6153_tbl4023',\n",
" 'idx_ann',\n",
" 'idx_anndes',\n",
" 'idx_daily',\n",
" 'idx_index',\n",
" 'idx_mth',\n",
" 'idx_qrt',\n",
" 'idx_qrtdes',\n",
" 'idxcst_his',\n",
" 'io_qaggregate',\n",
" 'io_qbuysell',\n",
" 'io_qchanges',\n",
" 'io_qfloatadj',\n",
" 'io_qholders',\n",
" 'it_mbuysell',\n",
" 'it_msummary',\n",
" 'it_r_rltn',\n",
" 'names',\n",
" 'names_aco_indsta',\n",
" 'names_aco_indstq',\n",
" 'names_aco_pnfnda',\n",
" 'names_aco_pnfndq',\n",
" 'names_adsprate',\n",
" 'names_ix',\n",
" 'names_ix_cst',\n",
" 'names_seg',\n",
" 'namesd',\n",
" 'namesm',\n",
" 'namesq',\n",
" 'r_accstd',\n",
" 'r_acqmeth',\n",
" 'r_auditors',\n",
" 'r_auopic',\n",
" 'r_balpres',\n",
" 'r_cf_formt',\n",
" 'r_co_status',\n",
" 'r_coindpre',\n",
" 'r_compstat',\n",
" 'r_consol',\n",
" 'r_country',\n",
" 'r_cstclscd',\n",
" 'r_datacode',\n",
" 'r_datafmt',\n",
" 'r_divtaxmarker',\n",
" 'r_docsrce',\n",
" 'r_ex_codes',\n",
" 'r_exchgtier',\n",
" 'r_exrt_typ',\n",
" 'r_fndfntcd',\n",
" 'r_footnts',\n",
" 'r_foricd',\n",
" 'r_giccd',\n",
" 'r_hcalendr',\n",
" 'r_idxclscd',\n",
" 'r_inactvcd',\n",
" 'r_incstats',\n",
" 'r_indfmt',\n",
" 'r_indsec',\n",
" 'r_invval',\n",
" 'r_issuetyp',\n",
" 'r_majidxcl',\n",
" 'r_mic_codes',\n",
" 'r_naiccd',\n",
" 'r_notetype',\n",
" 'r_ntsubtype',\n",
" 'r_offcrso',\n",
" 'r_ogmethod',\n",
" 'r_opinions',\n",
" 'r_prc_stat',\n",
" 'r_qsrcdoc',\n",
" 'r_sec_stat',\n",
" 'r_secannfn',\n",
" 'r_sectors',\n",
" 'r_siccd',\n",
" 'r_spiicd',\n",
" 'r_spmicd',\n",
" 'r_statalrt',\n",
" 'r_states',\n",
" 'r_stko',\n",
" 'r_titles',\n",
" 'r_updates',\n",
" 'sec_adesind',\n",
" 'sec_adjfact',\n",
" 'sec_afnd',\n",
" 'sec_afnddc',\n",
" 'sec_afnt',\n",
" 'sec_divid',\n",
" 'sec_dprc',\n",
" 'sec_dtrt',\n",
" 'sec_history',\n",
" 'sec_idesind',\n",
" 'sec_ifnd',\n",
" 'sec_ifnt',\n",
" 'sec_mdivfn',\n",
" 'sec_mshare',\n",
" 'sec_msptfn',\n",
" 'sec_mth',\n",
" 'sec_mthdiv',\n",
" 'sec_mthprc',\n",
" 'sec_mthspt',\n",
" 'sec_mthtrt',\n",
" 'sec_shortint',\n",
" 'sec_spind',\n",
" 'sec_split',\n",
" 'secd',\n",
" 'secm',\n",
" 'security',\n",
" 'sedolgvkey',\n",
" 'seg_ann',\n",
" 'seg_annfund',\n",
" 'seg_customer',\n",
" 'seg_geo',\n",
" 'seg_naics',\n",
" 'seg_product',\n",
" 'seg_type',\n",
" 'spidx_cst',\n",
" 'spind',\n",
" 'spind_dly',\n",
" 'spind_mth',\n",
" 'tmptable_pkg6153_tbl4023',\n",
" 'wrds_seg_customer',\n",
" 'wrds_seg_geo',\n",
" 'wrds_seg_product',\n",
" 'wrds_segmerged',\n",
" 'xfl_column',\n",
" 'xfl_table']"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"help(conn.describe_table)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ACHSh5dYTEMr",
"outputId": "6c432088-aaea-4b99-8aee-7dc64072035e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on method describe_table in module wrds.sql:\n",
"\n",
"describe_table(library, table) method of wrds.sql.Connection instance\n",
" Takes the library and the table and describes all the columns\n",
" in that table.\n",
" Includes Column Name, Column Type, Nullable?.\n",
" \n",
" :param library: Postgres schema name.\n",
" :param table: Postgres table name.\n",
" \n",
" :rtype: pandas.DataFrame\n",
" \n",
" Usage::\n",
" >>> db.describe_table('wrdssec_all', 'dforms')\n",
" name nullable type\n",
" 0 cik True VARCHAR\n",
" 1 fdate True DATE\n",
" 2 secdate True DATE\n",
" 3 form True VARCHAR\n",
" 4 coname True VARCHAR\n",
" 5 fname True VARCHAR\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"**Note that there are 948 columns or Compustat variables and there are 858066 firm-years.** However, we will see that there are two lines for each fimr-year in some cases, so we need to do mroe investigation."
],
"metadata": {
"id": "WzR8cfvPVVQa"
}
},
{
"cell_type": "code",
"source": [
"compustat_variables=conn.describe_table('comp', 'funda')\n",
"print(45 * \"-\")\n",
"print(type(compustat_variables))\n",
"print(45 * \"-\")\n",
"print(compustat_variables)\n",
"print(45 * \"-\")\n",
"print(compustat_variables.shape)\n",
"print(45 * \"-\")\n",
"print(compustat_variables.info())\n",
"print(45 * \"-\") \n",
"compustat_variables.head(20)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "e56922bb-fb96-4cf2-c5f5-bc2a61138c60",
"id": "hevdcKmcTUqU"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Approximately 858066 rows in comp.funda.\n",
"---------------------------------------------\n",
"\n",
"---------------------------------------------\n",
" name nullable type\n",
"0 gvkey True VARCHAR(6)\n",
"1 datadate True DATE\n",
"2 fyear True DOUBLE_PRECISION\n",
"3 indfmt True VARCHAR(12)\n",
"4 consol True VARCHAR(2)\n",
".. ... ... ...\n",
"943 au True VARCHAR(8)\n",
"944 auop True VARCHAR(8)\n",
"945 auopic True VARCHAR(1)\n",
"946 ceoso True VARCHAR(1)\n",
"947 cfoso True VARCHAR(1)\n",
"\n",
"[948 rows x 3 columns]\n",
"---------------------------------------------\n",
"(948, 3)\n",
"---------------------------------------------\n",
"\n",
"RangeIndex: 948 entries, 0 to 947\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 948 non-null object\n",
" 1 nullable 948 non-null bool \n",
" 2 type 948 non-null object\n",
"dtypes: bool(1), object(2)\n",
"memory usage: 15.9+ KB\n",
"None\n",
"---------------------------------------------\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" name nullable type\n",
"0 gvkey True VARCHAR(6)\n",
"1 datadate True DATE\n",
"2 fyear True DOUBLE_PRECISION\n",
"3 indfmt True VARCHAR(12)\n",
"4 consol True VARCHAR(2)\n",
"5 popsrc True VARCHAR(1)\n",
"6 datafmt True VARCHAR(12)\n",
"7 tic True VARCHAR(8)\n",
"8 cusip True VARCHAR(9)\n",
"9 conm True VARCHAR(70)\n",
"10 acctchg True VARCHAR(8)\n",
"11 acctstd True VARCHAR(8)\n",
"12 acqmeth True VARCHAR(2)\n",
"13 adrr True DOUBLE_PRECISION\n",
"14 ajex True DOUBLE_PRECISION\n",
"15 ajp True DOUBLE_PRECISION\n",
"16 bspr True VARCHAR(8)\n",
"17 compst True VARCHAR(8)\n",
"18 curcd True VARCHAR(3)\n",
"19 curncd True VARCHAR(3)"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" name \n",
" nullable \n",
" type \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" gvkey \n",
" True \n",
" VARCHAR(6) \n",
" \n",
" \n",
" 1 \n",
" datadate \n",
" True \n",
" DATE \n",
" \n",
" \n",
" 2 \n",
" fyear \n",
" True \n",
" DOUBLE_PRECISION \n",
" \n",
" \n",
" 3 \n",
" indfmt \n",
" True \n",
" VARCHAR(12) \n",
" \n",
" \n",
" 4 \n",
" consol \n",
" True \n",
" VARCHAR(2) \n",
" \n",
" \n",
" 5 \n",
" popsrc \n",
" True \n",
" VARCHAR(1) \n",
" \n",
" \n",
" 6 \n",
" datafmt \n",
" True \n",
" VARCHAR(12) \n",
" \n",
" \n",
" 7 \n",
" tic \n",
" True \n",
" VARCHAR(8) \n",
" \n",
" \n",
" 8 \n",
" cusip \n",
" True \n",
" VARCHAR(9) \n",
" \n",
" \n",
" 9 \n",
" conm \n",
" True \n",
" VARCHAR(70) \n",
" \n",
" \n",
" 10 \n",
" acctchg \n",
" True \n",
" VARCHAR(8) \n",
" \n",
" \n",
" 11 \n",
" acctstd \n",
" True \n",
" VARCHAR(8) \n",
" \n",
" \n",
" 12 \n",
" acqmeth \n",
" True \n",
" VARCHAR(2) \n",
" \n",
" \n",
" 13 \n",
" adrr \n",
" True \n",
" DOUBLE_PRECISION \n",
" \n",
" \n",
" 14 \n",
" ajex \n",
" True \n",
" DOUBLE_PRECISION \n",
" \n",
" \n",
" 15 \n",
" ajp \n",
" True \n",
" DOUBLE_PRECISION \n",
" \n",
" \n",
" 16 \n",
" bspr \n",
" True \n",
" VARCHAR(8) \n",
" \n",
" \n",
" 17 \n",
" compst \n",
" True \n",
" VARCHAR(8) \n",
" \n",
" \n",
" 18 \n",
" curcd \n",
" True \n",
" VARCHAR(3) \n",
" \n",
" \n",
" 19 \n",
" curncd \n",
" True \n",
" VARCHAR(3) \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"help(conn.get_row_count)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ytFXWo9-sOPn",
"outputId": "76ed919e-f75d-40c2-a896-f0d6026a6042"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on method get_row_count in module wrds.sql:\n",
"\n",
"get_row_count(library, table) method of wrds.sql.Connection instance\n",
" Uses the library and table to get the approximate\n",
" row count for the table.\n",
" \n",
" :param library: Postgres schema name.\n",
" :param table: Postgres table name.\n",
" \n",
" :rtype: int\n",
" \n",
" Usage::\n",
" >>> db.get_row_count('wrdssec', 'dforms')\n",
" 16378400\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"conn.get_row_count('comp', 'funda')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "F8ifeLhasN4C",
"outputId": "ebe40d0e-9a65-42ca-ec09-736a92464018"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"858066"
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"source": [
"help(conn.get_table)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "el_sD1TcsGgw",
"outputId": "774b3a55-a8a3-4da8-fa01-521f25648913"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on method get_table in module wrds.sql:\n",
"\n",
"get_table(library, table, obs=-1, offset=0, columns=None, coerce_float=None, index_col=None, date_cols=None) method of wrds.sql.Connection instance\n",
" Creates a data frame from an entire table in the database.\n",
" \n",
" :param sql: SQL code in string object.\n",
" :param library: Postgres schema name.\n",
" \n",
" :param obs: (optional) int, default: -1\n",
" Specifies the number of observations to pull from the table.\n",
" An integer less than 0 will return the entire table.\n",
" :param offset: (optional) int, default: 0\n",
" Specifies the starting point for the query.\n",
" An offset of 0 will start selecting from the beginning.\n",
" :param columns: (optional) list or tuple, default: None\n",
" Specifies the columns to be included in the output data frame.\n",
" :param coerce_float: (optional) boolean, default: True\n",
" Attempt to convert values to non-string, non-numeric objects\n",
" to floating point. Can result in loss of precision.\n",
" :param date_cols: (optional) list or dict, default: None\n",
" - List of column names to parse as date\n",
" - Dict of ``{column_name: format string}``\n",
" where format string is\n",
" strftime compatible in case of parsing string times or\n",
" is one of (D, s, ns, ms, us) in case of parsing\n",
" integer timestamps\n",
" - Dict of ``{column_name: arg dict}``,\n",
" where the arg dict corresponds to the keyword arguments of\n",
" :func:`pandas.to_datetime`\n",
" :param index_col: (optional) string or list of strings,\n",
" default: None\n",
" Column(s) to set as index(MultiIndex)\n",
" \n",
" :rtype: pandas.DataFrame\n",
" \n",
" Usage ::\n",
" >>> data = db.get_table('wrdssec_all', 'dforms', obs=1000, columns=['cik', 'fdate', 'coname'])\n",
" >>> data.head()\n",
" cik fdate coname\n",
" 0000000003 1995-02-15 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1996-02-14 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1997-02-19 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1998-03-02 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1998-03-10 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y..\n",
" ...\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"conn.get_table(library='comp', table='funda', \n",
" columns = ['conm', 'gvkey', 'cusip','cik', 'fyear', 'consol', 'tic', 'at', 'ni', 'prcc_f'], obs=20)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 677
},
"id": "qfpoRjqmcel9",
"outputId": "ada52a09-babd-4d9a-a0cf-46e857cf9a8e"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" conm gvkey cusip cik fyear consol \\\n",
"0 A & E PLASTIK PAK INC 001000 000032102 None 1961.0 C \n",
"1 A & E PLASTIK PAK INC 001000 000032102 None 1962.0 C \n",
"2 A & E PLASTIK PAK INC 001000 000032102 None 1963.0 C \n",
"3 A & E PLASTIK PAK INC 001000 000032102 None 1964.0 C \n",
"4 A & E PLASTIK PAK INC 001000 000032102 None 1965.0 C \n",
"5 A & E PLASTIK PAK INC 001000 000032102 None 1966.0 C \n",
"6 A & E PLASTIK PAK INC 001000 000032102 None 1967.0 C \n",
"7 A & E PLASTIK PAK INC 001000 000032102 None 1968.0 C \n",
"8 A & M FOOD SERVICES INC 001001 000165100 0000723576 1982.0 C \n",
"9 A & E PLASTIK PAK INC 001000 000032102 None 1969.0 C \n",
"10 A & E PLASTIK PAK INC 001000 000032102 None 1970.0 C \n",
"11 A & E PLASTIK PAK INC 001000 000032102 None 1971.0 C \n",
"12 A & E PLASTIK PAK INC 001000 000032102 None 1971.0 C \n",
"13 A & E PLASTIK PAK INC 001000 000032102 None 1972.0 C \n",
"14 A & E PLASTIK PAK INC 001000 000032102 None 1972.0 C \n",
"15 AAI CORP 001002 000352104 0001306124 1960.0 C \n",
"16 AGOIL INC-OLD 001188 008484107 None 1978.0 C \n",
"17 A & E PLASTIK PAK INC 001000 000032102 None 1973.0 C \n",
"18 A & E PLASTIK PAK INC 001000 000032102 None 1973.0 C \n",
"19 A & E PLASTIK PAK INC 001000 000032102 None 1974.0 C \n",
"\n",
" tic at ni prcc_f \n",
"0 AE.2 NaN NaN NaN \n",
"1 AE.2 NaN NaN NaN \n",
"2 AE.2 NaN 0.003 NaN \n",
"3 AE.2 1.416 0.052 NaN \n",
"4 AE.2 2.310 -0.197 NaN \n",
"5 AE.2 2.430 0.164 NaN \n",
"6 AE.2 2.456 -0.090 NaN \n",
"7 AE.2 5.922 0.463 NaN \n",
"8 AMFD. 8.590 0.965 NaN \n",
"9 AE.2 28.712 1.766 NaN \n",
"10 AE.2 33.450 0.558 10.000 \n",
"11 AE.2 29.330 -2.318 5.750 \n",
"12 AE.2 NaN -2.317 5.750 \n",
"13 AE.2 19.907 2.225 5.125 \n",
"14 AE.2 NaN 2.225 5.125 \n",
"15 AAIC.1 NaN NaN NaN \n",
"16 AGOLC NaN NaN NaN \n",
"17 AE.2 21.771 1.863 1.750 \n",
"18 AE.2 NaN 1.863 1.750 \n",
"19 AE.2 25.638 2.240 2.125 "
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" conm \n",
" gvkey \n",
" cusip \n",
" cik \n",
" fyear \n",
" consol \n",
" tic \n",
" at \n",
" ni \n",
" prcc_f \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1961.0 \n",
" C \n",
" AE.2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 1 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1962.0 \n",
" C \n",
" AE.2 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 2 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1963.0 \n",
" C \n",
" AE.2 \n",
" NaN \n",
" 0.003 \n",
" NaN \n",
" \n",
" \n",
" 3 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1964.0 \n",
" C \n",
" AE.2 \n",
" 1.416 \n",
" 0.052 \n",
" NaN \n",
" \n",
" \n",
" 4 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1965.0 \n",
" C \n",
" AE.2 \n",
" 2.310 \n",
" -0.197 \n",
" NaN \n",
" \n",
" \n",
" 5 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1966.0 \n",
" C \n",
" AE.2 \n",
" 2.430 \n",
" 0.164 \n",
" NaN \n",
" \n",
" \n",
" 6 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1967.0 \n",
" C \n",
" AE.2 \n",
" 2.456 \n",
" -0.090 \n",
" NaN \n",
" \n",
" \n",
" 7 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1968.0 \n",
" C \n",
" AE.2 \n",
" 5.922 \n",
" 0.463 \n",
" NaN \n",
" \n",
" \n",
" 8 \n",
" A & M FOOD SERVICES INC \n",
" 001001 \n",
" 000165100 \n",
" 0000723576 \n",
" 1982.0 \n",
" C \n",
" AMFD. \n",
" 8.590 \n",
" 0.965 \n",
" NaN \n",
" \n",
" \n",
" 9 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1969.0 \n",
" C \n",
" AE.2 \n",
" 28.712 \n",
" 1.766 \n",
" NaN \n",
" \n",
" \n",
" 10 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1970.0 \n",
" C \n",
" AE.2 \n",
" 33.450 \n",
" 0.558 \n",
" 10.000 \n",
" \n",
" \n",
" 11 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1971.0 \n",
" C \n",
" AE.2 \n",
" 29.330 \n",
" -2.318 \n",
" 5.750 \n",
" \n",
" \n",
" 12 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1971.0 \n",
" C \n",
" AE.2 \n",
" NaN \n",
" -2.317 \n",
" 5.750 \n",
" \n",
" \n",
" 13 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1972.0 \n",
" C \n",
" AE.2 \n",
" 19.907 \n",
" 2.225 \n",
" 5.125 \n",
" \n",
" \n",
" 14 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1972.0 \n",
" C \n",
" AE.2 \n",
" NaN \n",
" 2.225 \n",
" 5.125 \n",
" \n",
" \n",
" 15 \n",
" AAI CORP \n",
" 001002 \n",
" 000352104 \n",
" 0001306124 \n",
" 1960.0 \n",
" C \n",
" AAIC.1 \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 16 \n",
" AGOIL INC-OLD \n",
" 001188 \n",
" 008484107 \n",
" None \n",
" 1978.0 \n",
" C \n",
" AGOLC \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 17 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1973.0 \n",
" C \n",
" AE.2 \n",
" 21.771 \n",
" 1.863 \n",
" 1.750 \n",
" \n",
" \n",
" 18 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1973.0 \n",
" C \n",
" AE.2 \n",
" NaN \n",
" 1.863 \n",
" 1.750 \n",
" \n",
" \n",
" 19 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" 000032102 \n",
" None \n",
" 1974.0 \n",
" C \n",
" AE.2 \n",
" 25.638 \n",
" 2.240 \n",
" 2.125 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 45
}
]
},
{
"cell_type": "markdown",
"source": [
"I googled and found this. https://www.wiwi.uni-muenster.de/uf/sites/uf/files/2017_10_12_wrds_data_items.pdf \n",
"Also TSLA were ounded and incorporated on July 1, 2003, and issued IPOs on Tesla went on to launch its first initial public offering (IPO) on NASDAQ on June 29, 2010. They issued 13.3 million shares of common stock for the public at a price of $17.00 per share. \n",
"Note: I am not sure why there are two lines for each year. Need to do more investigation."
],
"metadata": {
"id": "Dmb304L1gNxr"
}
},
{
"cell_type": "code",
"source": [
"subset = conn.get_table(library='comp', table='funda', \n",
" columns = ['conm', 'gvkey', 'cusip','cik', 'fyear', 'consol', 'tic', 'at', 'ni', 'prcc_f', 'che', 'ebit'])\n",
"subset2=subset[subset[\"tic\"] == 'TSLA']\n",
"subset2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "Em7x2ei9cegN",
"outputId": "95641933-f077-4606-8dc2-0f4845db739f"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" conm gvkey cusip cik fyear consol tic \\\n",
"330222 TESLA INC 184996 88160R101 0001318605 2005.0 C TSLA \n",
"330223 TESLA INC 184996 88160R101 0001318605 2006.0 C TSLA \n",
"330224 TESLA INC 184996 88160R101 0001318605 2007.0 C TSLA \n",
"333193 TESLA INC 184996 88160R101 0001318605 2008.0 C TSLA \n",
"333194 TESLA INC 184996 88160R101 0001318605 2008.0 C TSLA \n",
"333195 TESLA INC 184996 88160R101 0001318605 2009.0 C TSLA \n",
"333196 TESLA INC 184996 88160R101 0001318605 2009.0 C TSLA \n",
"333198 TESLA INC 184996 88160R101 0001318605 2010.0 C TSLA \n",
"333199 TESLA INC 184996 88160R101 0001318605 2010.0 C TSLA \n",
"333200 TESLA INC 184996 88160R101 0001318605 2011.0 C TSLA \n",
"333201 TESLA INC 184996 88160R101 0001318605 2011.0 C TSLA \n",
"333205 TESLA INC 184996 88160R101 0001318605 2012.0 C TSLA \n",
"333206 TESLA INC 184996 88160R101 0001318605 2012.0 C TSLA \n",
"333207 TESLA INC 184996 88160R101 0001318605 2013.0 C TSLA \n",
"333208 TESLA INC 184996 88160R101 0001318605 2013.0 C TSLA \n",
"333211 TESLA INC 184996 88160R101 0001318605 2014.0 C TSLA \n",
"333212 TESLA INC 184996 88160R101 0001318605 2014.0 C TSLA \n",
"333213 TESLA INC 184996 88160R101 0001318605 2015.0 C TSLA \n",
"333214 TESLA INC 184996 88160R101 0001318605 2015.0 C TSLA \n",
"333217 TESLA INC 184996 88160R101 0001318605 2016.0 C TSLA \n",
"333218 TESLA INC 184996 88160R101 0001318605 2016.0 C TSLA \n",
"333219 TESLA INC 184996 88160R101 0001318605 2017.0 C TSLA \n",
"333220 TESLA INC 184996 88160R101 0001318605 2017.0 C TSLA \n",
"333223 TESLA INC 184996 88160R101 0001318605 2018.0 C TSLA \n",
"333224 TESLA INC 184996 88160R101 0001318605 2018.0 C TSLA \n",
"333225 TESLA INC 184996 88160R101 0001318605 2019.0 C TSLA \n",
"333226 TESLA INC 184996 88160R101 0001318605 2019.0 C TSLA \n",
"333229 TESLA INC 184996 88160R101 0001318605 2020.0 C TSLA \n",
"333230 TESLA INC 184996 88160R101 0001318605 2020.0 C TSLA \n",
"333231 TESLA INC 184996 88160R101 0001318605 2021.0 C TSLA \n",
"333232 TESLA INC 184996 88160R101 0001318605 2021.0 C TSLA \n",
"\n",
" at ni prcc_f che ebit \n",
"330222 NaN NaN NaN NaN NaN \n",
"330223 NaN NaN NaN NaN NaN \n",
"330224 NaN NaN NaN NaN NaN \n",
"333193 51.699 -82.782 NaN 9.277 -78.504 \n",
"333194 NaN NaN NaN NaN NaN \n",
"333195 130.424 -55.740 NaN 69.627 -51.897 \n",
"333196 NaN NaN NaN NaN NaN \n",
"333198 386.082 -154.328 26.630 173.155 -146.838 \n",
"333199 NaN NaN 26.630 NaN NaN \n",
"333200 713.448 -254.411 28.560 303.803 -251.488 \n",
"333201 NaN NaN 28.560 NaN NaN \n",
"333205 1114.190 -396.213 33.870 220.984 -394.283 \n",
"333206 1114.190 -396.213 33.870 NaN NaN \n",
"333207 2416.930 -74.014 150.429 848.901 -61.283 \n",
"333208 2411.816 -74.014 150.429 NaN NaN \n",
"333211 5849.251 -294.040 222.410 1923.660 -186.689 \n",
"333212 5830.667 -294.040 222.410 NaN NaN \n",
"333213 8092.460 -888.663 240.010 1219.536 -716.629 \n",
"333214 8067.939 -888.663 240.010 NaN NaN \n",
"333217 22664.076 -674.914 213.690 3498.735 -645.640 \n",
"333218 22664.076 -674.914 213.690 NaN NaN \n",
"333219 28655.372 -1961.400 311.350 3523.237 -1565.086 \n",
"333220 28655.372 -1961.400 311.350 NaN NaN \n",
"333223 29739.614 -976.091 332.800 3878.169 -252.840 \n",
"333224 29739.614 -976.091 332.800 NaN NaN \n",
"333225 34309.000 -862.000 418.330 6514.000 80.000 \n",
"333226 34309.000 -862.000 418.330 NaN NaN \n",
"333229 52148.000 721.000 705.670 19622.000 2184.000 \n",
"333230 52148.000 721.000 705.670 NaN NaN \n",
"333231 62131.000 5519.000 1056.780 18052.000 6496.000 \n",
"333232 62131.000 5519.000 1056.780 NaN NaN "
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" conm \n",
" gvkey \n",
" cusip \n",
" cik \n",
" fyear \n",
" consol \n",
" tic \n",
" at \n",
" ni \n",
" prcc_f \n",
" che \n",
" ebit \n",
" \n",
" \n",
" \n",
" \n",
" 330222 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2005.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 330223 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2006.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 330224 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2007.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333193 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2008.0 \n",
" C \n",
" TSLA \n",
" 51.699 \n",
" -82.782 \n",
" NaN \n",
" 9.277 \n",
" -78.504 \n",
" \n",
" \n",
" 333194 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2008.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333195 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2009.0 \n",
" C \n",
" TSLA \n",
" 130.424 \n",
" -55.740 \n",
" NaN \n",
" 69.627 \n",
" -51.897 \n",
" \n",
" \n",
" 333196 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2009.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333198 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2010.0 \n",
" C \n",
" TSLA \n",
" 386.082 \n",
" -154.328 \n",
" 26.630 \n",
" 173.155 \n",
" -146.838 \n",
" \n",
" \n",
" 333199 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2010.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" 26.630 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333200 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2011.0 \n",
" C \n",
" TSLA \n",
" 713.448 \n",
" -254.411 \n",
" 28.560 \n",
" 303.803 \n",
" -251.488 \n",
" \n",
" \n",
" 333201 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2011.0 \n",
" C \n",
" TSLA \n",
" NaN \n",
" NaN \n",
" 28.560 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333205 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2012.0 \n",
" C \n",
" TSLA \n",
" 1114.190 \n",
" -396.213 \n",
" 33.870 \n",
" 220.984 \n",
" -394.283 \n",
" \n",
" \n",
" 333206 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2012.0 \n",
" C \n",
" TSLA \n",
" 1114.190 \n",
" -396.213 \n",
" 33.870 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333207 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2013.0 \n",
" C \n",
" TSLA \n",
" 2416.930 \n",
" -74.014 \n",
" 150.429 \n",
" 848.901 \n",
" -61.283 \n",
" \n",
" \n",
" 333208 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2013.0 \n",
" C \n",
" TSLA \n",
" 2411.816 \n",
" -74.014 \n",
" 150.429 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333211 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2014.0 \n",
" C \n",
" TSLA \n",
" 5849.251 \n",
" -294.040 \n",
" 222.410 \n",
" 1923.660 \n",
" -186.689 \n",
" \n",
" \n",
" 333212 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2014.0 \n",
" C \n",
" TSLA \n",
" 5830.667 \n",
" -294.040 \n",
" 222.410 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333213 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2015.0 \n",
" C \n",
" TSLA \n",
" 8092.460 \n",
" -888.663 \n",
" 240.010 \n",
" 1219.536 \n",
" -716.629 \n",
" \n",
" \n",
" 333214 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2015.0 \n",
" C \n",
" TSLA \n",
" 8067.939 \n",
" -888.663 \n",
" 240.010 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333217 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2016.0 \n",
" C \n",
" TSLA \n",
" 22664.076 \n",
" -674.914 \n",
" 213.690 \n",
" 3498.735 \n",
" -645.640 \n",
" \n",
" \n",
" 333218 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2016.0 \n",
" C \n",
" TSLA \n",
" 22664.076 \n",
" -674.914 \n",
" 213.690 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333219 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2017.0 \n",
" C \n",
" TSLA \n",
" 28655.372 \n",
" -1961.400 \n",
" 311.350 \n",
" 3523.237 \n",
" -1565.086 \n",
" \n",
" \n",
" 333220 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2017.0 \n",
" C \n",
" TSLA \n",
" 28655.372 \n",
" -1961.400 \n",
" 311.350 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333223 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2018.0 \n",
" C \n",
" TSLA \n",
" 29739.614 \n",
" -976.091 \n",
" 332.800 \n",
" 3878.169 \n",
" -252.840 \n",
" \n",
" \n",
" 333224 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2018.0 \n",
" C \n",
" TSLA \n",
" 29739.614 \n",
" -976.091 \n",
" 332.800 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333225 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2019.0 \n",
" C \n",
" TSLA \n",
" 34309.000 \n",
" -862.000 \n",
" 418.330 \n",
" 6514.000 \n",
" 80.000 \n",
" \n",
" \n",
" 333226 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2019.0 \n",
" C \n",
" TSLA \n",
" 34309.000 \n",
" -862.000 \n",
" 418.330 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333229 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2020.0 \n",
" C \n",
" TSLA \n",
" 52148.000 \n",
" 721.000 \n",
" 705.670 \n",
" 19622.000 \n",
" 2184.000 \n",
" \n",
" \n",
" 333230 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2020.0 \n",
" C \n",
" TSLA \n",
" 52148.000 \n",
" 721.000 \n",
" 705.670 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
" 333231 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2021.0 \n",
" C \n",
" TSLA \n",
" 62131.000 \n",
" 5519.000 \n",
" 1056.780 \n",
" 18052.000 \n",
" 6496.000 \n",
" \n",
" \n",
" 333232 \n",
" TESLA INC \n",
" 184996 \n",
" 88160R101 \n",
" 0001318605 \n",
" 2021.0 \n",
" C \n",
" TSLA \n",
" 62131.000 \n",
" 5519.000 \n",
" 1056.780 \n",
" NaN \n",
" NaN \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "markdown",
"source": [
"Let's look at `comp/company` table."
],
"metadata": {
"id": "mTXV0b5CcfUf"
}
},
{
"cell_type": "code",
"source": [
"print(conn.describe_table('comp', 'company'))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jofORZL8TUH6",
"outputId": "49e32824-e940-4a52-b005-16f8dc67982c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Approximately 51778 rows in comp.company.\n",
" name nullable type\n",
"0 conm True VARCHAR(70)\n",
"1 gvkey True VARCHAR(6)\n",
"2 add1 True VARCHAR(65)\n",
"3 add2 True VARCHAR(65)\n",
"4 add3 True VARCHAR(65)\n",
"5 add4 True VARCHAR(65)\n",
"6 addzip True VARCHAR(20)\n",
"7 busdesc True VARCHAR(2000)\n",
"8 cik True VARCHAR(10)\n",
"9 city True VARCHAR(100)\n",
"10 conml True VARCHAR(100)\n",
"11 costat True VARCHAR(1)\n",
"12 county True VARCHAR(100)\n",
"13 dlrsn True VARCHAR(8)\n",
"14 ein True VARCHAR(10)\n",
"15 fax True VARCHAR(20)\n",
"16 fic True VARCHAR(3)\n",
"17 fyrc True DOUBLE_PRECISION\n",
"18 ggroup True VARCHAR(4)\n",
"19 gind True VARCHAR(6)\n",
"20 gsector True VARCHAR(2)\n",
"21 gsubind True VARCHAR(8)\n",
"22 idbflag True VARCHAR(1)\n",
"23 incorp True VARCHAR(8)\n",
"24 loc True VARCHAR(3)\n",
"25 naics True VARCHAR(6)\n",
"26 phone True VARCHAR(20)\n",
"27 prican True VARCHAR(8)\n",
"28 prirow True VARCHAR(8)\n",
"29 priusa True VARCHAR(8)\n",
"30 sic True VARCHAR(4)\n",
"31 spcindcd True DOUBLE_PRECISION\n",
"32 spcseccd True DOUBLE_PRECISION\n",
"33 spcsrc True VARCHAR(3)\n",
"34 state True VARCHAR(8)\n",
"35 stko True DOUBLE_PRECISION\n",
"36 weburl True VARCHAR(60)\n",
"37 dldte True DATE\n",
"38 ipodate True DATE\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(type(conn.get_table(library='comp', table='company', \n",
" columns = ['conm', 'gvkey', 'cik'], obs=10)))\n",
"print(45 * \"-\")\n",
"conn.get_table(library='comp', table='company', \n",
" columns = ['conm', 'gvkey', 'cik', 'busdesc', 'dlrsn', 'ipodate'], obs=10)"
],
"metadata": {
"id": "z5ElDCTkZT5l",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 398
},
"outputId": "50113486-32fb-4ab6-e489-43a7ea0b55d3"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"---------------------------------------------\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" conm gvkey cik \\\n",
"0 A & E PLASTIK PAK INC 001000 None \n",
"1 A & M FOOD SERVICES INC 001001 0000723576 \n",
"2 AAI CORP 001002 0001306124 \n",
"3 A.A. IMPORTING CO INC 001003 0000730052 \n",
"4 AAR CORP 001004 0000001750 \n",
"5 A.B.A. INDUSTRIES INC 001005 None \n",
"6 ABC INDS INC 001006 None \n",
"7 ABKCO INDUSTRIES INC 001007 0000001882 \n",
"8 ABM COMPUTER SYSTEMS INC 001008 None \n",
"9 ABS INDUSTRIES INC 001009 0000313368 \n",
"\n",
" busdesc dlrsn ipodate \n",
"0 None 09 None \n",
"1 None 01 None \n",
"2 AAI Corporation, together with its subsidiarie... 01 None \n",
"3 A.A. Importing Company, Inc. designs, manufact... 07 None \n",
"4 AAR Corp. provides products and services to co... None 1988-01-01 \n",
"5 A.B.A. Industries Inc. was acquired by McSwain... 01 None \n",
"6 ABC Industries, Inc. manufactures and supplies... 01 None \n",
"7 ABKCO Music & Records, Inc. operates as an ent... 10 None \n",
"8 None 04 None \n",
"9 Makes cold and warm forgings, including transm... 07 None "
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" conm \n",
" gvkey \n",
" cik \n",
" busdesc \n",
" dlrsn \n",
" ipodate \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A & E PLASTIK PAK INC \n",
" 001000 \n",
" None \n",
" None \n",
" 09 \n",
" None \n",
" \n",
" \n",
" 1 \n",
" A & M FOOD SERVICES INC \n",
" 001001 \n",
" 0000723576 \n",
" None \n",
" 01 \n",
" None \n",
" \n",
" \n",
" 2 \n",
" AAI CORP \n",
" 001002 \n",
" 0001306124 \n",
" AAI Corporation, together with its subsidiarie... \n",
" 01 \n",
" None \n",
" \n",
" \n",
" 3 \n",
" A.A. IMPORTING CO INC \n",
" 001003 \n",
" 0000730052 \n",
" A.A. Importing Company, Inc. designs, manufact... \n",
" 07 \n",
" None \n",
" \n",
" \n",
" 4 \n",
" AAR CORP \n",
" 001004 \n",
" 0000001750 \n",
" AAR Corp. provides products and services to co... \n",
" None \n",
" 1988-01-01 \n",
" \n",
" \n",
" 5 \n",
" A.B.A. INDUSTRIES INC \n",
" 001005 \n",
" None \n",
" A.B.A. Industries Inc. was acquired by McSwain... \n",
" 01 \n",
" None \n",
" \n",
" \n",
" 6 \n",
" ABC INDS INC \n",
" 001006 \n",
" None \n",
" ABC Industries, Inc. manufactures and supplies... \n",
" 01 \n",
" None \n",
" \n",
" \n",
" 7 \n",
" ABKCO INDUSTRIES INC \n",
" 001007 \n",
" 0000001882 \n",
" ABKCO Music & Records, Inc. operates as an ent... \n",
" 10 \n",
" None \n",
" \n",
" \n",
" 8 \n",
" ABM COMPUTER SYSTEMS INC \n",
" 001008 \n",
" None \n",
" None \n",
" 04 \n",
" None \n",
" \n",
" \n",
" 9 \n",
" ABS INDUSTRIES INC \n",
" 001009 \n",
" 0000313368 \n",
" Makes cold and warm forgings, including transm... \n",
" 07 \n",
" None \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 26
}
]
},
{
"cell_type": "code",
"source": [
"help(conn.raw_sql)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2Ptqnp2JTUB2",
"outputId": "063c8a39-43b1-4dde-85c1-e3e39b8ea30a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Help on method raw_sql in module wrds.sql:\n",
"\n",
"raw_sql(sql, coerce_float=True, date_cols=None, index_col=None, params=None, chunksize=500000, return_iter=False) method of wrds.sql.Connection instance\n",
" Queries the database using a raw SQL string.\n",
" \n",
" :param sql: SQL code in string object.\n",
" :param coerce_float: (optional) boolean, default: True\n",
" Attempt to convert values to non-string, non-numeric objects\n",
" to floating point. Can result in loss of precision.\n",
" :param date_cols: (optional) list or dict, default: None\n",
" - List of column names to parse as date\n",
" - Dict of ``{column_name: format string}`` where\n",
" format string is:\n",
" strftime compatible in case of parsing string times or\n",
" is one of (D, s, ns, ms, us) in case of parsing\n",
" integer timestamps\n",
" - Dict of ``{column_name: arg dict}``,\n",
" where the arg dict corresponds to the keyword arguments of\n",
" :func:`pandas.to_datetime`\n",
" :param index_col: (optional) string or list of strings,\n",
" default: None\n",
" Column(s) to set as index(MultiIndex)\n",
" :param params: parameters to SQL query, if parameterized.\n",
" :param chunksize: (optional) integer or None default: 500000\n",
" Process query in chunks of this size. Smaller chunksizes can save\n",
" a considerable amount of memory while query is being processed.\n",
" Set to None run query w/o chunking.\n",
" :param return_iter: (optional) boolean, default:False\n",
" When chunksize is not None, return an iterator where chunksize\n",
" number of rows is included in each chunk.\n",
" \n",
" :rtype: pandas.DataFrame or or Iterator[pandas.DataFrame]\n",
" \n",
" \n",
" Usage ::\n",
" # Basic Usage\n",
" >>> data = db.raw_sql('select cik, fdate, coname from wrdssec_all.dforms;', date_cols=['fdate'], index_col='cik')\n",
" >>> data.head()\n",
" cik fdate coname\n",
" 0000000003 1995-02-15 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1996-02-14 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1997-02-19 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1998-03-02 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y...\n",
" 0000000003 1998-03-10 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y..\n",
" ...\n",
" \n",
" # Parameterized SQL query\n",
" >>> parm = {'syms': ('A', 'AA', 'AAPL'), 'num_shares': 50000}\n",
" >>> data = db.raw_sql('select * from taqmsec.ctm_20030910 where sym_root in %(syms)s and size > %(num_shares)s', params=parm)\n",
" >>> data.head()\n",
" date time_m ex sym_root sym_suffix tr_scond size price tr_stopind tr_corr tr_seqnum tr_source tr_rf\n",
" 2003-09-10 11:02:09.485000 T A None None 211400.0 25.350 N 00 1.929952e+15 C None\n",
" 2003-09-10 11:04:29.508000 N A None None 55500.0 25.180 N 00 1.929952e+15 C None\n",
" 2003-09-10 15:08:21.155000 N A None None 50500.0 24.470 N 00 1.929967e+15 C None\n",
" 2003-09-10 16:10:35.522000 T A None B 71900.0 24.918 N 00 1.929970e+15 C None\n",
" 2003-09-10 09:35:20.709000 N AA None None 108100.0 28.200 N 00 1.929947e+15 C None\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"apple = conn.raw_sql(\"\"\"select permno, date, prc, ret, shrout \n",
" from crsp.msf \n",
" where permno = 14593\n",
" and date>='01/01/2020'\"\"\", \n",
" date_cols=['date'])\n",
"apple"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 896
},
"id": "Q4RPi_IHZT8y",
"outputId": "b27e52b4-8f72-46a6-f25e-46259a4c4599"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" permno date prc ret shrout\n",
"0 14593.0 2020-01-31 309.510010 0.054010 4375480.0\n",
"1 14593.0 2020-02-28 273.359985 -0.114310 4375480.0\n",
"2 14593.0 2020-03-31 254.289993 -0.069761 4323987.0\n",
"3 14593.0 2020-04-30 293.799988 0.155374 4334335.0\n",
"4 14593.0 2020-05-29 317.940002 0.084956 4334335.0\n",
"5 14593.0 2020-06-30 364.799988 0.147386 4283939.0\n",
"6 14593.0 2020-07-31 425.040009 0.165132 4275634.0\n",
"7 14593.0 2020-08-31 129.039993 0.216309 17102536.0\n",
"8 14593.0 2020-09-30 115.809998 -0.102526 16976763.0\n",
"9 14593.0 2020-10-30 108.860001 -0.060012 17001802.0\n",
"10 14593.0 2020-11-30 119.050003 0.095490 17001802.0\n",
"11 14593.0 2020-12-31 132.690002 0.114574 16823263.0\n",
"12 14593.0 2021-01-29 131.960007 -0.005502 16788096.0\n",
"13 14593.0 2021-02-26 121.260002 -0.079532 16788096.0\n",
"14 14593.0 2021-03-31 122.150002 0.007340 16686305.0\n",
"15 14593.0 2021-04-30 131.460007 0.076218 16687631.0\n",
"16 14593.0 2021-05-28 124.610001 -0.050434 16687631.0\n",
"17 14593.0 2021-06-30 136.960007 0.099109 16556942.0\n",
"18 14593.0 2021-07-30 145.860001 0.064982 16530166.0\n",
"19 14593.0 2021-08-31 151.830002 0.042438 16530166.0\n",
"20 14593.0 2021-09-30 141.500000 -0.068037 16426786.0\n",
"21 14593.0 2021-10-29 149.800003 0.058657 16406397.0\n",
"22 14593.0 2021-11-30 165.300003 0.104940 16406397.0\n",
"23 14593.0 2021-12-31 177.570007 0.074229 16344923.0\n",
"24 14593.0 2022-01-31 174.779999 -0.015712 16319441.0\n",
"25 14593.0 2022-02-28 165.119995 -0.054011 16319441.0\n",
"26 14593.0 2022-03-31 174.610001 0.057473 16319441.0"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" permno \n",
" date \n",
" prc \n",
" ret \n",
" shrout \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 14593.0 \n",
" 2020-01-31 \n",
" 309.510010 \n",
" 0.054010 \n",
" 4375480.0 \n",
" \n",
" \n",
" 1 \n",
" 14593.0 \n",
" 2020-02-28 \n",
" 273.359985 \n",
" -0.114310 \n",
" 4375480.0 \n",
" \n",
" \n",
" 2 \n",
" 14593.0 \n",
" 2020-03-31 \n",
" 254.289993 \n",
" -0.069761 \n",
" 4323987.0 \n",
" \n",
" \n",
" 3 \n",
" 14593.0 \n",
" 2020-04-30 \n",
" 293.799988 \n",
" 0.155374 \n",
" 4334335.0 \n",
" \n",
" \n",
" 4 \n",
" 14593.0 \n",
" 2020-05-29 \n",
" 317.940002 \n",
" 0.084956 \n",
" 4334335.0 \n",
" \n",
" \n",
" 5 \n",
" 14593.0 \n",
" 2020-06-30 \n",
" 364.799988 \n",
" 0.147386 \n",
" 4283939.0 \n",
" \n",
" \n",
" 6 \n",
" 14593.0 \n",
" 2020-07-31 \n",
" 425.040009 \n",
" 0.165132 \n",
" 4275634.0 \n",
" \n",
" \n",
" 7 \n",
" 14593.0 \n",
" 2020-08-31 \n",
" 129.039993 \n",
" 0.216309 \n",
" 17102536.0 \n",
" \n",
" \n",
" 8 \n",
" 14593.0 \n",
" 2020-09-30 \n",
" 115.809998 \n",
" -0.102526 \n",
" 16976763.0 \n",
" \n",
" \n",
" 9 \n",
" 14593.0 \n",
" 2020-10-30 \n",
" 108.860001 \n",
" -0.060012 \n",
" 17001802.0 \n",
" \n",
" \n",
" 10 \n",
" 14593.0 \n",
" 2020-11-30 \n",
" 119.050003 \n",
" 0.095490 \n",
" 17001802.0 \n",
" \n",
" \n",
" 11 \n",
" 14593.0 \n",
" 2020-12-31 \n",
" 132.690002 \n",
" 0.114574 \n",
" 16823263.0 \n",
" \n",
" \n",
" 12 \n",
" 14593.0 \n",
" 2021-01-29 \n",
" 131.960007 \n",
" -0.005502 \n",
" 16788096.0 \n",
" \n",
" \n",
" 13 \n",
" 14593.0 \n",
" 2021-02-26 \n",
" 121.260002 \n",
" -0.079532 \n",
" 16788096.0 \n",
" \n",
" \n",
" 14 \n",
" 14593.0 \n",
" 2021-03-31 \n",
" 122.150002 \n",
" 0.007340 \n",
" 16686305.0 \n",
" \n",
" \n",
" 15 \n",
" 14593.0 \n",
" 2021-04-30 \n",
" 131.460007 \n",
" 0.076218 \n",
" 16687631.0 \n",
" \n",
" \n",
" 16 \n",
" 14593.0 \n",
" 2021-05-28 \n",
" 124.610001 \n",
" -0.050434 \n",
" 16687631.0 \n",
" \n",
" \n",
" 17 \n",
" 14593.0 \n",
" 2021-06-30 \n",
" 136.960007 \n",
" 0.099109 \n",
" 16556942.0 \n",
" \n",
" \n",
" 18 \n",
" 14593.0 \n",
" 2021-07-30 \n",
" 145.860001 \n",
" 0.064982 \n",
" 16530166.0 \n",
" \n",
" \n",
" 19 \n",
" 14593.0 \n",
" 2021-08-31 \n",
" 151.830002 \n",
" 0.042438 \n",
" 16530166.0 \n",
" \n",
" \n",
" 20 \n",
" 14593.0 \n",
" 2021-09-30 \n",
" 141.500000 \n",
" -0.068037 \n",
" 16426786.0 \n",
" \n",
" \n",
" 21 \n",
" 14593.0 \n",
" 2021-10-29 \n",
" 149.800003 \n",
" 0.058657 \n",
" 16406397.0 \n",
" \n",
" \n",
" 22 \n",
" 14593.0 \n",
" 2021-11-30 \n",
" 165.300003 \n",
" 0.104940 \n",
" 16406397.0 \n",
" \n",
" \n",
" 23 \n",
" 14593.0 \n",
" 2021-12-31 \n",
" 177.570007 \n",
" 0.074229 \n",
" 16344923.0 \n",
" \n",
" \n",
" 24 \n",
" 14593.0 \n",
" 2022-01-31 \n",
" 174.779999 \n",
" -0.015712 \n",
" 16319441.0 \n",
" \n",
" \n",
" 25 \n",
" 14593.0 \n",
" 2022-02-28 \n",
" 165.119995 \n",
" -0.054011 \n",
" 16319441.0 \n",
" \n",
" \n",
" 26 \n",
" 14593.0 \n",
" 2022-03-31 \n",
" 174.610001 \n",
" 0.057473 \n",
" 16319441.0 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"source": [
"apple_fund = conn.raw_sql(\"\"\"select a.gvkey, a.iid, a.datadate, a.tic, a.conm,\n",
" a.at, b.prccm, b.cshoq \n",
" \n",
" from comp.funda a \n",
" inner join comp.secm b \n",
" \n",
" on a.gvkey = b.gvkey\n",
" and a.iid = b.iid\n",
" and a.datadate = b.datadate \n",
" \n",
" where a.tic = 'AAPL' \n",
" and a.datadate>='01/01/1960'\n",
" and a.datafmt = 'STD' \n",
" and a.consol = 'C' \n",
" and a.indfmt = 'INDL'\n",
" \"\"\", date_cols=['datadate'])\n",
"print(apple_fund)\n",
"print(apple_fund.info())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "teTJQHmlZT2t",
"outputId": "e1cc54a0-c852-4a53-a680-a59db1d61332"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" gvkey iid datadate tic conm at prccm cshoq\n",
"0 001690 01 1981-09-30 AAPL APPLE INC 254.838 15.249996 55.309\n",
"1 001690 01 1982-09-30 AAPL APPLE INC 357.787 18.249985 57.123\n",
"2 001690 01 1983-09-30 AAPL APPLE INC 556.579 23.125000 59.198\n",
"3 001690 01 1984-09-30 AAPL APPLE INC 788.786 25.125000 60.535\n",
"4 001690 01 1985-09-30 AAPL APPLE INC 936.177 15.750000 61.850\n",
"5 001690 01 1986-09-30 AAPL APPLE INC 1160.128 33.500000 62.628\n",
"6 001690 01 1987-09-30 AAPL APPLE INC 1477.931 56.500000 126.088\n",
"7 001690 01 1988-09-30 AAPL APPLE INC 2082.086 43.250000 122.768\n",
"8 001690 01 1989-09-30 AAPL APPLE INC 2743.899 44.500000 126.270\n",
"9 001690 01 1990-09-30 AAPL APPLE INC 2975.707 29.000000 115.359\n",
"10 001690 01 1991-09-30 AAPL APPLE INC 3493.597 49.500000 118.386\n",
"11 001690 01 1992-09-30 AAPL APPLE INC 4223.693 45.125000 118.479\n",
"12 001690 01 1993-09-30 AAPL APPLE INC 5171.412 23.375000 116.147\n",
"13 001690 01 1994-09-30 AAPL APPLE INC 5302.746 33.686996 119.543\n",
"14 001690 01 1995-09-30 AAPL APPLE INC 6231.000 37.250000 122.922\n",
"15 001690 01 1996-09-30 AAPL APPLE INC 5364.000 22.186996 124.497\n",
"16 001690 01 1997-09-30 AAPL APPLE INC 4233.000 21.686996 127.949\n",
"17 001690 01 1998-09-30 AAPL APPLE INC 4289.000 38.125000 135.193\n",
"18 001690 01 1999-09-30 AAPL APPLE INC 5161.000 63.312500 160.799\n",
"19 001690 01 2000-09-30 AAPL APPLE INC 6803.000 25.750000 335.677\n",
"20 001690 01 2001-09-30 AAPL APPLE INC 6021.000 15.510000 350.922\n",
"21 001690 01 2002-09-30 AAPL APPLE INC 6298.000 14.500000 358.959\n",
"22 001690 01 2003-09-30 AAPL APPLE INC 6815.000 20.720000 366.727\n",
"23 001690 01 2004-09-30 AAPL APPLE INC 8050.000 38.750000 391.444\n",
"24 001690 01 2005-09-30 AAPL APPLE INC 11551.000 53.610000 835.019\n",
"25 001690 01 2006-09-30 AAPL APPLE INC 17205.000 76.980000 855.263\n",
"26 001690 01 2007-09-30 AAPL APPLE INC 25347.000 153.470000 872.329\n",
"27 001690 01 2008-09-30 AAPL APPLE INC 39572.000 113.660000 888.326\n",
"28 001690 01 2009-09-30 AAPL APPLE INC 47501.000 185.350000 899.806\n",
"29 001690 01 2010-09-30 AAPL APPLE INC 75183.000 283.750000 915.970\n",
"30 001690 01 2011-09-30 AAPL APPLE INC 116371.000 381.320000 929.277\n",
"31 001690 01 2012-09-30 AAPL APPLE INC 176064.000 667.105000 939.208\n",
"32 001690 01 2013-09-30 AAPL APPLE INC 207000.000 476.750000 899.213\n",
"33 001690 01 2014-09-30 AAPL APPLE INC 231839.000 100.750000 5866.161\n",
"34 001690 01 2015-09-30 AAPL APPLE INC 290479.000 110.300000 5578.753\n",
"35 001690 01 2016-09-30 AAPL APPLE INC 321686.000 113.050000 5336.166\n",
"36 001690 01 2017-09-30 AAPL APPLE INC 375319.000 154.120000 5126.201\n",
"37 001690 01 2018-09-30 AAPL APPLE INC 365725.000 225.740000 4754.986\n",
"38 001690 01 2019-09-30 AAPL APPLE INC 338516.000 223.970000 4443.236\n",
"39 001690 01 2020-09-30 AAPL APPLE INC 323888.000 115.810000 16976.763\n",
"40 001690 01 2021-09-30 AAPL APPLE INC 351002.000 141.500000 16426.786\n",
"\n",
"RangeIndex: 41 entries, 0 to 40\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 gvkey 41 non-null object \n",
" 1 iid 41 non-null object \n",
" 2 datadate 41 non-null datetime64[ns]\n",
" 3 tic 41 non-null object \n",
" 4 conm 41 non-null object \n",
" 5 at 41 non-null float64 \n",
" 6 prccm 41 non-null float64 \n",
" 7 cshoq 41 non-null float64 \n",
"dtypes: datetime64[ns](1), float64(3), object(4)\n",
"memory usage: 2.7+ KB\n",
"None\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"data_directory = '/content/drive/My Drive/Colab Notebooks/data/'\n",
"!pwd"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "b8xtMKzWaYDk",
"outputId": "fa9912ad-a49e-4822-b881-bd0b8dd731e7"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"apple_fund.to_pickle(data_directory + \"apple_fund.pkl\")"
],
"metadata": {
"id": "4X1yxaIpZT0K"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "DqzlfW4NZTcZ"
},
"execution_count": null,
"outputs": []
}
]
}