{ "cells": [ { "cell_type": "markdown", "metadata": { "toc": true }, "source": [ "

Table of Contents

\n", "
" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T15:58:07.149766Z", "start_time": "2019-02-05T15:58:02.341266Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:\n", "b'In file included from /tmp/try_flags_rhuhvzcq.c:3:0:\\n/usr/include/stdio.h:27:10: fatal error: bits/libc-header-start.h: No such file or directory\\n #include \\n ^~~~~~~~~~~~~~~~~~~~~~~~~~\\ncompilation terminated.\\n'\n", "Mapped name None to device cuda: GeForce GTX 980 Ti (0000:01:00.0)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPython 3.7.2\n", "IPython 6.5.0\n", "\n", "compiler : GCC 7.3.0\n", "system : Linux\n", "release : 4.19.11-041911-generic\n", "machine : x86_64\n", "processor : x86_64\n", "CPU cores : 12\n", "interpreter: 64bit\n", "Git hash :\n", "\n", "json 2.0.9\n", "numpy 1.15.4\n", "pandas 0.23.4\n", "pymc3 3.6\n", "numba 0.41.0\n", "pystore 0.1.5\n", "matplotlib 3.0.2\n", "seaborn 0.9.0\n", "dask 1.0.0\n", "logzero 1.5.0\n", "\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "%load_ext watermark\n", "\n", "from pathlib import PurePath, Path\n", "from collections import OrderedDict as od\n", "import sys\n", "import time \n", "import os\n", "import json\n", "from pprint import pprint, pformat\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import pymc3 as pm\n", "from dask.diagnostics import ProgressBar\n", "pbar = ProgressBar()\n", "pbar.register()\n", "import numpy as np\n", "import scipy.stats as stats\n", "import statsmodels.api as sm\n", "import numba as nb\n", "import pystore\n", "\n", "# import visual tools\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import matplotlib.gridspec as gridspec\n", "%matplotlib inline\n", "import seaborn as sns\n", "\n", "sns_params = {\n", " 'font.size':9.5,\n", " 'font.weight':'medium',\n", " 
'figure.figsize':(10,7),\n", "}\n", "\n", "plt.style.use('seaborn-talk')\n", "plt.style.use('bmh')\n", "sns.set_context(sns_params)\n", "#plt.rcParams['font.family'] = 'DejaVu Sans Mono'\n", "#plt.rcParams['font.size'] = 9.5\n", "#plt.rcParams['font.weight'] = 'medium'\n", "#plt.rcParams['figure.figsize'] = 10,7\n", "#blue, green, red, purple, gold, teal = sns.color_palette('colorblind', 6)\n", "\n", "# import util libs\n", "from tqdm import tqdm, tqdm_notebook\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "from dotenv import load_dotenv\n", "load_dotenv(verbose=True)\n", "\n", "from logzero import logger as lz_logger\n", "\n", "from src.tools.pystore_tools import *\n", "from src.tools.utils import *\n", "from src.CONSTANTS import *\n", "\n", "pdir = get_relative_project_dir(\n", " project_repo_name=PROJECT_REPO_NAME, partial=False)\n", "data_dir = Path(pdir/'data')\n", "\n", "%watermark -v -m -g\n", "print()\n", "%watermark --iversions" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T15:58:07.595020Z", "start_time": "2019-02-05T15:58:07.154107Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SymbolNameLastSaleNetChangeNetChangeDirectionPercentChange1YrPercentChange
0AAXJiShares MSCI All Country Asia ex Japan Index Fund54.7100-0.2119down0.392.76
1ACWIiShares MSCI ACWI Index Fund59.4800-0.0868down0.158.21
2ACWViShares Edge MSCI Min Vol Global ETF72.4498-1.2402down1.686.75
3ACWXiShares MSCI ACWI ex US Index Fund40.1600-0.0401down0.103.10
4ADREBLDRS Emerging Markets 50 ADR Index Fund32.26000.0300up0.0910.34
\n", "
" ], "text/plain": [ " Symbol Name LastSale \\\n", "0 AAXJ iShares MSCI All Country Asia ex Japan Index Fund 54.7100 \n", "1 ACWI iShares MSCI ACWI Index Fund 59.4800 \n", "2 ACWV iShares Edge MSCI Min Vol Global ETF 72.4498 \n", "3 ACWX iShares MSCI ACWI ex US Index Fund 40.1600 \n", "4 ADRE BLDRS Emerging Markets 50 ADR Index Fund 32.2600 \n", "\n", " NetChange NetChangeDirection PercentChange 1YrPercentChange \n", "0 -0.2119 down 0.39 2.76 \n", "1 -0.0868 down 0.15 8.21 \n", "2 -1.2402 down 1.68 6.75 \n", "3 -0.0401 down 0.10 3.10 \n", "4 0.0300 up 0.09 10.34 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the ETF list (Symbol, Name and quote columns) from the project's external data folder.\n", "data_ext = data_dir / 'external'\n", "symbols = pd.read_csv(data_ext / 'ETFList.Options.Nasdaq__M.csv')\n", "symbols.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T15:58:08.930991Z", "start_time": "2019-02-05T15:58:07.596742Z" }, "scrolled": true }, "outputs": [], "source": [ "# Location of the processed pystore data. Set PYSTORE_DATA_DIR (in the shell or in the\n", "# .env file loaded by load_dotenv in the setup cell) to override the original author's\n", "# machine-specific default, which is kept as the fallback.\n", "pystore_data_dir = Path(os.environ.get(\n", "    'PYSTORE_DATA_DIR',\n", "    '/media/bcr/HDD/Code_Backups_Sync/iex_intraday_equity_downloader/data/processed'))\n", "# get_item comes from the project's wildcard imports; presumably it returns the stored\n", "# item for the given symbol -- TODO confirm against src.tools.pystore_tools.\n", "item = get_item(symbols.Symbol[0], pystore_data_dir)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T15:58:11.000480Z", "start_time": "2019-02-05T15:58:08.933503Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[########################################] | 100% Completed | 1.6s\n", "-------------------------------------------------------------------------------\n", "dataframe information\n", "-------------------------------------------------------------------------------\n", " lastSalePrice lastSaleSize volume spread \\\n", "index \n", "2018-12-31 13:55:00 63.971667 605.000000 388675.666667 0.063333 \n", "2018-12-31 13:56:00 63.990000 400.000000 389254.000000 0.060000 \n", "2018-12-31 13:57:00 63.905000 962.000000 390854.000000 0.050000 \n", 
"2018-12-31 13:58:00 63.900000 266.666667 391662.000000 0.053333 \n", "2018-12-31 13:59:00 63.936667 166.666667 391828.666667 0.023333 \n", "\n", " mid_price dollar_volume \n", "index \n", "2018-12-31 13:55:00 63.965000 2.486425e+07 \n", "2018-12-31 13:56:00 64.000000 2.490836e+07 \n", "2018-12-31 13:57:00 63.905000 2.497752e+07 \n", "2018-12-31 13:58:00 63.923333 2.502720e+07 \n", "2018-12-31 13:59:00 63.945000 2.505222e+07 \n", "--------------------------------------------------\n", "\n", "DatetimeIndex: 20397 entries, 2018-10-01 07:32:00 to 2018-12-31 13:59:00\n", "Data columns (total 6 columns):\n", "lastSalePrice 20397 non-null float64\n", "lastSaleSize 20397 non-null float64\n", "volume 20397 non-null float64\n", "spread 20397 non-null float64\n", "mid_price 20397 non-null float64\n", "dollar_volume 20397 non-null float64\n", "dtypes: float64(6)\n", "memory usage: 1.1 MB\n", "None\n", "-------------------------------------------------------------------------------\n", "\n" ] } ], "source": [ "keep_cols = ['lastSalePrice','lastSaleSize','volume','spread','mid_price','dollar_volume']\n", "# 2018-10-01..2018-12-31 slice, averaged into 1-minute bins; rows containing NaN are dropped\n", "# before .compute() materialises the result.\n", "df = item.data.loc['2018-10-01':'2018-12-31'][keep_cols].resample('1Min').mean().dropna().compute()\n", "cprint(df)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T16:01:07.753655Z", "start_time": "2019-02-05T16:01:07.516906Z" } }, "outputs": [ { "data": { "text/plain": [ "(42,)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Issuers whose funds are kept. Alternatives left by the original author:\n", "# 'iShares', 'SPDR', 'PowerShares', 'Schwab', 'JPMorgan', 'Guggenheim', 'First Trust'\n", "issuers = ['Vanguard']\n", "# isinstance guard: a missing Name is read by pandas as float NaN, and `y in x` on a\n", "# float raises TypeError; such rows are simply treated as non-matching.\n", "names = [x for x in symbols['Name'] if isinstance(x, str) and any(y in x for y in issuers)]\n", "select_symbol_df = symbols[symbols['Name'].isin(names)]\n", "select_symbols = select_symbol_df.Symbol\n", "print(select_symbols.shape)\n", "\n", "print(select_symbol_df.Name.sort_values())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get 
select symbols in single df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T00:54:30.960354Z", "start_time": "2019-02-05T00:53:02.201720Z" }, "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c6612b6e7122442ea8571183be9d8965", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=247), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "********* AAXJ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* ACWI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* ACWV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* ACWX *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* AGG *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* BIL *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* BIV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* BKF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* BLV *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* BND *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* BNDX *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* BSV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* BWX *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* CIU *********\n", "[########################################] | 100% Completed | 0.1s\n", 
"********* CLY *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* CRED *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* CSJ *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* CWB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* CWI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* DGT *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* DIA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* DSI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* DVY *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* DWX *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* EBND *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* ECH *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* ECNS *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* EDIV *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* EEM *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EEMV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EFA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EFAV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EFG *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EFV *********\n", "[########################################] | 
100% Completed | 0.4s\n", "********* EIDO *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EMB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EMIF *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* ENZL *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* EPHE *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EPP *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EPU *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* EUFN *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EUSA *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* EWA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWC *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWD *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWG *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWH *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWJ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWK *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* EWL *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWM *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWN *********\n", 
"[########################################] | 100% Completed | 0.3s\n", "********* EWP *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWQ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWS *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWT *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWU *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWW *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWX *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* EWY *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EWZ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EZA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* EZU *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* FEZ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* FXI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* GAF *********\n", "GAF error: Can only resample dataframes with known divisions\n", "See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", "[########################################] | 100% Completed | 0.1s\n", "GAF second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* GII *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* GLD *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* GMF 
*********\n", "[########################################] | 100% Completed | 0.2s\n", "********* GNR *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* GSG *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* GUR *********\n", "GUR error: Can only resample dataframes with known divisions\n", "See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", "[########################################] | 100% Completed | 0.1s\n", "GUR second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* GVI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* GWL *********\n", "GWL error: Can only resample dataframes with known divisions\n", "See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", "[########################################] | 100% Completed | 0.1s\n", "GWL second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* GWX *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* GXC *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* HDV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* HEFA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* HEWG *********\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[########################################] | 100% Completed | 0.2s\n", "********* HEWJ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* HEZU *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* HYG *********\n", 
"[########################################] | 100% Completed | 0.3s\n", "********* HYMB *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IAI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IAT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IBB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* ICF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IDU *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IDV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IEF *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IEFA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IEI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IEMG *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IEO *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* IEV *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IEZ *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IFGL *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IGE *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IGF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IGN *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IGOV *********\n", "[########################################] | 100% Completed | 0.2s\n", 
"********* IGV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IHE *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IHF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IHI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IJH *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IJJ *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IJK *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IJR *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IJS *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IJT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* ILF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* INDA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* INDY *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IOO *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IPFF *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* ITB *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* ITOT *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IVE *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IVV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IVW *********\n", "[########################################] | 
100% Completed | 0.3s\n", "********* IWB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IWC *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IWD *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IWF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IWM *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IWN *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IWO *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IWP *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IWR *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IWS *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IWV *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IXC *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IXJ *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IXP *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IXUS *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IYE *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IYF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IYG *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IYH *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IYJ *********\n", 
"[########################################] | 100% Completed | 0.2s\n", "********* IYM *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IYR *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* IYT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IYW *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* IYY *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* IYZ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* JNK *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* KBE *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* KCE *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* KIE *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* KLD *********\n", "KLD error: Can only resample dataframes with known divisions\n", "See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", "[########################################] | 100% Completed | 0.1s\n", "KLD second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* KRE *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* KXI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* LEMB *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* LQD *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* MBB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* MCHI 
*********\n", "[########################################] | 100% Completed | 0.2s\n", "********* MDY *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* MGC *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* MGK *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* MGV *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* MUB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* MXI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* OEF *********\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[########################################] | 100% Completed | 0.3s\n", "********* PFF *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* QUAL *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* REM *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* REZ *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* RING *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* RWO *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* RWR *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* RWX *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* SCJ *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* SCPB *********\n", "SCPB error: Can only resample dataframes with known divisions\n", "See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", 
"[########################################] | 100% Completed | 0.1s\n", "SCPB second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* SCZ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* SDY *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* SHM *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* SHV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* SHY *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* SJNK *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* SLV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* SOXX *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* SPY *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* TFI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* THD *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* THRK *********\n", "THRK error: Can only resample dataframes with known divisions\n", "See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", "[########################################] | 100% Completed | 0.1s\n", "THRK second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* TIP *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* TLH *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* TLO *********\n", "TLO error: Can only resample dataframes with known divisions\n", 
"See https://docs.dask.org/en/latest/dataframe-design.html#partitions\n", "for more information.\n", "[########################################] | 100% Completed | 0.1s\n", "TLO second error: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'\n", "********* TLT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* TUR *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* USMV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VAW *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VBK *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VBR *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VCIT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VCLT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VCR *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VCSH *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VDC *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VDE *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VEA *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VEU *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VFH *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VGK *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VGT *********\n", "[########################################] | 100% 
Completed | 0.3s\n", "********* VHT *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VIG *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VIS *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VNQ *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VNQI *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VOE *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VOO *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VOT *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VOX *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VPL *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VPU *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VSS *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VTI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VTV *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VTWO *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VUG *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VWO *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* VXF *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VXUS *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* VYM *********\n", 
"[########################################] | 100% Completed | 0.2s\n", "********* WOOD *********\n", "[########################################] | 100% Completed | 0.1s\n", "********* XBI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XES *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XHB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLB *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLE *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLF *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLI *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLK *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLP *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLU *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XLV *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* XLY *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* XME *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* XOP *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XPH *********\n", "[########################################] | 100% Completed | 0.2s\n", "********* XRT *********\n", "[########################################] | 100% Completed | 0.3s\n", "********* XSD *********\n", "[########################################] | 100% Completed | 0.2s\n", "\n", "['GAF', 'GUR', 'GWL', 'KLD', 'SCPB', 'THRK', 'TLO']\n" ] } ], "source": [
"# Build one 1-minute frame per symbol; symbols that fail both attempts are\n",
"# collected in `error_symbols` and left out of `D`.\n",
"error_symbols = []\n",
"D = dict()\n",
"\n",
"keep_cols = ['lastSalePrice','lastSaleSize','volume',\n",
"             'spread','mid_price','dollar_volume']\n",
"start, end = '2018-10-01', '2018-12-31'\n",
"\n",
"\n",
"def _window(item):\n",
"    \"\"\"Return `item.data` sliced to `start`:`end` and restricted to `keep_cols`.\"\"\"\n",
"    return item.data.loc[start:end][keep_cols]\n",
"\n",
"\n",
"def _minute_means(frame):\n",
"    \"\"\"Average `frame` over 1-minute bins and drop rows that contain NaN.\"\"\"\n",
"    return frame.resample('1Min').mean().dropna()\n",
"\n",
"\n",
"for sym in tqdm_notebook(select_symbols):\n",
"    print(f'********* {sym} *********')\n",
"    item = get_item(sym, pystore_data_dir)\n",
"    try:\n",
"        # first attempt: resample first, call .compute() last\n",
"        D[sym] = _minute_means(_window(item)).compute()\n",
"    except Exception as lazy_err:\n",
"        print(f'{sym} error: {lazy_err}')\n",
"        try:\n",
"            # fallback: call .compute() first, then resample the computed frame\n",
"            D[sym] = _minute_means(_window(item).compute())\n",
"        except Exception as eager_err:\n",
"            print(f'{sym} second error: {eager_err}')\n",
"            error_symbols.append(sym)\n",
"\n",
"print(error_symbols)"
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T00:54:31.707406Z", "start_time": "2019-02-05T00:54:30.963334Z" }, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-------------------------------------------------------------------------------\n", "dataframe information\n", "-------------------------------------------------------------------------------\n", " lastSalePrice lastSaleSize volume spread \\\n", "symbol dates \n", "XSD 2018-12-31 13:55:00 64.6 100.0 200.0 0.0 \n", " 2018-12-31 13:56:00 64.6 100.0 200.0 0.0 \n", " 2018-12-31 13:57:00 64.6 100.0 200.0 0.0 \n", " 2018-12-31 13:58:00 64.6 100.0 200.0 0.0 \n", " 2018-12-31 13:59:00 64.6 100.0 200.0 0.0 \n", "\n", " mid_price dollar_volume \n", "symbol dates \n", "XSD 2018-12-31 13:55:00 0.0 12920.0 \n", " 2018-12-31 13:56:00 0.0 12920.0 \n", " 2018-12-31 13:57:00 0.0 12920.0 \n", " 2018-12-31 13:58:00 0.0 12920.0 \n", " 2018-12-31 13:59:00 0.0 12920.0 \n", "--------------------------------------------------\n", "\n", "MultiIndex: 4078721 entries, (AAXJ, 2018-10-01 
07:32:00) to (XSD, 2018-12-31 13:59:00)\n", "Data columns (total 6 columns):\n", "lastSalePrice float64\n", "lastSaleSize float64\n", "volume float64\n", "spread float64\n", "mid_price float64\n", "dollar_volume float64\n", "dtypes: float64(6)\n", "memory usage: 202.4+ MB\n", "None\n", "-------------------------------------------------------------------------------\n", "\n" ] } ], "source": [
"# Stack the per-symbol frames in `D` into one frame with a two-level index.\n",
"# keys=list(D) keeps the symbols in the order they were inserted into `D`;\n",
"# names= labels the key level and the row level in the same call.\n",
"df = pd.concat(D, keys=list(D), names=['symbol','dates'])\n",
"# `cprint` is defined elsewhere in this notebook.\n",
"cprint(df)"
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2019-02-05T00:54:34.423356Z", "start_time": "2019-02-05T00:54:31.711523Z" } }, "outputs": [], "source": [
"# Persist the combined frame as parquet under <data_dir>/processed.\n",
"outfp = Path(data_dir/'processed'/'etf_symbols_01.parq')\n",
"# Create the target folder first so the write does not fail when it is missing.\n",
"outfp.parent.mkdir(parents=True, exist_ok=True)\n",
"df.to_parquet(outfp)"
] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:py37]", "language": "python", "name": "conda-env-py37-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }