{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Prepare environment\n", "import os, sys\n", "sys.path.insert(0, os.path.abspath('..'))\n", "from io import StringIO\n", "import pandas as pd\n", "from IPython.core.interactiveshell import InteractiveShell\n", "InteractiveShell.ast_node_interactivity = \"all\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Prepare input data files\n", "gs1_csv = StringIO(\"\"\"\n", "symbol, barsize, date, close\n", "GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05\n", "GS, 5 min, 2016-07-12 11:20:00-07:00, 141.34\n", "\"\"\")\n", "\n", "gs2_csv = StringIO(\"\"\"\n", "symbol, barSize, datetime, close, volume\n", "GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05, 344428\n", "\"\"\")\n", "\n", "fb5min_csv = StringIO(\"\"\"\n", "time, c, vol\n", "2016-07-21 09:30:00, 120.05, 234242\n", "2016-07-21 09:35:00, 120.32, 410842\n", "\"\"\")\n", "\n", "fb1min_csv = StringIO(\"\"\"\n", "time, c, vol\n", "2016-07-25 09:40:00, 120.47, 579638\n", "2016-07-25 09:41:00, 120.82, 192476\n", "\"\"\")\n", "\n", "amzn_csv = StringIO(\"\"\"\n", "symb, bar, date, close, volume\n", "AMZN, 1 day, 2016-07-21, 749.22, 27917\n", "AMZN, 1 day, 2016-07-22, 738.87, 36662\n", "AMZN, 1 day, 2016-07-23, 727.23, 8766\n", "\"\"\")\n", "\n", "df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn = [\n", " pd.read_csv(f)\n", " for f in (gs1_csv, gs2_csv, fb5min_csv, fb1min_csv, amzn_csv)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example starts here\n", "------\n", "### Input DataFrames" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " symbol barsize date close\n", "0 GS 5 min 2016-07-12 10:35:00-07:00 140.05\n", "1 GS 5 min 2016-07-12 11:20:00-07:00 141.34\n", " symbol barSize datetime close volume\n", "0 GS 5 min 2016-07-12 10:35:00-07:00 
140.05 344428\n", " time c vol\n", "0 2016-07-21 09:30:00 120.05 234242\n", "1 2016-07-21 09:35:00 120.32 410842\n", " time c vol\n", "0 2016-07-25 09:40:00 120.47 579638\n", "1 2016-07-25 09:41:00 120.82 192476\n", " symb bar date close volume\n", "0 AMZN 1 day 2016-07-21 749.22 27917\n", "1 AMZN 1 day 2016-07-22 738.87 36662\n", "2 AMZN 1 day 2016-07-23 727.23 8766\n" ] } ], "source": [ "print(df_gs1)\n", "print(df_gs2)\n", "print(df_fb5m)\n", "print(df_fb1m)\n", "print(df_amzn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a MarketDataBlock instance from a `pandas.DataFrame`\n", "**Data are stored in `self.df` (a composition design). Column and index names are standardized for the DataFrame, and a `pandas.MultiIndex` is created.**" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " closing\n", "Symbol DataType BarSize TickerTime \n", "GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05\n", " 2016-07-12 14:20:00-04:00 141.34\n" ] } ], "source": [ "import pytz\n", "from ibstract import MarketDataBlock\n", "\n", "blk = MarketDataBlock(df_gs1, datatype='TRADES', tz=pytz.timezone('US/Eastern'))\n", "print(blk)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Date/Time index and time zone\n", "**Date/time columns given as strings are converted to a ``pandas.DatetimeIndex``.\n", "When a MarketDataBlock instance is created, naive time stamps are localized, and time stamps with a fixed-offset time zone are converted to a region-based ``pytz.timezone``.**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "TickerTime type: \n", "Time zone: US/Eastern \n" ] } ], "source": [ "print(\"\\nTickerTime type:\", type(blk.df.index.levels[3]))\n", "print(\"Time zone:\", blk.tzinfo, type(blk.tzinfo))" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### Update MarketDataBlock 
from a `pandas.DataFrame`\n", "**Updating from a `pandas.DataFrame` combines columns. Missing (N/A) values in integer columns are converted to -1.**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " closing volume\n", "Symbol DataType BarSize TickerTime \n", "GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 344428\n", " 2016-07-12 14:20:00-04:00 141.34 -1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blk.update(df_gs2, datatype='TRADES', tz=pytz.timezone('US/Eastern'))\n", "blk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**`update()` can handle a `DataFrame` that has naive time stamps, rows with a different BarSize, or no BarSize column.**" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " closing volume\n", "Symbol DataType BarSize TickerTime \n", "FB TRADES 1m 2016-07-25 09:40:00-04:00 120.47 579638\n", " 2016-07-25 09:41:00-04:00 120.82 192476\n", " 5m 2016-07-21 09:30:00-04:00 120.05 234242\n", " 2016-07-21 09:35:00-04:00 120.32 410842\n", "GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 344428\n", " 2016-07-12 14:20:00-04:00 141.34 -1" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blk.update(df_fb5m, symbol='FB', datatype='TRADES', barsize='5m', tz=pytz.timezone('US/Eastern'))\n", "blk.update(df_fb1m, symbol='FB', datatype='TRADES', barsize='1m', tz=pytz.timezone('US/Eastern'))\n", "blk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Combining with another MarketDataBlock instance is easier than updating from a DataFrame.**"
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " closing volume\n", "Symbol DataType BarSize TickerTime \n", "AMZN TRADES 1d 2016-07-21 00:00:00-04:00 749.22 27917\n", " 2016-07-22 00:00:00-04:00 738.87 36662\n", " 2016-07-23 00:00:00-04:00 727.23 8766\n", "FB TRADES 1m 2016-07-25 09:40:00-04:00 120.47 579638\n", " 2016-07-25 09:41:00-04:00 120.82 192476\n", " 5m 2016-07-21 09:30:00-04:00 120.05 234242\n", " 2016-07-21 09:35:00-04:00 120.32 410842\n", "GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 344428\n", " 2016-07-12 14:20:00-04:00 141.34 -1" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blk_amzn = MarketDataBlock(df_amzn, datatype='TRADES', tz=pytz.timezone('US/Eastern'))\n", "blk.combine(blk_amzn)\n", "blk" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }