{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:01.674314Z", "start_time": "2018-04-28T07:56:01.137131Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing requirements ..\n", "Done\n" ] } ], "source": [ "%%bash\n", "echo \"Installing requirements ..\"\n", "pip install pandas==0.22.0 quandl pandas_datareader alpha_vantage matplotlib plotly sklearn scipy fix_yahoo_finance statsmodels beautifulsoup4 > /dev/null 2>&1\n", "# NOTE: we use pandas 0.22 for now since pandas_datareader don't support 0.23 yet\n", "echo \"Done\"" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T08:19:43.885210Z", "start_time": "2018-04-28T08:19:43.814279Z" } }, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import datetime\n", "import numbers\n", "import subprocess\n", "import uuid\n", "import string\n", "import json \n", "import requests\n", "from io import StringIO\n", "import re\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import sklearn as sk\n", "from sklearn import linear_model\n", "\n", "import quandl\n", "\n", "import pandas_datareader\n", "from pandas_datareader import data as pdr\n", "import fix_yahoo_finance as yf\n", "yf.pdr_override() # <== that's all it takes :-)\n", "import alpha_vantage\n", "from alpha_vantage.timeseries import TimeSeries\n", "from alpha_vantage.cryptocurrencies import CryptoCurrencies\n", "\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "matplotlib.rcParams['figure.figsize'] = (20.0, 10.0) # Make plots bigger\n", "\n", "import plotly.offline as py\n", "import plotly.graph_objs as go\n", "py.init_notebook_mode()\n", "\n", "from pathlib import Path\n", "from bs4 import BeautifulSoup\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.862784Z", "start_time": "2018-04-28T07:56:02.858489Z" }, "collapsed": true }, "outputs": [], "source": [ "from IPython.core.interactiveshell import InteractiveShell\n", "InteractiveShell.ast_node_interactivity = \"all\"\n", "\n", "pd.set_option('display.float_format', lambda x: '{:,.2f}'.format(x))\n", "pd.set_option('display.max_rows', 5000)\n", "pd.set_option('display.max_columns', 500)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.867935Z", "start_time": "2018-04-28T07:56:02.864845Z" }, "collapsed": true }, "outputs": [], "source": [ "def pd_from_dict(d):\n", " return pd.DataFrame.from_dict(d, orient='index').T.sort_index()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.874900Z", "start_time": "2018-04-28T07:56:02.869722Z" }, "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "# (hack) Global configs\n", "conf_cache = None\n", "\n", "class GetConf:\n", " def __init__(self, splitAdj, divAdj, cache, secondary):\n", " self.splitAdj = splitAdj\n", " self.divAdj = divAdj\n", " self.cache = cache\n", " self.secondary = secondary" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.878953Z", "start_time": "2018-04-28T07:56:02.876242Z" }, "collapsed": true }, "outputs": [], "source": [ "if not \"fetchCache\" in globals():\n", " fetchCache = {}" ] }, { 
"cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.893969Z", "start_time": "2018-04-28T07:56:02.880714Z" }, "collapsed": true }, "outputs": [], "source": [ "class Symbol:\n", " def __init__(self, fullname):\n", " self.fullname = fullname\n", " parts = fullname.split(\"!\")\n", " if len(parts) == 2:\n", " fullname = parts[0]\n", " self.currency = parts[1]\n", " else:\n", " self.currency = \"\"\n", " parts = fullname.split(\"@\")\n", " self.name = parts[0]\n", " if len(parts) == 2:\n", " self.source = parts[1]\n", " else:\n", " self.source = \"\"\n", " \n", " def __str__(self):\n", " return self.fullname" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.913013Z", "start_time": "2018-04-28T07:56:02.896057Z" }, "collapsed": true }, "outputs": [], "source": [ "import scipy.optimize\n", "from datetime import datetime as dt\n", "def xnpv(rate, values, dates):\n", " '''Equivalent of Excel's XNPV function.\n", "\n", " >>> from datetime import date\n", " >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)]\n", " >>> values = [-10000, 20, 10100]\n", " >>> xnpv(0.1, values, dates)\n", " -966.4345...\n", " '''\n", " if rate <= -1.0:\n", " return float('inf')\n", " d0 = dates[0] # or min(dates)\n", " return sum([ vi / (1.0 + rate)**((di - d0).days / 365.0) for vi, di in zip(values, dates)])\n", "\n", "\n", "def xirr(values, dates):\n", " '''Equivalent of Excel's XIRR function.\n", "\n", " >>> from datetime import date\n", " >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)]\n", " >>> values = [-10000, 20, 10100]\n", " >>> xirr(values, dates)\n", " 0.0100612...\n", " '''\n", " # we prefer to try brentq first as newton keeps outputting tolerance warnings\n", " try:\n", " return scipy.optimize.brentq(lambda r: xnpv(r, values, dates), -1.0, 1e10)\n", " #return scipy.optimize.newton(lambda r: xnpv(r, values, dates), 0.0, tol=0.0002)\n", " except RuntimeError: # Failed to converge?\n", " return scipy.optimize.newton(lambda r: xnpv(r, values, dates), 0.0, tol=0.0002)\n", " #return scipy.optimize.brentq(lambda r: xnpv(r, values, dates), -1.0, 1e10)\n", "\n", "#xirr([-100, 100, 200], [dt(2000, 1, 1), dt(2001, 1, 1), dt(2002, 1, 1)])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T07:56:02.935088Z", "start_time": "2018-04-28T07:56:02.915213Z" }, "collapsed": true }, "outputs": [], "source": [ "def curr_price(symbol):\n", " if symbol in ignoredAssets: return 0\n", " return get(symbol)[-1]\n", "\n", "#def getForex(fromCur, toCur):\n", "# if fromCur == toCur: return 1\n", "# if toCur == \"USD\":\n", "# return get(fromCur + \"=X\", \"Y\")\n", "# if fromCur == \"USD\":\n", "# return get(toCur + \"=X\", \"Y\").map(lambda x: 1.0/x)\n", "\n", "def getForex(fromCur, toCur):\n", " if fromCur == toCur: return 1\n", " tmp = get(fromCur + toCur + \"@CUR\").s\n", " tmp = tmp.reindex(pd.date_range(start=tmp.index[0], end=tmp.index[-1]))\n", " tmp = tmp.fillna(method=\"ffill\")\n", " return tmp\n", " #return wrap(tmp, fromCur+toCur)\n", "\n", "def convert(value, fromCur, toCur):\n", " if fromCur == toCur: return value\n", " return value * getForex(fromCur, toCur)[-1]\n" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "ExecuteTime": { "end_time": "2018-04-28T08:54:22.234169Z", "start_time": "2018-04-28T08:54:20.729000Z" }, "collapsed": true }, "outputs": [], "source": [ "\n", "def toSymbol(sym):\n", " 
"    if isinstance(sym, Symbol):\n", "        return sym\n", "    if isinstance(sym, str):\n", "        return Symbol(sym)\n", "    assert False, \"invalid type for Symbol: \" + str(type(sym)) + \", \" + str(sym)\n", "\n", "class DataSource:\n", "\n", "    def __init__(self, source):\n", "        self.source = source\n", "\n", "    def fetch(self, symbol, conf):\n", "        pass\n", "\n", "    def process(self, symbol, df, conf):\n", "        pass\n", "\n", "    def get(self, symbol, conf):\n", "        global conf_cache\n", "\n", "        df = None\n", "        failpath = cache_file(symbol, self.source) + \"._FAIL_\"\n", "\n", "        if os.path.isfile(failpath):\n", "            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(failpath))\n", "            diff = datetime.datetime.now() - mtime\n", "            if diff.total_seconds() <= 24 * 3600:\n", "                raise Exception(\"Fetching has previously failed for {0}, will try again later\".format(symbol))\n", "\n", "        useCache = conf.cache\n", "        if conf_cache is not None:\n", "            useCache = conf_cache\n", "        if useCache:\n", "            df = cache_get(symbol, self.source)\n", "\n", "        try:\n", "            # Attempt to actually fetch the symbol\n", "            if df is None:\n", "                print(\"Fetching %s from %s .. \" % (symbol, self.source), end=\"\")\n", "                df = self.fetch(symbol, conf)\n", "                print(\"DONE\")\n", "            if df is None:\n", "                print(\"FAILED\")\n", "                raise Exception(\"Failed to fetch symbol: \" + str(symbol) + \" from \" + self.source)\n", "            if len(df) == 0:\n", "                print(\"FAILED\")\n", "                raise Exception(\"Symbol fetched but is empty: \" + str(symbol) + \" from \" + self.source)\n", "        except Exception:\n", "            # save a note that we failed, so we don't retry for 24 hours\n", "            Path(failpath).touch()\n", "            raise\n", "\n", "        cache_set(symbol, self.source, df)\n", "\n", "        res = self.process(symbol, df, conf)\n", "        return res.sort_index()\n", "\n", "fred_forex_codes = \"\"\"\n", "AUD\tDEXUSAL\n", "BRL\tDEXBZUS\n", "GBP\tDEXUSUK\n", "CAD\tDEXCAUS\n", "CNY\tDEXCHUS\n", "DKK\tDEXDNUS\n", "EUR\tDEXUSEU\n", "HKD\tDEXHKUS\n", "INR\tDEXINUS\n", "JPY\tDEXJPUS\n", "MYR\tDEXMAUS\n", "MXN\tDEXMXUS\n", "TWD\tDEXTAUS\n", "NOK\tDEXNOUS\n", "SGD\tDEXSIUS\n", "ZAR\tDEXSFUS\n", "KRW\tDEXKOUS\n", "LKR\tDEXSLUS\n", "SEK\tDEXSDUS\n", "CHF\tDEXSZUS\n", "VEF\tDEXVZUS\n", "\"\"\"\n", "\n", "boe_forex_codes = \"\"\"\n", "AUD\tXUDLADD\n", "CAD\tXUDLCDD\n", "CNY\tXUDLBK73\n", "CZK\tXUDLBK27\n", "DKK\tXUDLDKD\n", "HKD\tXUDLHDD\n", "HUF\tXUDLBK35\n", "INR\tXUDLBK64\n", "NIS\tXUDLBK65\n", "JPY\tXUDLJYD\n", "LTL\tXUDLBK38\n", "MYR\tXUDLBK66\n", "NZD\tXUDLNDD\n", "NOK\tXUDLNKD\n", "PLN\tXUDLBK49\n", "GBP\tXUDLGBD\n", "RUB\tXUDLBK69\n", "SAR\tXUDLSRD\n", "SGD\tXUDLSGD\n", "ZAR\tXUDLZRD\n", "KRW\tXUDLBK74\n", "SEK\tXUDLSKD\n", "CHF\tXUDLSFD\n", "TWD\tXUDLTWD\n", "THB\tXUDLBK72\n", "TRY\tXUDLBK75\n", "\"\"\"\n", "\n", "# https://blog.quandl.com/api-for-currency-data\n", "class ForexDataSource(DataSource):\n", "    def __init__(self, source):\n", "        self.fred_code_map = dict([s.split(\"\\t\") for s in fred_forex_codes.split(\"\\n\")[1:-1]])\n", "        self.boe_code_map = dict([s.split(\"\\t\") for s in boe_forex_codes.split(\"\\n\")[1:-1]])\n", "        self.boe_code_map[\"ILS\"] = self.boe_code_map[\"NIS\"]\n", "        super().__init__(source)\n", "\n", "    def fetch(self, symbol, conf):\n", "        assert len(symbol.name) == 6\n", "        _from = symbol.name[:3]\n", "        _to = symbol.name[3:]\n", "        if _to != \"USD\" and _from != \"USD\":\n", "            raise Exception(\"Can only convert to/from USD\")\n", "        invert = _from == \"USD\"\n", "        curr = _to if invert else _from\n", "\n", "        div100 = 1\n", "        if curr == \"GBC\":  # \"GBC\" denotes pence sterling here: fetch GBP and divide by 100\n", "            div100 = 100\n", "            curr = \"GBP\"\n", "\n",
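"        # Try the FRED series first, then the Bank of England series; both quote against USD.\n",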
"        if curr in self.fred_code_map:\n", "            code = self.fred_code_map[curr]\n", "            df = quandl.get(\"FRED/\" + code)\n", "            if code.endswith(\"US\") != invert: # some of the FRED currencies are inverted vs the US dollar, argh..\n", "                df = df.apply(lambda x: 1.0/x)\n", "            return df / div100\n", "\n", "        if curr in self.boe_code_map:\n", "            code = self.boe_code_map[curr]\n", "            df = quandl.get(\"BOE/\" + code)\n", "            if not invert: # not sure if any of the BOE currencies are NOT inverted vs USD; the few I checked weren't\n", "                df = df.apply(lambda x: 1.0/x)\n", "            return df / div100\n", "\n", "        raise Exception(\"Currency pair is not supported: \" + symbol.name)\n", "\n", "    def process(self, symbol, df, conf):\n", "        return df.iloc[:, 0]\n", "\n", "# https://github.com/ranaroussi/fix-yahoo-finance\n", "class YahooDataSource(DataSource):\n", "    def fetch(self, symbol, conf):\n", "        return pdr.get_data_yahoo(symbol.name, progress=False, actions=True)\n", "\n", "    def process(self, symbol, df, conf):\n", "        if not conf.splitAdj:\n", "            assert not conf.divAdj\n", "            # Yahoo \"Close\" data is split adjusted.\n", "            # We find the unadjusted data using the splits data\n", "            splitMul = df[\"Stock Splits\"][::-1].cumprod().shift().fillna(method=\"bfill\")\n", "            return df[\"Close\"] / splitMul\n", "\n", "        assert conf.splitAdj and conf.divAdj\n", "        return df[\"Adj Close\"]\n", "\n", "\n", "class QuandlDataSource(DataSource):\n", "    def fetch(self, symbol, conf):\n", "        return quandl.get(symbol.name)\n", "\n", "    def process(self, symbol, df, conf):\n", "        if \"Close\" in df.columns:\n", "            return df[\"Close\"]\n", "        return df.iloc[:, 0]\n", "\n", "\n", "class GoogleDataSource(DataSource):\n", "    def fetch(self, symbol, conf):\n", "        return pandas_datareader.data.DataReader(symbol.name, 'google')\n", "\n", "    def process(self, symbol, df, conf):\n", "        return df[\"Close\"]\n", "\n", "AV_API_KEY = 'BB18'\n", "class AlphaVantageDataSource(DataSource):\n", "\n", "    def fetch(self, symbol, conf):\n", "        ts = TimeSeries(key=AV_API_KEY, output_format='pandas')\n", "        df, meta_data = ts.get_daily_adjusted(symbol.name, outputsize=\"full\")\n", "        df.index = pd.to_datetime(df.index, format=\"%Y-%m-%d\")\n", "        return df\n", "\n", "    def process(self, symbol, df, conf):\n", "        return df[\"5. adjusted close\"]\n", "\n", "class AlphaVantageCryptoDataSource(DataSource):\n", "\n", "    def fetch(self, symbol, conf):\n", "        cc = CryptoCurrencies(key=AV_API_KEY, output_format='pandas')\n", "        df, meta_data = cc.get_digital_currency_daily(symbol=symbol.name, market='USD')\n", "        df.index = pd.to_datetime(df.index, format=\"%Y-%m-%d\")\n", "        return df\n", "\n", "    def process(self, symbol, df, conf):\n", "        return df['4a. close (USD)']\n", "\n", "class CryptoCompareDataSource(DataSource):\n", "    def fetch(self, symbol, conf):\n", "        url = \"https://min-api.cryptocompare.com/data/histoday?fsym=__sym__&tsym=USD&limit=600000&aggregate=1&e=CCCAGG\"\n", "        d = json.loads(requests.get(url.replace(\"__sym__\", symbol.name)).text)\n", "        df = pd.DataFrame(d[\"Data\"])\n", "        if len(df) == 0:\n", "            return None\n", "        df[\"time\"] = pd.to_datetime(df.time, unit=\"s\")\n", "        df.set_index(\"time\", inplace=True)\n", "        return df\n", "\n", "    def process(self, symbol, df, conf):\n", "        return df.close\n", "\n", "# NOTE: data is SPLIT adjusted, but has no dividends and is NOT DIVIDEND adjusted\n", "# NOTE: it has data all the way back, but the returned result is capped at ~20 years,\n",
"# and results are trimmed from the END, not from the start. TBD to handle this properly.\n", "# for now we start at 1.1.2000\n", "class InvestingComDataSource(DataSource):\n", "\n", "    def getUrl(self, symbol):\n", "        symbol = symbol.name\n", "        data = {\n", "            'search_text': symbol,\n", "            'term': symbol,\n", "            'country_id': '0',\n", "            'tab_id': 'All'\n", "        }\n", "        headers = {\n", "            'Origin': 'https://www.investing.com',\n", "            'Accept-Encoding': 'gzip, deflate, br',\n", "            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',\n", "            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',\n", "            'Content-Type': 'application/x-www-form-urlencoded',\n", "            'Accept': 'application/json, text/javascript, */*; q=0.01',\n", "            'Referer': 'https://www.investing.com/search?q=' + symbol,\n", "            'X-Requested-With': 'XMLHttpRequest',\n", "            'Connection': 'keep-alive'\n", "        }\n", "        r = requests.post(\"https://www.investing.com/search/service/search\", data=data, headers=headers)\n", "        res = r.text\n", "        res = json.loads(res)\n", "        return res[\"All\"][0][\"link\"]\n", "\n", "    def getCodes(self, url):\n", "        url = \"https://www.investing.com\" + url + \"-historical-data\"\n", "\n", "        headers = {\n", "            'Origin': 'https://www.investing.com',\n", "            'Accept-Encoding': 'gzip, deflate, br',\n", "            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',\n", "            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',\n", "            'Content-Type': 'application/x-www-form-urlencoded',\n", "            'Accept': 'application/json, text/javascript, */*; q=0.01',\n", "            'Referer': 'https://www.investing.com/',\n", "            'X-Requested-With': 'XMLHttpRequest',\n", "            'Connection': 'keep-alive'\n", "        }\n", "        r = requests.get(url, headers=headers)\n", "        text = r.text\n", "\n", "        m = re.search(r\"smlId:\\s+(\\d+)\", text)\n", "        smlId = m.group(1)\n", "\n", "        m = re.search(r\"pairId:\\s+(\\d+)\", text)\n", "        pairId = m.group(1)\n", "\n", "        return pairId, smlId\n", "\n", "    def getHtml(self, pairId, smlId):\n", "        data = [\n", "            'curr_id=' + pairId,\n", "            'smlID=' + smlId,\n", "            'header=',\n", "            'st_date=01%2F01%2F2000',\n", "            'end_date=01%2F01%2F2100',\n", "            'interval_sec=Daily',\n", "            'sort_col=date',\n", "            'sort_ord=DESC',\n", "            'action=historical_data'\n", "        ]\n", "        data = \"&\".join(data)\n", "        headers = {\n", "            'Origin': 'https://www.investing.com',\n", "            'Accept-Encoding': 'gzip, deflate, br',\n", "            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',\n", "            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',\n", "            'Content-Type': 'application/x-www-form-urlencoded',\n", "            'Accept': 'text/plain, */*; q=0.01',\n", "            'Referer': 'https://www.investing.com/',\n", "            'X-Requested-With': 'XMLHttpRequest',\n", "            'Connection': 'keep-alive'\n", "        }\n", "        r = requests.post(\"https://www.investing.com/instruments/HistoricalDataAjax\", data=data, headers=headers)\n", "        return r.text\n", "\n", "    def fetch(self, symbol, conf):\n", "        symbolUrl = self.getUrl(symbol)\n", "\n", "        pairId, smlId = self.getCodes(symbolUrl)\n", "\n", "        html = self.getHtml(pairId, smlId)\n", "        #print(html)\n", "        parsed_html = BeautifulSoup(html, \"lxml\")\n", "        df = pd.DataFrame(columns=[\"date\", \"price\"])\n", "\n", "        for i, tr in enumerate(parsed_html.find_all(\"tr\")[1:]): # skip header\n", "            data = [x.get(\"data-real-value\") for x in tr.find_all(\"td\")]\n", "            if len(data) == 0 or data[0] is None:\n", "                continue\n",
"            date = datetime.datetime.utcfromtimestamp(int(data[0]))\n", "            close = float(data[1].replace(\",\", \"\"))\n", "            #open = data[2]\n", "            #high = data[3]\n", "            #low = data[4]\n", "            #volume = data[5]\n", "            df.loc[i] = [date, close]\n", "\n", "        df = df.set_index(\"date\")\n", "        return df\n", "\n", "    def process(self, symbol, df, conf):\n", "        return df['price']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:50.649755Z", "start_time": "2018-03-15T20:31:50.293516Z" }, "collapsed": true, "scrolled": false }, "outputs": [], "source": [ "# fetching data\n", "\n", "if \"Wrapper\" not in locals():\n", "    class Wrapper(object):\n", "\n", "        def __init__(self, s):\n", "            #self.s = s\n", "            object.__setattr__(self, \"s\", s)\n", "\n", "        def __getattr__(self, name):\n", "            attr = self.s.__getattribute__(name)\n", "\n", "            if hasattr(attr, '__call__'):\n", "                def newfunc(*args, **kwargs):\n", "                    result = attr(*args, **kwargs)\n", "                    if type(result) is pd.Series:\n", "                        result = Wrapper(result)\n", "                    return result\n", "                return newfunc\n", "\n", "            if type(attr) is pd.Series:\n", "                attr = Wrapper(attr)\n", "            return attr\n", "\n", "        def __setattr__(self, name, value):\n", "            self.s.__setattr__(name, value)\n", "\n", "        def __getitem__(self, item):\n", "            return wrap(self.s.__getitem__(item), self.s.name)\n", "\n", "#     def __truediv__(self, other):\n", "#         divisor = other\n", "#         if type(other) is Wrapper:\n", "#             divisor = other.s\n", "#         series = self.s / divisor\n", "#         name = self.name\n", "#         if type(other) is Wrapper:\n", "#             name = self.s.name + \" / \" + other.s.name\n", "#         return wrap(series, name)\n", "\n", "        def __truediv__(self, other):\n", "            return Wrapper.doop(self, other, \"/\", lambda x, y: x / y)\n", "        def __rtruediv__(self, other):\n", "            return Wrapper.doop(self, other, \"/\", lambda x, y: x / y, right=True)\n", "\n", "        def doop(self, other, opname, opLambda, right=False):\n", "            divisor = other\n", "            if type(other) is Wrapper:\n", "                divisor = other.s\n", "            if right:\n", "                series = opLambda(divisor, self.s)\n", "            else:\n", "                series = opLambda(self.s, divisor)\n", "            name = self.name\n", "            if type(other) is Wrapper:\n", "                if right:\n", "                    name = other.s.name + \" \" + opname + \" \" + self.s.name\n", "                else:\n", "                    name = self.s.name + \" \" + opname + \" \" + other.s.name\n", "            return wrap(series, name)\n", "\n", "        def __sub__(self, other):\n", "            return Wrapper.doop(self, other, \"-\", lambda x, y: x - y)\n", "        #def __rsub__(self, other):\n", "        #    return Wrapper.doop(self, other, \"-\", lambda x, y: x - y, right=True)\n", "\n", "        def __mul__(self, other):\n", "            return Wrapper.doop(self, other, \"*\", lambda x, y: x * y)\n", "        def __rmul__(self, other):\n", "            return Wrapper.doop(self, other, \"*\", lambda x, y: x * y, right=True)\n", "\n", "def wrap(s, name=\"\"):\n", "    name = name or s.name\n", "    if not name:\n", "        raise Exception(\"no name\")\n", "    if isinstance(s, pd.Series):\n", "        s = Wrapper(s)\n", "        s.name = name\n", "    return s\n", "\n", "data_sources = {\n", "    \"Y\": YahooDataSource(\"Y\"),\n", "    \"IC\": InvestingComDataSource(\"IC\"),\n", "    \"Q\": QuandlDataSource(\"Q\"),\n", "    \"AV\": AlphaVantageDataSource(\"AV\"),\n", "    \"CC\": CryptoCompareDataSource(\"CC\"),\n", "    \"CCAV\": AlphaVantageCryptoDataSource(\"CCAV\"),\n", "    \"CUR\": ForexDataSource(\"CUR\"),\n", "    \"G\": GoogleDataSource(\"G\")\n", "}\n", "\n", "def getFrom(symbol, conf):\n", "    # special handling for forex\n",
"    # if a match, it will recurse and return here with XXXUSD@CUR\n", "    if len(symbol.name) == 6 and not symbol.source:\n", "        parts = symbol.name[:3], symbol.name[3:]\n", "        if parts[0] == \"USD\" or parts[1] == \"USD\":\n", "            return wrap(getForex(parts[0], parts[1]), symbol.name)\n", "\n", "    source = symbol.source or \"Y\"\n", "    if source not in data_sources:\n", "        raise Exception(\"Unsupported source: \" + source)\n", "    if not conf.secondary:\n", "        return data_sources[source].get(symbol, conf)\n", "    try:\n", "        return data_sources[source].get(symbol, conf)\n", "    except Exception:\n", "        print(\"Failed to fetch {0} from {1}, trying from {2} .. \".format(symbol, source, conf.secondary), end=\"\")\n", "        res = data_sources[conf.secondary].get(symbol, conf)\n", "        print(\"DONE\")\n", "        return res\n", "\n", "def format_filename(s):\n", "    valid_chars = \"-_.() %s%s\" % (string.ascii_letters, string.digits)\n", "    filename = ''.join(c for c in s if c in valid_chars)\n", "    filename = filename.replace(' ', '_')\n", "    return filename\n", "\n", "def cache_file(symbol, source):\n", "    filepath = os.path.join(\"symbols\", source, format_filename(symbol.name))\n", "    dirpath = os.path.dirname(filepath)\n", "    if not os.path.exists(dirpath):\n", "        os.makedirs(dirpath)\n", "    return filepath\n", "\n", "symbols_mem_cache = {}\n", "def cache_get(symbol, source):\n", "    if symbol.name in symbols_mem_cache:\n", "        return symbols_mem_cache[symbol.name]\n", "    filepath = cache_file(symbol, source)\n", "    if os.path.exists(filepath):\n", "        #res = pd.read_csv(filepath, squeeze=True, names=[\"date\", \"value\"], index_col=\"date\")\n", "        res = pd.read_csv(filepath, squeeze=False, index_col=\"date\")\n", "        res.index = pd.to_datetime(res.index, format=\"%Y-%m-%d\")\n", "        symbols_mem_cache[symbol.name] = res\n", "        return res\n", "    return None\n", "\n", "def cache_set(symbol, source, s):\n", "    filepath = cache_file(symbol, source)\n", "    s.to_csv(filepath, date_format=\"%Y-%m-%d\", index_label=\"date\")\n", "\n", "\n", "def get_port(d, name=None):\n", "    if isinstance(d, str):\n", "        res = parse_portfolio_def(d)\n", "        if not res:\n", "            raise Exception(\"Invalid portfolio definition: \" + d)\n", "        d = res\n", "    if not isinstance(d, dict):\n", "        raise Exception(\"Portfolio definition must be str or dict, was: \" + str(type(d)))\n", "    df = pd.DataFrame(logret(get(k).s)*v/100 for k, v in d.items()).T.dropna()\n", "    res = Wrapper(i_logret(df.sum(axis=1)))\n", "    res.name = name\n", "    return res\n", "\n", "def parse_portfolio_def(s):\n", "    d = {}\n", "    parts = s.split(\"|\")\n", "    for p in parts:\n", "        parts2 = p.split(\":\")\n", "        if len(parts2) != 2:\n", "            return None\n", "        d[parts2[0]] = float(parts2[1])\n", "    return d\n", "\n", "def get(symbol, cache=True, splitAdj=True, divAdj=True, adj=None, secondary=\"AV\"):\n", "    global conf_cache\n", "    if isinstance(symbol, Wrapper) or isinstance(symbol, pd.Series):\n", "        return symbol\n", "    if \"ignoredAssets\" in globals() and ignoredAssets and symbol in ignoredAssets:\n", "        return wrap(pd.Series(), \"\")\n", "\n", "    # special handling for composite portfolios\n", "    port = parse_portfolio_def(symbol)\n", "    if port:\n", "        return get_port(port, symbol)\n", "\n", "    symbol = toSymbol(symbol)\n", "    if adj is False:\n", "        splitAdj = False\n", "        divAdj = False\n", "\n", "    s = getFrom(symbol, GetConf(splitAdj, divAdj, cache, secondary))\n", "    return wrap(s, symbol.fullname)\n" ] },
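{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sketch of how the fetching layer above is meant to be used (tickers and weights are illustrative, and assume the default Yahoo source is reachable):\n", "```python\n", "spy = get(\"SPY\")              # price series from the default source (Y)\n", "btc = get(\"BTC@CC\")           # same convention, CryptoCompare source\n", "ils = getForex(\"USD\", \"ILS\")  # daily forex series via the CUR source\n", "port = get(\"SPY:60|TLT:40\")   # composite portfolio, weights in percent\n", "```\n", "`get` returns a `Wrapper` around a `pd.Series`, so results can be combined arithmetically and passed to `show(...)` below." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:50.669012Z", "start_time": "2018-03-15T20:31:50.652011Z" }, "collapsed": true },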
"outputs": [], "source": [ "# def __getattribute__(self,name):\n", "# s = object.__getattribute__(self, \"s\")\n", "# if name == \"s\":\n", "# return s\n", " \n", "# attr = s.__getattribute__(name)\n", " \n", "# if hasattr(attr, '__call__'):\n", "# def newfunc(*args, **kwargs):\n", "# result = attr(*args, **kwargs)\n", "# if type(result) is pd.Series:\n", "# result = Wrapper(result)\n", "# return result\n", "# return newfunc\n", " \n", "# if type(attr) is pd.Series:\n", "# attr = Wrapper(attr)\n", "# return attr\n", " \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:50.745663Z", "start_time": "2018-03-15T20:31:50.672748Z" }, "collapsed": true }, "outputs": [], "source": [ "# plotting\n", "\n", "from plotly.graph_objs import *\n", "\n", "def createVerticalLine(xval):\n", " shape = {\n", " 'type': 'line',\n", " 'xref': 'x',\n", " 'x0': xval,\n", " 'x1': xval,\n", " 'yref': 'paper',\n", " 'y0': 0,\n", " 'y1': 1,\n", " #'fillcolor': 'blue',\n", " 'opacity': 1,\n", " 'line': {\n", " 'width': 1,\n", " 'color': 'red'\n", " }\n", " }\n", " return shape\n", " \n", "def createHorizontalLine(yval):\n", " shape = {\n", " 'type': 'line',\n", " 'xref': 'paper',\n", " 'x0': 0,\n", " 'x1': 1,\n", " 'yref': 'x',\n", " 'y0': yval,\n", " 'y1': yval,\n", " #'fillcolor': 'blue',\n", " 'opacity': 1,\n", " 'line': {\n", " 'width': 1,\n", " 'color': 'red'\n", " }\n", " }\n", " return shape\n", " \n", "def plot(*arr, log=True, title=None):\n", " data = []\n", " shapes = []\n", " for val in arr:\n", " if isinstance(val, Wrapper) or isinstance(val, pd.Series):\n", " data.append(go.Scatter(x=val.index, y=val, name=val.name))\n", " elif isinstance(val, datetime.datetime):\n", " shapes.append(createVerticalLine(val))\n", " elif isinstance(val, np.datetime64):\n", " shapes.append(createVerticalLine(val.astype(datetime.datetime)))\n", " elif isinstance(val, numbers.Real):\n", " shapes.append(createHorizontalLine(val))\n", " else:\n", " raise Exception(\"unsupported value type: \" + str(type(val)))\n", " \n", " for d in data:\n", " d = d.y\n", " if isinstance(d, Wrapper):\n", " d = d.s\n", " if np.any(d <= 0):\n", " log = False\n", " \n", " mar = 30\n", " margin=go.Margin(\n", " l=mar,\n", " r=mar,\n", " b=mar,\n", " t=mar,\n", " pad=0\n", " )\n", " legend=dict(x=0,y=1,traceorder='normal',\n", " bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1,\n", " font=dict(family='sans-serif',size=12,color='#000'),\n", " ) \n", " yaxisScale = \"log\" if log else None\n", " layout = go.Layout(legend=legend, margin=margin, yaxis=dict(type=yaxisScale, autorange=True), shapes=shapes, title=title)\n", " fig = go.Figure(data=data, layout=layout)\n", " py.iplot(fig)\n", "\n", "# show a stacked area chart normalized to 100% of multiple time series\n", "def plotly_area(df, title=None):\n", " tt = df.div(df.sum(axis=1), axis=0)*100 # normalize to summ 100\n", " tt = tt.reindex(tt.mean().sort_values(ascending=False).index, axis=1) # sort columns by mean value\n", " tt = tt.sort_index()\n", " tt2 = tt.cumsum(axis=1) # calc cum-sum\n", " data = []\n", " for col in tt2:\n", " s = tt2[col]\n", " trace = go.Scatter(\n", " name=col,\n", " x=s.index.to_datetime(),\n", " y=s.values,\n", " text=[\"{:.1f}%\".format(v) for v in tt[col].values], # use text as non-cumsum values\n", " hoverinfo='name+x+text',\n", " mode='lines',\n", " fill='tonexty'\n", " )\n", " data.append(trace)\n", "\n", " mar = 30\n", " margin=go.Margin(l=mar,r=mar,b=mar,t=mar,pad=0)\n", " 
"    legend = dict(x=0, y=1, traceorder='reversed',\n", "                  bgcolor='#FFFFFFBB', bordercolor='#888888', borderwidth=1,\n", "                  font=dict(family='sans-serif', size=12, color='#000'),\n", "                  )\n", "    layout = go.Layout(margin=margin, legend=legend, title=title,\n", "                       #showlegend=True,\n", "                       xaxis=dict(\n", "                           type='date',\n", "                       ),\n", "                       yaxis=dict(\n", "                           type='linear',\n", "                           range=[1, 100],\n", "                           dtick=20,\n", "                           ticksuffix='%'\n", "                       )\n", "                       )\n", "    fig = go.Figure(data=data, layout=layout)\n", "    py.iplot(fig, filename='stacked-area-plot')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T21:10:29.869584Z", "start_time": "2018-03-15T21:10:29.602466Z" }, "collapsed": true, "scrolled": false }, "outputs": [], "source": [ "# data processing\n", "\n", "def _start(s):\n", "    return s.index[0]\n", "\n", "def _end(s):\n", "    return s.index[-1]\n", "\n", "def getCommonDate(data, alldata=False):\n", "    if alldata:\n", "        l = [_start(s) for s in data if isinstance(s, Wrapper) or isinstance(s, pd.Series)]\n", "    else:\n", "        l = [_start(s) for s in data if isinstance(s, Wrapper)]\n", "    if not l:\n", "        return None\n", "    return max(l)\n", "\n", "def doTrim(data, alldata=False):\n", "    date = getCommonDate(data, alldata=alldata)\n", "    if date is None:\n", "        return data\n", "    newArr = []\n", "    for s in data:\n", "        if isinstance(s, Wrapper) or (alldata and isinstance(s, pd.Series)):\n", "            s = s[date:]\n", "        newArr.append(s)\n", "    return newArr\n", "\n", "def doAlign(data):\n", "    date = getCommonDate(data)\n", "    if date is None:\n", "        return data\n", "    newArr = []\n", "    for s in data:\n", "        if isinstance(s, Wrapper):\n", "            s = s / s[date]\n", "        newArr.append(s)\n", "    return newArr\n", "\n", "def doClean(data):\n", "    return [s.dropna() if isinstance(s, Wrapper) else s for s in data]\n", "\n", "def show(*data, trim=True, align=True, ta=True, **plotArgs):\n", "    items = []\n", "\n", "    # intercept the \"cache\" argument\n", "    cache = plotArgs.get(\"cache\", None)\n", "    if cache is not None:\n", "        del plotArgs[\"cache\"]\n", "\n", "    for x in data:\n", "        if isinstance(x, pd.DataFrame):\n", "            items += [x[c] for c in x]\n", "        elif isinstance(x, datetime.datetime) or isinstance(x, np.datetime64):\n", "            items.append(x)\n", "        elif isinstance(x, numbers.Real):\n", "            items.append(x)\n", "        else:\n", "            x = get(x, cache) if cache is not None else get(x)\n", "            items.append(x)\n", "    data = items\n", "    #data = [get(s) for s in data] # converts strings to symbols\n", "    data = doClean(data)\n", "    if not ta:\n", "        trim = False\n", "        align = False\n", "    if trim: data = doTrim(data)\n", "    if align: data = doAlign(data)\n", "    plot(*data, **plotArgs)\n", "\n", "def ma(s, n):\n", "    return s.rolling(n).mean()\n", "\n", "def mm(s, n):\n", "    return s.rolling(n).median()\n", "\n", "def ret(s):\n", "    return s.pct_change()\n", "\n", "def logret(s):\n", "    res = np.log(s) - np.log(s.shift(1))\n", "    res.name = \"logret(\" + s.name + \")\"\n", "    return res\n", "\n", "def i_logret(s):\n", "    return np.exp(np.cumsum(s))\n", "\n", "def lrret(regressors, target, sum1=False):\n", "    regressors = [get(x) for x in regressors]\n", "    target = get(target)\n", "    all = [logret(x).s for x in (regressors + [target])]\n", "\n", "    # based on: https://stats.stackexchange.com/questions/21565/how-do-i-fit-a-constrained-regression-in-r-so-that-coefficients-total-1?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa\n", "    # NOTE: not finished, not working\n", "    if sum1:\n", "        allOrig = all\n", "        last = all[-2]\n",
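"        # constrained-regression trick: subtract the last regressor from the target and from\n", "        # the remaining regressors, fit without it, then recover its weight as 1 - sum(others)\n",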
"        all = [r - last for r in (all[:-2] + [all[-1]])]\n", "\n", "    data = pd.DataFrame(all).T\n", "    data = data.dropna()\n", "    y = data.iloc[:, -1]\n", "    X = data.iloc[:, :-1]\n", "\n", "    regr = linear_model.LinearRegression(fit_intercept=False)\n", "    regr.fit(X, y)\n", "\n", "    if sum1:\n", "        weights = np.append(regr.coef_, 1-np.sum(regr.coef_))\n", "\n", "        all = allOrig\n", "        data = pd.DataFrame(all).T\n", "        data = data.dropna()\n", "        y = data.iloc[:, -1]\n", "        X = data.iloc[:, :-1]\n", "        regr = linear_model.LinearRegression(fit_intercept=False)\n", "        regr.fit(X, y)\n", "\n", "        regr.coef_ = weights\n", "\n", "    y_pred = regr.predict(X)\n", "\n", "    print('Regressors:', [s.name for s in regressors])\n", "    print('Coefficients:', regr.coef_)\n", "    #print('Coefficients*:', list(regr.coef_) + [1-np.sum(regr.coef_)])\n", "    #print(\"Mean squared error: %.2f\" % mean_squared_error(diabetes_y_test, diabetes_y_pred))\n", "    print('Variance score r^2: %.3f' % sk.metrics.r2_score(y, y_pred))\n", "\n", "    y_pred = i_logret(pd.Series(y_pred, X.index))\n", "    y_pred.name = target.name + \" fit\"\n", "    #y_pred = \"fit\"\n", "    y_pred = Wrapper(y_pred)\n", "    show(target, y_pred)\n", "    return y_pred\n", "\n", "def dd(x):\n", "    if isinstance(x, Wrapper): # not sure why Wrapper doesn't work\n", "        x = x.s\n", "    res = (x / np.maximum.accumulate(x) - 1) * 100\n", "    return res\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:50.999302Z", "start_time": "2018-03-15T20:31:50.989870Z" }, "collapsed": true }, "outputs": [], "source": [ "from IPython.core.display import display, Javascript\n", "import time, os, stat\n", "\n", "def publish(name=None):\n", "    def file_age_in_seconds(pathname):\n", "        return time.time() - os.stat(pathname)[stat.ST_MTIME]\n", "\n", "    filename = !ls -t *.ipynb | grep -v /$ | head -1\n", "    filename = filename[0]\n", "\n", "    age = file_age_in_seconds(filename)\n", "    min_age = 5\n", "    if age > min_age:\n", "        print(filename + \" file age is \" + str(age) + \" seconds, auto saving current notebook ..\")\n", "        # note: \"save-notbook\" (sic) is the actual element id in the classic notebook UI\n", "        display(Javascript('console.log(document.querySelector(\"div#save-notbook button\").click())'))\n", "        print(\"save requested, sleeping to ensure execution ..\")\n", "        time.sleep(15)\n", "        print(\"done\")\n", "        filename = !ls -t *.ipynb | grep -v /$ | head -1\n", "        filename = filename[0]\n", "\n", "    if not name:\n", "        name = str(uuid.uuid4().hex.upper())\n", "    save()\n", "    print(\"Publishing \" + filename + \" ..\")\n", "    res = subprocess.call(['bash', './publish.sh', name])\n", "    if res == 0:\n", "        print(\"published successfully!\")\n", "        print(\"https://nbviewer.jupyter.org/github/ertpload/test/blob/master/__name__.ipynb\".replace(\"__name__\", name))\n", "    else:\n", "        print(\"Failed!\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:51.004297Z", "start_time": "2018-03-15T20:31:51.001149Z" }, "collapsed": true }, "outputs": [], "source": [ "from IPython.display import display, Javascript\n", "def save():\n", "    display(Javascript('IPython.notebook.save_checkpoint();'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:51.012037Z", "start_time": "2018-03-15T20:31:51.006524Z" }, "collapsed": true }, "outputs": [], "source": [ "# make the plotly graphs look wider on mobile\n", "from IPython.core.display import display, HTML\n", "s = \"\"\"\n", "\n", "\"\"\"\n", "display(HTML(s))" ] },
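{ "cell_type": "markdown", "metadata": {}, "source": [ "Typical usage of the helpers defined above (a sketch; `SPY` is just an example ticker, fetched on first use):\n", "```python\n", "spy = get(\"SPY\")\n", "show(spy, ma(spy, 200))  # price vs. its 200-day moving average\n", "show(dd(spy), 0)         # drawdown in percent, with a horizontal line at 0\n", "```" ] }, { "cell_type":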
"code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:51.050038Z", "start_time": "2018-03-15T20:31:51.014638Z" }, "collapsed": true }, "outputs": [], "source": [ "# interception to auto-fetch hardcoded symbols e.g:\n", "# show(SPY)\n", "# this should run last in the framework code, or it attempts to download unrelated symbols :)\n", "\n", "from IPython.core.inputtransformer import *\n", "intercept = True\n", "if intercept and not \"my_transformer_tokens_instance\" in locals():\n", " #print(\"transformation hook init\")\n", " attempted_implied_fetches = set()\n", " \n", " ip = get_ipython()\n", "\n", " @StatelessInputTransformer.wrap\n", " def my_transformer(line):\n", " if line.startswith(\"x\"):\n", " return \"specialcommand(\" + repr(line) + \")\"\n", " return line\n", "\n", " @TokenInputTransformer.wrap\n", " def my_transformer_tokens(tokens):\n", " for i, x in enumerate(tokens):\n", " if x.type == 1 and x.string.isupper() and x.string.isalpha(): ## type=1 is NAME token\n", " if i < len(tokens)-1 and tokens[i+1].type == 53 and tokens[i+1].string == \"=\":\n", " attempted_implied_fetches.add(x.string)\n", " continue\n", " if x.string in attempted_implied_fetches or x.string in ip.user_ns:\n", " continue\n", " try:\n", " ip.user_ns[x.string] = get(x.string)\n", " except:\n", " print(\"Failed to fetch implied symbol: \" + x.string)\n", " attempted_implied_fetches.add(x.string)\n", " return tokens\n", "\n", " my_transformer_tokens_instance = my_transformer_tokens()\n", " \n", " ip.input_splitter.logical_line_transforms.append(my_transformer_tokens_instance)\n", " ip.input_transformer_manager.logical_line_transforms.append(my_transformer_tokens_instance)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-03-15T20:31:51.056535Z", "start_time": "2018-03-15T20:31:51.052816Z" }, "collapsed": true }, "outputs": [], "source": [ "def date(s):\n", " return pd.to_datetime(s, format=\"%Y-%m-%d\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "another options for interception:\n", "```python\n", "class VarWatcher(object):\n", " def __init__(self, ip):\n", " self.shell = ip\n", " self.last_x = None\n", "\n", " def pre_execute(self):\n", " if False:\n", " for k in dir(self.shell):\n", " print(k, \":\", getattr(self.shell, k))\n", " print()\n", " #print(\"\\n\".join(dir(self.shell)))\n", " if \"content\" in self.shell.parent_header:\n", " code = self.shell.parent_header['content']['code']\n", " self.shell.user_ns[code] = 42\n", " #print(self.shell.user_ns.get('ASDF', None))\n", "\n", " def post_execute(self):\n", " pass\n", " #if self.shell.user_ns.get('x', None) != self.last_x:\n", " # print(\"x changed!\")\n", "\n", "def load_ipython_extension(ip):\n", " vw = VarWatcher(ip)\n", " ip.events.register('pre_execute', vw.pre_execute)\n", " ip.events.register('post_execute', vw.post_execute)\n", " \n", "ip = get_ipython()\n", "\n", "load_ipython_extension(ip) \n", "\n", "```" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", 
"varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }