{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PREDICTING FUTURE BEHAVIOUR OF S&P 500 STOCK MARKET INDEX\n", "# A machine learning-based approach leveraging MarketPsych sentiment indicators\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Index\n", "* [1. Data loading and some EDA](#1.-Data-loading-and-some-EDA)\n", " * [Sentiment indicators](#Sentiment-indicators)\n", " * [Market data](#Market-data)\n", "* [2. Data preparation](#2.-Data-preparation)\n", " * [Missing values](#Missing-values)\n", " * [Transformations](#Transformations)\n", " * [Smoothing](#Smoothing)\n", " * [Feature Engineering](#Feature-Engineering)\n", " * [PCA](#PCA)\n", " * [Technical indicators](#Technical-indicators)\n", " * [New variables (crossovers)](#New-variables-(crossovers))\n", " * [Labeling target variable](#Labeling-target-variable)\n", "* [3. Model](#3.-Model)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "[(0.0, 0.26666666666666666, 0.5058823529411764),\n", " (0.17647058823529413, 0.8, 0.803921568627451),\n", " (0.8470588235294118, 0.7450980392156863, 0.4588235294117647),\n", " (0.09803921568627451, 0.45098039215686275, 0.7215686274509804),\n", " (0.3568627450980392, 0.7450980392156863, 1.0),\n", " (0.9686274509803922, 0.5372549019607843, 0.23137254901960785),\n", " (0.00784313725490196, 0.6470588235294118, 0.6470588235294118),\n", " (0.2823529411764706, 0.6823529411764706, 0.39215686274509803),\n", " (0.9725490196078431, 0.803921568627451, 0.3176470588235294),\n", " (0.9686274509803922, 0.5450980392156862, 0.9098039215686274)]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import mlfinlab as mlfin\n", "import pandas as pd\n", "import pandas_datareader as pdr\n", "import pandas_profiling as pf\n", "import numpy as np\n", 
"from yahoo_finance import Share\n", "from datetime import datetime, timedelta, date\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import missingno as msno\n", "from scipy import stats\n", "import pywt\n", "from ta.momentum import RSIIndicator, StochasticOscillator, WilliamsRIndicator, ROCIndicator\n", "from ta.trend import MACD, ADXIndicator\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.decomposition import PCA\n", "from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, roc_auc_score, accuracy_score\n", "from sklearn.feature_selection import SelectFromModel\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, cross_val_score\n", "from xgboost import XGBClassifier, plot_tree\n", "from mlxtend.feature_selection import SequentialFeatureSelector as SFS\n", "from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs\n", "from plotly.subplots import make_subplots\n", "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", "from plotly.validators.scatter.marker import SymbolValidator\n", "import ipywidgets as widgets\n", "from ipywidgets import interact, HBox, Label\n", "import plotly.graph_objs as go\n", "import warnings\n", "import sys\n", "import os\n", "\n", "init_notebook_mode(connected=True)\n", "\n", "os.environ[\"PATH\"] += os.pathsep + 'C:/Program Files/Graphviz/bin/'\n", "sns.set_context('notebook', rc={\"axes.titlesize\":14,\"axes.labelsize\":13})\n", "sns.set_style('white')\n", "%matplotlib inline\n", "bbva = ['#004481','#2DCCCD', '#D8BE75','#1973B8', '#5BBEFF', '#F7893B', '#02A5A5', '#48AE64', '#F8CD51', '#F78BE8'];\n", "sns.set_palette(bbva);\n", "sns.color_palette(bbva)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. 
def load_data(data='countries', data_type='News_Social', asset_code_id='US'):
    """
    Load the MarketPsych sentiment indicators downloaded from Thomson Reuters,
    i.e. the indicators related to countries, companies and currencies.

    The initial bulk export (``*_CARGA_INICIAL.csv``) is read from the working
    directory and cut at 2020-03-30. Every other entry in the working directory
    whose name contains the dimension code ('COU', 'CUR' or 'CMPNY') is then read
    as an incremental export, aligned to the columns of the initial file and
    appended.

    Parameters
    ----------
    data: str
        One of 'countries', 'companies', 'currencies'.
    data_type: str
        Data source to keep. One of 'News', 'Social', 'News_Social'.
    asset_code_id: str
        Code of the asset to keep, e.g. 'US', 'USD', 'MPTRXUS500'.

    Return
    ------
    Pandas dataframe indexed by ``Date`` (datetime), sorted by date, restricted
    to rows from 2000-01-01 onwards. Identifier/audit columns are dropped and the
    remaining columns carry a '_USA', '_USD' or '_US500' suffix.

    Raises
    ------
    ValueError
        If `data` is not one of the supported values.
    """
    # dimension code used in file names, initial bulk file, column suffix
    sources = {
        'countries': ('COU', 'COU_CARGA_INICIAL.csv', '_USA'),
        'currencies': ('CUR', 'CUR_CARGA_INICIAL.csv', '_USD'),
        'companies': ('CMPNY', 'CMPNY_GRP_CARGA_INICIAL.csv', '_US500'),
    }
    if data not in sources:
        raise ValueError(
            "data must be one of {}, got {!r}".format(sorted(sources), data))
    dim, file, suffix = sources[data]

    initial = pd.read_csv(file, sep=';')
    # Dates are ISO 'YYYY-MM-DD' strings, so string comparison is chronological.
    initial = initial[initial['date'] <= '2020-03-30']

    # Columns of the initial export and the differently named column of the
    # incremental exports that carries the same information.
    fallback_source = {
        'asset_code_id': 'assetCode',
        'data_type': 'dataType',
        'id_refinitiv': 'id',
        'system_version': 'systemVersion',
    }
    audit_columns = ('date_audit_laod', 'process_audit_load')

    frames = [initial]
    # Sorted so the load order does not depend on the file system.
    for dire in sorted(os.listdir('./')):
        if dim not in dire or dire == file:
            continue
        new_month = pd.read_csv(dire, delimiter='\t')
        if len(new_month.columns) == 1:
            # Everything landed in one column: the file is ';'-separated instead.
            new_month = pd.read_csv(dire, delimiter=';')
        if 'date' not in new_month.columns:
            # The date is taken from characters 3-12 of the record id.
            new_month['date'] = new_month['id'].apply(lambda x: x[3:13])
        for target, source in fallback_source.items():
            if target not in new_month.columns:
                new_month[target] = new_month[source]
        for column in audit_columns:
            if column not in new_month.columns:
                new_month[column] = 'NA'
        frames.append(new_month[initial.columns])

    # Single concat instead of growing the frame inside the loop.
    dataset = pd.concat(frames, ignore_index=True)

    selected = (
        (dataset['data_type'] == data_type)
        & (dataset['asset_code_id'] == asset_code_id)
        & (dataset['date'] >= '2000-01-01')
    )
    dataset = dataset[selected].sort_values(by='date')

    dataset = (
        dataset
        .assign(Date=pd.to_datetime(dataset['date']))
        .set_index('Date')
        .drop(columns=['date', 'asset_code_id', 'data_type', 'id_refinitiv',
                       'system_version', 'date_audit_laod', 'process_audit_load'])
    )
    dataset.columns = [col + suffix for col in dataset.columns]

    return dataset
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stockIndexBuzz_USAbondBuzz_USAratesBuzz_USAstockIndexSentiment_USAstockIndexOptimism_USAstockIndexTrust_USAstockIndexFear_USAstockIndexStress_USAstockIndexSurprise_USAstockIndexUncertainty_USA...bondUncertainty_USAbondDefault_USAbondPriceDirection_USAbondPriceForecast_USAbondVolatility_USAcentralBank_USAdebtDefault_USAinterestRates_USAinterestRatesForecast_USAmonetaryPolicyLooseVsTight_USA
count7548.0000007548.0000007548.0000007548.0000007548.0000007546.0000007521.0000007548.0000007455.0000007548.000000...7540.0000007487.0000007543.0000007215.0000007538.0000007548.0000007548.0000007416.0000006770.0000007432.000000
mean7766.8182435018.9808963059.338103-0.045077-0.0046020.0036620.0110270.0469690.0048450.029227...0.0202780.0180760.0088560.0007880.0282080.5827810.2539800.0262390.0074600.016272
std7423.2685805703.6820363620.8768170.1461760.0267620.0052050.0070440.0151330.0040110.027715...0.0086420.0122890.0300950.0049180.0117020.1313230.1320860.0386750.0127890.031317
min59.20000038.00000025.000000-0.466583-0.212583-0.035411-0.0049990.007194-0.0090650.001890...-0.014634-0.118110-0.142631-0.054225-0.0074910.0568780.013831-0.156575-0.080899-0.094737
25%2221.5750001746.250000634.875000-0.154254-0.0198180.0007080.0060110.0363670.0024230.016069...0.0147020.010157-0.006501-0.0013210.0203930.4989400.1509130.0047690.0006810.000000
50%5814.0000003864.0500001903.000000-0.042142-0.0046150.0025940.0094190.0449960.0039200.021073...0.0189280.0154560.0064000.0006470.0259820.5952170.2356060.0150380.0037300.006530
75%11281.9500006928.4500004178.7500000.0664060.0104330.0055660.0142520.0555820.0061440.029007...0.0242240.0225330.0234770.0028680.0336060.6784740.3393870.0364360.0104600.019464
max134839.000000126062.90000045542.5000000.3900250.2311040.0458730.0687370.1588990.0724780.267216...0.0988590.1255590.1890270.0453120.1532260.8927270.8355440.2578000.1038960.234619
\n", "

8 rows × 30 columns

\n", "
" ], "text/plain": [ " stockIndexBuzz_USA bondBuzz_USA ratesBuzz_USA \\\n", "count 7548.000000 7548.000000 7548.000000 \n", "mean 7766.818243 5018.980896 3059.338103 \n", "std 7423.268580 5703.682036 3620.876817 \n", "min 59.200000 38.000000 25.000000 \n", "25% 2221.575000 1746.250000 634.875000 \n", "50% 5814.000000 3864.050000 1903.000000 \n", "75% 11281.950000 6928.450000 4178.750000 \n", "max 134839.000000 126062.900000 45542.500000 \n", "\n", " stockIndexSentiment_USA stockIndexOptimism_USA stockIndexTrust_USA \\\n", "count 7548.000000 7548.000000 7546.000000 \n", "mean -0.045077 -0.004602 0.003662 \n", "std 0.146176 0.026762 0.005205 \n", "min -0.466583 -0.212583 -0.035411 \n", "25% -0.154254 -0.019818 0.000708 \n", "50% -0.042142 -0.004615 0.002594 \n", "75% 0.066406 0.010433 0.005566 \n", "max 0.390025 0.231104 0.045873 \n", "\n", " stockIndexFear_USA stockIndexStress_USA stockIndexSurprise_USA \\\n", "count 7521.000000 7548.000000 7455.000000 \n", "mean 0.011027 0.046969 0.004845 \n", "std 0.007044 0.015133 0.004011 \n", "min -0.004999 0.007194 -0.009065 \n", "25% 0.006011 0.036367 0.002423 \n", "50% 0.009419 0.044996 0.003920 \n", "75% 0.014252 0.055582 0.006144 \n", "max 0.068737 0.158899 0.072478 \n", "\n", " stockIndexUncertainty_USA ... bondUncertainty_USA bondDefault_USA \\\n", "count 7548.000000 ... 7540.000000 7487.000000 \n", "mean 0.029227 ... 0.020278 0.018076 \n", "std 0.027715 ... 0.008642 0.012289 \n", "min 0.001890 ... -0.014634 -0.118110 \n", "25% 0.016069 ... 0.014702 0.010157 \n", "50% 0.021073 ... 0.018928 0.015456 \n", "75% 0.029007 ... 0.024224 0.022533 \n", "max 0.267216 ... 
0.098859 0.125559 \n", "\n", " bondPriceDirection_USA bondPriceForecast_USA bondVolatility_USA \\\n", "count 7543.000000 7215.000000 7538.000000 \n", "mean 0.008856 0.000788 0.028208 \n", "std 0.030095 0.004918 0.011702 \n", "min -0.142631 -0.054225 -0.007491 \n", "25% -0.006501 -0.001321 0.020393 \n", "50% 0.006400 0.000647 0.025982 \n", "75% 0.023477 0.002868 0.033606 \n", "max 0.189027 0.045312 0.153226 \n", "\n", " centralBank_USA debtDefault_USA interestRates_USA \\\n", "count 7548.000000 7548.000000 7416.000000 \n", "mean 0.582781 0.253980 0.026239 \n", "std 0.131323 0.132086 0.038675 \n", "min 0.056878 0.013831 -0.156575 \n", "25% 0.498940 0.150913 0.004769 \n", "50% 0.595217 0.235606 0.015038 \n", "75% 0.678474 0.339387 0.036436 \n", "max 0.892727 0.835544 0.257800 \n", "\n", " interestRatesForecast_USA monetaryPolicyLooseVsTight_USA \n", "count 6770.000000 7432.000000 \n", "mean 0.007460 0.016272 \n", "std 0.012789 0.031317 \n", "min -0.080899 -0.094737 \n", "25% 0.000681 0.000000 \n", "50% 0.003730 0.006530 \n", "75% 0.010460 0.019464 \n", "max 0.103896 0.234619 \n", "\n", "[8 rows x 30 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "countries.describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Currencies info\n", "currencies = load_data(data='currencies', data_type='News_Social', asset_code_id='USD')\n", "#pf.ProfileReport(currencies, minimal=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
buzz_USDsentiment_USDoptimism_USDjoy_USDloveHate_USDtrust_USDanger_USDconflict_USDfear_USDgloom_USD...emotionVsFact_USDmarketRisk_USDlongShort_USDlongShortForecast_USDpriceDirection_USDpriceForecast_USDvolatility_USDcarryTrade_USDcurrencyPegInstability_USDpriceMomentum_USD
count7548.0000007547.0000007527.0000006838.0000006917.0000007462.0000006926.0000007526.0000007335.0000007481.000000...7548.0000007544.0000007459.0000006982.0000007545.0000007502.0000007471.0000002034.0000004162.0000007443.000000
mean6291.483148-0.048832-0.0086210.0032360.0022510.0024210.0028560.0024250.0085480.021687...0.2484260.0352690.0020330.0004020.0194550.0008490.0258250.0007230.0005410.005659
std5878.6260960.0761550.0255360.0036360.0037050.0056280.0030060.0131540.0063220.010245...0.0688800.0254290.0080830.0036130.0453630.0108100.0111570.0013480.0027890.008824
min1.000000-0.405405-0.238095-0.009083-0.031250-0.140000-0.005576-0.177778-0.044444-0.088889...-0.104673-0.154762-0.111111-0.111111-0.195946-0.120690-0.009539-0.000789-0.025316-0.060606
25%1364.325000-0.097635-0.0207740.0012530.0009340.0000000.001248-0.0036650.0044410.015184...0.2077350.021048-0.001442-0.000481-0.010270-0.0035780.0193110.000121-0.0001490.000765
50%4119.500000-0.047602-0.0072430.0022050.0017590.0018260.0020900.0019810.0071530.019908...0.2432110.0358220.0020300.0003310.0184410.0011450.0250460.0002920.0001480.005420
75%10248.975000-0.0008460.0054010.0039540.0029250.0042990.0034790.0078520.0110700.025938...0.2815250.0507970.0054070.0012260.0476550.0057000.0309450.0007360.0007280.010466
max46081.3000000.5000000.2424240.0619580.1346580.0555560.0731320.2083330.0991960.121131...1.0000000.2000000.1739130.1333330.4000000.4000000.5000000.0181230.0476190.100000
\n", "

8 rows × 25 columns

\n", "
" ], "text/plain": [ " buzz_USD sentiment_USD optimism_USD joy_USD loveHate_USD \\\n", "count 7548.000000 7547.000000 7527.000000 6838.000000 6917.000000 \n", "mean 6291.483148 -0.048832 -0.008621 0.003236 0.002251 \n", "std 5878.626096 0.076155 0.025536 0.003636 0.003705 \n", "min 1.000000 -0.405405 -0.238095 -0.009083 -0.031250 \n", "25% 1364.325000 -0.097635 -0.020774 0.001253 0.000934 \n", "50% 4119.500000 -0.047602 -0.007243 0.002205 0.001759 \n", "75% 10248.975000 -0.000846 0.005401 0.003954 0.002925 \n", "max 46081.300000 0.500000 0.242424 0.061958 0.134658 \n", "\n", " trust_USD anger_USD conflict_USD fear_USD gloom_USD ... \\\n", "count 7462.000000 6926.000000 7526.000000 7335.000000 7481.000000 ... \n", "mean 0.002421 0.002856 0.002425 0.008548 0.021687 ... \n", "std 0.005628 0.003006 0.013154 0.006322 0.010245 ... \n", "min -0.140000 -0.005576 -0.177778 -0.044444 -0.088889 ... \n", "25% 0.000000 0.001248 -0.003665 0.004441 0.015184 ... \n", "50% 0.001826 0.002090 0.001981 0.007153 0.019908 ... \n", "75% 0.004299 0.003479 0.007852 0.011070 0.025938 ... \n", "max 0.055556 0.073132 0.208333 0.099196 0.121131 ... 
\n", "\n", " emotionVsFact_USD marketRisk_USD longShort_USD \\\n", "count 7548.000000 7544.000000 7459.000000 \n", "mean 0.248426 0.035269 0.002033 \n", "std 0.068880 0.025429 0.008083 \n", "min -0.104673 -0.154762 -0.111111 \n", "25% 0.207735 0.021048 -0.001442 \n", "50% 0.243211 0.035822 0.002030 \n", "75% 0.281525 0.050797 0.005407 \n", "max 1.000000 0.200000 0.173913 \n", "\n", " longShortForecast_USD priceDirection_USD priceForecast_USD \\\n", "count 6982.000000 7545.000000 7502.000000 \n", "mean 0.000402 0.019455 0.000849 \n", "std 0.003613 0.045363 0.010810 \n", "min -0.111111 -0.195946 -0.120690 \n", "25% -0.000481 -0.010270 -0.003578 \n", "50% 0.000331 0.018441 0.001145 \n", "75% 0.001226 0.047655 0.005700 \n", "max 0.133333 0.400000 0.400000 \n", "\n", " volatility_USD carryTrade_USD currencyPegInstability_USD \\\n", "count 7471.000000 2034.000000 4162.000000 \n", "mean 0.025825 0.000723 0.000541 \n", "std 0.011157 0.001348 0.002789 \n", "min -0.009539 -0.000789 -0.025316 \n", "25% 0.019311 0.000121 -0.000149 \n", "50% 0.025046 0.000292 0.000148 \n", "75% 0.030945 0.000736 0.000728 \n", "max 0.500000 0.018123 0.047619 \n", "\n", " priceMomentum_USD \n", "count 7443.000000 \n", "mean 0.005659 \n", "std 0.008824 \n", "min -0.060606 \n", "25% 0.000765 \n", "50% 0.005420 \n", "75% 0.010466 \n", "max 0.100000 \n", "\n", "[8 rows x 25 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "currencies.describe()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [], "source": [ "companies = load_data(data='companies', data_type='News_Social', asset_code_id='MPTRXUS500')\n", "#pf.ProfileReport(companies, minimal=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Market data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HighLowOpenCloseVolumeAdj Close
Date
1999-11-301410.5899661386.9499511407.8299561388.9100349.515000e+081388.910034
1999-12-011400.1199951387.3800051388.9100341397.7199718.840000e+081397.719971
1999-12-021409.0400391397.7199711397.7199711409.0400399.007000e+081409.040039
1999-12-031447.4200441409.0400391409.0400391433.3000491.006400e+091433.300049
1999-12-04NaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " High Low Open Close Volume \\\n", "Date \n", "1999-11-30 1410.589966 1386.949951 1407.829956 1388.910034 9.515000e+08 \n", "1999-12-01 1400.119995 1387.380005 1388.910034 1397.719971 8.840000e+08 \n", "1999-12-02 1409.040039 1397.719971 1397.719971 1409.040039 9.007000e+08 \n", "1999-12-03 1447.420044 1409.040039 1409.040039 1433.300049 1.006400e+09 \n", "1999-12-04 NaN NaN NaN NaN NaN \n", "\n", " Adj Close \n", "Date \n", "1999-11-30 1388.910034 \n", "1999-12-01 1397.719971 \n", "1999-12-02 1409.040039 \n", "1999-12-03 1433.300049 \n", "1999-12-04 NaN " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Later on we will adapt dates to the data loaded before\n", "start_date = datetime(1999, 11, 30)\n", "end_date = datetime(2020, 8, 31)\n", "\n", "# SP500 Yahoo Finance\n", "sp500_yahoo = pdr.get_data_yahoo(symbols='^GSPC', start=start_date, end=end_date)\n", "sp500_yahoo = sp500_yahoo.asfreq('D', method=None) # generating extra days so that we don't have date jumps\n", "display(sp500_yahoo.head())\n", "\n", "# Adding return and volatility just for plotting (as this has to be calculated separately in train and test)\n", "original_columns = list(sp500_yahoo.columns)\n", "original_columns.remove('Volume') # unrealiable volume data. 
It will not be used\n", "sp500_yahoo['Daily Return'] = sp500_yahoo['Adj Close'].pct_change(periods=1)*100\n", "sp500_yahoo['Daily Volatility'] = sp500_yahoo['Daily Return'].ewm(span=30).std() # exponential moving std\n", "sp500_yahoo['Daily Expected Return'] = sp500_yahoo['Daily Return'].ewm(span=30).mean()\n", "\n", "def daterange(start_date, end_date):\n", " for n in range(int ((end_date - start_date).days) + 1):\n", " yield start_date + timedelta(n)\n", "\n", "weekend = [6, 7]\n", "weekdays = []\n", "for dt in daterange(start_date, end_date):\n", " if dt.isoweekday() not in weekend:\n", " weekdays.append(dt.strftime('%Y-%m-%d'))\n", "\n", "# We'll take only weekdays and we'll delete weekends (as markets are closed during these days)\n", "sp500_yahoo = sp500_yahoo[sp500_yahoo.index.isin(weekdays)]\n", "\n", "# Expected Volatility of SP500\n", "#vix = pdr.get_data_yahoo(symbols='^VIX', start=start_date, end=end_date)\n", "#vix = vix.asfreq('D', method='ffill')\n", "#display(vix.head())\n", "\n", "# añadimos vix a sp500\n", "#sp500['VIX Close'] = vix.Close" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Plotting price, volatility and return\n", "fig, ax1 = plt.subplots()\n", "\n", "ax1.set_xlabel('Date')\n", "ax1.set_ylabel('Daily Volatility [%]')\n", "ax1.plot(sp500_yahoo['Daily Volatility'], label='Daily Volatility', color='lightgrey')\n", "ax1.legend(loc='upper left')\n", "\n", "ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis\n", "\n", "ax2.set_ylabel('S&P 500 Close Price') # we already handled the x-label with ax1\n", "ax2.plot(sp500_yahoo['Close'], color=bbva[0], label='S&P 500 Close Price')\n", "ax2.legend(loc='upper right')\n", "\n", "fig.tight_layout() # otherwise the right y-label is slightly clipped\n", "plt.show()\n", "\n", "_, ax = plt.subplots()\n", "ax.plot(sp500_yahoo['Daily Return'], color=bbva[0], label='Daily Return');\n", "#ax.fill_between(sp500_yahoo.index, \n", "# sp500_yahoo['Daily Mean Return'] + sp500_yahoo['Daily Volatility'], \n", "# sp500_yahoo['Daily Mean Return'] - sp500_yahoo['Daily Volatility'],\n", "# color='lightgrey', label='Daily Volatility');\n", "ax.plot(sp500_yahoo['Daily Expected Return'] + sp500_yahoo['Daily Volatility'], color='lightgrey')\n", "ax.plot(sp500_yahoo['Daily Expected Return'] - sp500_yahoo['Daily Volatility'], color='lightgrey', label='Volatility Bands')\n", "ax.plot(sp500_yahoo['Daily Expected Return'], color=bbva[1], label='Daily Expected Return')\n", "ax.legend(loc='upper right')\n", "ax.set_ylabel('Daily Return [%]');\n", "ax.set_xlabel('Date');\n", "#plt.xticks(rotation=30)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "92737a74566544cf9c3426dbb1160e55", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Select(description='View', options=('Close', 'Daily Return', 'Daily Volatility'), value='Close'…" ] }, "metadata": {}, "output_type": "display_data" }, { 
"data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bed5e674f0e647a28999c3ee690a4eda", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Calculating events with cusum filter (just for plotting)\n", "cusum_events = mlfin.filters.cusum_filter(sp500_yahoo['Adj Close'], \n", " threshold=0.1) #threshold abs(change)\n", "\n", "# interactive plot\n", "warnings.filterwarnings(\"ignore\")\n", "#configure_plotly_browser_state()\n", "\n", "# creating widgets\n", "dependent=widgets.Select(options=['Close', 'Daily Return', 'Daily Volatility'],\n", " value='Close', description='View', disabled=False)\n", "dataframe=widgets.RadioButtons(options=['Companies', 'Countries', 'Currencies'], \n", " value='Companies', description='Indices', disabled=False)\n", "sentiment1=widgets.Dropdown(options=companies.columns,\n", " value='sentiment_US500', description='Comp. Value', disabled=False)\n", "sentiment2=widgets.Dropdown(options=countries.columns,\n", " value='stockIndexSentiment_USA', description='Count. Value', disabled=False)\n", "sentiment3=widgets.Dropdown(options=currencies.columns,\n", " value='sentiment_USD', description='Curr. 
Value',\n", " disabled=False, layout={'positioning': 'right'})\n", "\n", "# setting the ui for our widgets\n", "ui = widgets.HBox([dependent, dataframe, widgets.VBox([sentiment1, sentiment2, sentiment3])])\n", "\n", "#@interact\n", "def plot_sentiment_index(dependent, dataframe, sentiment1, sentiment2, sentiment3):\n", " \n", " if dataframe == 'Companies':\n", " sentiment = sentiment1\n", " df = companies\n", " elif dataframe == 'Countries':\n", " sentiment = sentiment2\n", " df = countries\n", " elif dataframe == 'Currencies':\n", " sentiment = sentiment3\n", " df = currencies\n", " \n", " figura = make_subplots(specs=[[{\"secondary_y\": True}]])\n", "\n", " figura.add_trace(go.Scatter(y=sp500_yahoo[dependent].fillna('ffill'),\n", " x=sp500_yahoo.index,\n", " mode='lines',\n", " name='S&P 500 '+ dependent),\n", " secondary_y=True,)\n", " figura.add_trace(\n", " go.Scatter(y=df[sentiment],\n", " x=df.index,\n", " mode='lines',\n", " name=sentiment + ' Index',\n", " visible='legendonly'),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=df[sentiment].ewm(span=365).mean(),\n", " x=df.index,\n", " mode='lines',\n", " name='EWMA 1y ' + sentiment[:5] + '. Index'),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=df[sentiment].ewm(span=180).mean(),\n", " x=df.index,\n", " mode='lines',\n", " name='EWMA 6m ' + sentiment[:5] + '. Index'),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=df[sentiment].ewm(span=90).mean(),\n", " x=df.index,\n", " mode='lines',\n", " name='EWMA 3m ' + sentiment[:5] + '. Index'),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=df[sentiment].ewm(span=30).mean(),\n", " x=df.index,\n", " mode='lines',\n", " name='EWMA 1m ' + sentiment[:5] + '. 
Index',\n", " visible='legendonly'),\n", " secondary_y=False,)\n", " figura.add_trace(go.Scatter(y=sp500_yahoo['Adj Close'][cusum_events],\n", " x=cusum_events,\n", " mode='markers',\n", " name='S&P 500 Index CUSUM Events'),\n", " secondary_y=True,)\n", " figura.add_trace(\n", " go.Scatter(y=[0],\n", " x=['2001-09-11'],\n", " mode='markers',\n", " name='Sept 11 Attacks',\n", " marker=dict(size=15),\n", " marker_symbol=17),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=[0],\n", " x=['2002-10-09'],\n", " mode='markers',\n", " name='Dot-Com Bubble Burst',\n", " marker=dict(size=15),\n", " marker_symbol=17),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=[0],\n", " x=['2008-09-15'],\n", " mode='markers',\n", " name='Lehman Brothers Collapse',\n", " marker=dict(size=15),\n", " marker_symbol=17),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=[0],\n", " x=['2018-12-22'],\n", " mode='markers',\n", " name='U.S. Federal Government Shutdown',\n", " marker=dict(size=15),\n", " marker_symbol=17),\n", " secondary_y=False,)\n", " figura.add_trace(\n", " go.Scatter(y=[0],\n", " x=['2020-01-20'],\n", " mode='markers',\n", " name='1st COVID-19 Case USA',\n", " marker=dict(size=15),\n", " marker_symbol=17),\n", " secondary_y=False,)\n", " \"\"\" \n", " # to use after results are generated\n", " dataset = sp500_yahoo.merge(test_labels,\n", " left_index=True,\n", " right_index=True,\n", " how='left')\n", " figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.bin == 1) & (dataset.side == 1)]['Adj Close'],\n", " x=dataset[(dataset.bin == 1) & (dataset.side == 1)].index,\n", " mode='markers',\n", " name='Buy',\n", " marker=dict(size=8, color='#008000'),\n", " marker_symbol=5),\n", " secondary_y=True,)\n", " figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.bin == 1) & (dataset.side == -1)]['Adj Close'],\n", " x=dataset[(dataset.bin == 1) & (dataset.side == -1)].index,\n", " mode='markers',\n", " name='Sell',\n", " 
marker=dict(size=8, color='#FF0000'),\n", " marker_symbol=6),\n", " secondary_y=True,)\n", " \"\"\"\n", "\n", " figura.update_layout(\n", " title_text='S&P 500 Index vs Sentiment Indices | Indicator: {}'.format(sentiment),\n", " colorway = bbva)\n", "\n", " figura.update_xaxes(rangeslider_visible=True)\n", " figura.update_yaxes(title_text=\"Sentiment Index\", secondary_y=False)\n", " figura.update_yaxes(title_text=\"S&P 500 Close Price\", secondary_y=True)\n", "\n", " figura.update_xaxes(\n", " rangeslider_visible=True,\n", " rangeselector=dict(\n", " dict(font = dict(color = \"black\")),\n", " buttons=list([\n", " dict(count=1, label=\"1m\", step=\"month\", stepmode=\"backward\"),\n", " dict(count=6, label=\"6m\", step=\"month\", stepmode=\"backward\"),\n", " dict(count=1, label=\"YTD\", step=\"year\", stepmode=\"todate\"),\n", " dict(count=1, label=\"1y\", step=\"year\", stepmode=\"backward\"),\n", " dict(count=3, label=\"3y\", step=\"year\", stepmode=\"backward\"),\n", " dict(count=5, label=\"5y\", step=\"year\", stepmode=\"backward\"),\n", " dict(step=\"all\"),\n", " ])\n", " )\n", " )\n", "\n", " figura.update_layout(template='simple_white', hovermode='x')\n", " iplot(figura)\n", " \n", "out = widgets.interactive_output(plot_sentiment_index, {'dependent': dependent, 'dataframe': dataframe, \n", " 'sentiment1': sentiment1, 'sentiment2': sentiment2,\n", " 'sentiment3': sentiment3})\n", "display(ui, out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Data preparation" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# We'll only take the original columns (remember that we created three new ones just for plotting)\n", "sp500_yahoo = sp500_yahoo[original_columns]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
buzz_US500sentiment_US500optimism_US500joy_US500loveHate_US500trust_US500anger_US500conflict_US500fear_US500gloom_US500...bondUncertainty_USAbondDefault_USAbondPriceDirection_USAbondPriceForecast_USAbondVolatility_USAcentralBank_USAdebtDefault_USAinterestRates_USAinterestRatesForecast_USAmonetaryPolicyLooseVsTight_USA
Date
2000-01-0133223.80.0234020.0164940.0239590.0090600.0012110.0112120.0115130.0067270.027932...0.006748NaN-0.0539810.0269910.0148450.6779660.2203390.016949NaNNaN
2000-01-0236635.90.0081340.0145080.0152170.0108640.0005830.0100990.0153540.0072060.027487...0.023474NaN-0.014085NaN0.0281690.8409090.1022730.0056820.0113640.005682
2000-01-0357064.80.0176120.0146410.0160960.0084290.0007710.0104710.0148160.0069830.032209...0.0093920.000648-0.089708-0.0106870.0695640.7790700.0852710.0426360.011628-0.011628
2000-01-0479355.10.0008250.0069940.0151470.0095140.0017090.0109070.0128470.0071450.032575...0.0065940.004396-0.023380-0.0019980.0780940.7026710.1184670.1184670.015099-0.004646
2000-01-0591858.2-0.0134660.0011100.0141140.0082250.0006310.0109410.0117570.0073270.031995...0.0070300.0034090.008308-0.0012780.0607160.7359380.1156250.0953120.010156-0.007813
\n", "

5 rows × 90 columns

\n", "
" ], "text/plain": [ " buzz_US500 sentiment_US500 optimism_US500 joy_US500 \\\n", "Date \n", "2000-01-01 33223.8 0.023402 0.016494 0.023959 \n", "2000-01-02 36635.9 0.008134 0.014508 0.015217 \n", "2000-01-03 57064.8 0.017612 0.014641 0.016096 \n", "2000-01-04 79355.1 0.000825 0.006994 0.015147 \n", "2000-01-05 91858.2 -0.013466 0.001110 0.014114 \n", "\n", " loveHate_US500 trust_US500 anger_US500 conflict_US500 \\\n", "Date \n", "2000-01-01 0.009060 0.001211 0.011212 0.011513 \n", "2000-01-02 0.010864 0.000583 0.010099 0.015354 \n", "2000-01-03 0.008429 0.000771 0.010471 0.014816 \n", "2000-01-04 0.009514 0.001709 0.010907 0.012847 \n", "2000-01-05 0.008225 0.000631 0.010941 0.011757 \n", "\n", " fear_US500 gloom_US500 ... bondUncertainty_USA \\\n", "Date ... \n", "2000-01-01 0.006727 0.027932 ... 0.006748 \n", "2000-01-02 0.007206 0.027487 ... 0.023474 \n", "2000-01-03 0.006983 0.032209 ... 0.009392 \n", "2000-01-04 0.007145 0.032575 ... 0.006594 \n", "2000-01-05 0.007327 0.031995 ... 
0.007030 \n", "\n", " bondDefault_USA bondPriceDirection_USA bondPriceForecast_USA \\\n", "Date \n", "2000-01-01 NaN -0.053981 0.026991 \n", "2000-01-02 NaN -0.014085 NaN \n", "2000-01-03 0.000648 -0.089708 -0.010687 \n", "2000-01-04 0.004396 -0.023380 -0.001998 \n", "2000-01-05 0.003409 0.008308 -0.001278 \n", "\n", " bondVolatility_USA centralBank_USA debtDefault_USA \\\n", "Date \n", "2000-01-01 0.014845 0.677966 0.220339 \n", "2000-01-02 0.028169 0.840909 0.102273 \n", "2000-01-03 0.069564 0.779070 0.085271 \n", "2000-01-04 0.078094 0.702671 0.118467 \n", "2000-01-05 0.060716 0.735938 0.115625 \n", "\n", " interestRates_USA interestRatesForecast_USA \\\n", "Date \n", "2000-01-01 0.016949 NaN \n", "2000-01-02 0.005682 0.011364 \n", "2000-01-03 0.042636 0.011628 \n", "2000-01-04 0.118467 0.015099 \n", "2000-01-05 0.095312 0.010156 \n", "\n", " monetaryPolicyLooseVsTight_USA \n", "Date \n", "2000-01-01 NaN \n", "2000-01-02 0.005682 \n", "2000-01-03 -0.011628 \n", "2000-01-04 -0.004646 \n", "2000-01-05 -0.007813 \n", "\n", "[5 rows x 90 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Merging the datasets\n", "sentiments = companies.merge(currencies.merge(countries, \n", " left_index=True, right_index=True), left_index=True, right_index=True)\n", "sentiments.drop_duplicates(inplace=True)\n", "sentiments.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# We'll calculate a weighted average on Mondays adding info from the weekend\n", "mondays_weekends = []\n", "for dt in daterange(start_date, end_date):\n", " if dt.isoweekday() in [1, 6, 7]:\n", " mondays_weekends.append(dt.strftime('%Y-%m-%d'))\n", "\n", "senti_mon = sentiments.reindex(pd.DatetimeIndex(mondays_weekends))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "Timestamp('1999-12-04 00:00:00')" ] }, "execution_count": 14, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "# First day was Saturday (this will be useful for allocating weights)\n", "senti_mon.index[0]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
buzz_US500sentiment_US500optimism_US500joy_US500loveHate_US500trust_US500anger_US500conflict_US500fear_US500gloom_US500...bondUncertainty_USAbondDefault_USAbondPriceDirection_USAbondPriceForecast_USAbondVolatility_USAcentralBank_USAdebtDefault_USAinterestRates_USAinterestRatesForecast_USAmonetaryPolicyLooseVsTight_USA
Date
2000-01-0349709.5220.0162220.0148340.0168460.0090400.0007820.0104780.0145380.0070010.030657...0.012173NaN-0.068784NaN0.0538910.7805420.1052200.031424NaNNaN
2000-01-0479355.1000.0008250.0069940.0151470.0095140.0017090.0109070.0128470.0071450.032575...0.0065940.004396-0.023380-0.0019980.0780940.7026710.1184670.1184670.015099-0.004646
2000-01-0591858.200-0.0134660.0011100.0141140.0082250.0006310.0109410.0117570.0073270.031995...0.0070300.0034090.008308-0.0012780.0607160.7359380.1156250.0953120.010156-0.007813
2000-01-06105962.800-0.0034970.0052710.0154300.0078660.0016870.0118200.0122590.0063040.028685...0.0102030.0036010.0624210.0036010.0635020.6942390.1344170.0502220.0206790.000000
2000-01-07122253.000-0.0074310.0044740.0156070.0086420.0011270.0102940.0105190.0072100.032101...0.0158780.0017320.0164550.0025980.0594690.7654460.0949660.0686500.0194510.000000
\n", "

5 rows × 90 columns

\n", "
" ], "text/plain": [ " buzz_US500 sentiment_US500 optimism_US500 joy_US500 \\\n", "Date \n", "2000-01-03 49709.522 0.016222 0.014834 0.016846 \n", "2000-01-04 79355.100 0.000825 0.006994 0.015147 \n", "2000-01-05 91858.200 -0.013466 0.001110 0.014114 \n", "2000-01-06 105962.800 -0.003497 0.005271 0.015430 \n", "2000-01-07 122253.000 -0.007431 0.004474 0.015607 \n", "\n", " loveHate_US500 trust_US500 anger_US500 conflict_US500 \\\n", "Date \n", "2000-01-03 0.009040 0.000782 0.010478 0.014538 \n", "2000-01-04 0.009514 0.001709 0.010907 0.012847 \n", "2000-01-05 0.008225 0.000631 0.010941 0.011757 \n", "2000-01-06 0.007866 0.001687 0.011820 0.012259 \n", "2000-01-07 0.008642 0.001127 0.010294 0.010519 \n", "\n", " fear_US500 gloom_US500 ... bondUncertainty_USA \\\n", "Date ... \n", "2000-01-03 0.007001 0.030657 ... 0.012173 \n", "2000-01-04 0.007145 0.032575 ... 0.006594 \n", "2000-01-05 0.007327 0.031995 ... 0.007030 \n", "2000-01-06 0.006304 0.028685 ... 0.010203 \n", "2000-01-07 0.007210 0.032101 ... 
0.015878 \n", "\n", " bondDefault_USA bondPriceDirection_USA bondPriceForecast_USA \\\n", "Date \n", "2000-01-03 NaN -0.068784 NaN \n", "2000-01-04 0.004396 -0.023380 -0.001998 \n", "2000-01-05 0.003409 0.008308 -0.001278 \n", "2000-01-06 0.003601 0.062421 0.003601 \n", "2000-01-07 0.001732 0.016455 0.002598 \n", "\n", " bondVolatility_USA centralBank_USA debtDefault_USA \\\n", "Date \n", "2000-01-03 0.053891 0.780542 0.105220 \n", "2000-01-04 0.078094 0.702671 0.118467 \n", "2000-01-05 0.060716 0.735938 0.115625 \n", "2000-01-06 0.063502 0.694239 0.134417 \n", "2000-01-07 0.059469 0.765446 0.094966 \n", "\n", " interestRates_USA interestRatesForecast_USA \\\n", "Date \n", "2000-01-03 0.031424 NaN \n", "2000-01-04 0.118467 0.015099 \n", "2000-01-05 0.095312 0.010156 \n", "2000-01-06 0.050222 0.020679 \n", "2000-01-07 0.068650 0.019451 \n", "\n", " monetaryPolicyLooseVsTight_USA \n", "Date \n", "2000-01-03 NaN \n", "2000-01-04 -0.004646 \n", "2000-01-05 -0.007813 \n", "2000-01-06 0.000000 \n", "2000-01-07 0.000000 \n", "\n", "[5 rows x 90 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Continuation:\n", "for col in senti_mon.columns:\n", " senti_mon[col] = \\\n", " senti_mon[col].rolling(3).apply(lambda x: np.average(x, weights=[0.12, 0.22, 0.66])) \n", " # damos mayor peso a los lunes (mon 2/3, sun 2/9, sat 1/9)\n", " \n", "# Substituting indices corresponding to senti_mon\n", "sentiments[sentiments.index.isin(mondays_weekends)] = senti_mon\n", "\n", "del senti_mon\n", "\n", "# Deleting weekends\n", "sentiments = sentiments[sentiments.index.isin(weekdays)]\n", "sentiments.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Selecting train and test periods" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Splitting into train and test sets\n", "\n", "train_start, train_end = '2001-08-31', '2016-08-31'\n", "test_start, test_end = 
'2016-09-01', '2020-08-31'\n", "\n", "sentiments_test = sentiments[test_start:test_end]\n", "sentiments_train = sentiments[train_start:train_end]\n", "sp500_test = sp500_yahoo[test_start:test_end]\n", "sp500_train = sp500_yahoo[train_start:train_end]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "79%\n", "21%\n" ] } ], "source": [ "# Train and test ratios\n", "print(\"{0:.0%}\".format(len(sp500_train)/len(sp500_yahoo[train_start:])))\n", "print(\"{0:.0%}\".format(len(sp500_test)/len(sp500_yahoo[train_start:])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Missing values" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# We'll plot the rate of variables with missing values over time\n", "sentiments_train['year'] = sentiments_train.index.year\n", "sentiments_train['missing'] = sentiments_train.isnull().sum(axis=1)\n", "\n", "((1-(sentiments_train.groupby('year').count().min(axis=1)/\n", " sentiments_train.groupby('year').count()['bondBuzz_USA']))*100).plot(legend=False, marker='o');\n", "((1-(sentiments_train.groupby('year').count().mean(axis=1)/\n", " sentiments_train.groupby('year').count()['bondBuzz_USA']))*100).plot(legend=False, marker='o');\n", "plt.title('Max. and mean missing-value rates per year');\n", "plt.ylabel('Missing rate [%]');\n", "plt.xlabel('Date');\n", "\n", "plt.figure()\n", "plt.plot(sentiments_train.groupby('year').max().index, \n", " list(sentiments_train.groupby('year').max()['missing']), marker='o');\n", "plt.plot(sentiments_train.groupby('year').mean().index, \n", " list(sentiments_train.groupby('year').mean()['missing']), marker='o');\n", "plt.title('Max. 
and mean number of variables with missing values per year');\n", "plt.ylabel('Number of variables');\n", "plt.xlabel('Date');\n", "\n", "sentiments_train.drop(['year', 'missing'], axis=1, inplace=True)\n", "\n", "# y axis represents the percentage of variables with missing values for every year \n", "# disclaimer: these are not missing value rates, see section Data Loading for checking those" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['carryTrade_USD', 'currencyPegInstability_USD']\n" ] } ], "source": [ "# Dropping cols with a missing rate larger than 20%\n", "missing_rate = (sentiments_train.isnull().sum() / len(sentiments_train))*100\n", "drop_cols = [col for col in missing_rate.index if missing_rate[col] >= 20]\n", "print(drop_cols)\n", "sentiments_train.drop(drop_cols, axis=1, inplace=True)\n", "sentiments_test.drop(drop_cols, axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Imputing missing values for the rest of variables\n", "# We'll perform a forward filling since we're dealing with news\n", "\n", "sentiments_train.fillna(method='ffill', inplace=True) \n", "sentiments_test.fillna(method='ffill', inplace=True) " ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "#msno.matrix(sp500_yahoo[['Close']])\n", "\n", "# We'll interpolate over the days were there is no data (be it due to the closing of markets in festive days or\n", "# just because of an absence of data due to system errors)\n", "\n", "sp500_train.interpolate(method='spline', order=3, limit_direction='forward', \n", " axis=0, inplace=True) \n", "sp500_test.interpolate(method='spline', order=3, limit_direction='forward', \n", " axis=0, inplace=True) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transformations" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, 
"outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Lambda: 0.2749559707497384\n" ] }, { "data": { "image/png": "", "text/plain": [ "
def _fit_apply_yeojohnson(data_train, data_test):
    """Fit a Yeo-Johnson lambda per column on train and apply that same lambda to test."""
    for col in data_train.columns:
        data_train[col], fitted_lambda = stats.yeojohnson(data_train[col])
        data_test[col] = stats.yeojohnson(data_test[col], fitted_lambda)
    return data_train, data_test


def _fit_apply_standard_scaler(data_train, data_test):
    """Standardize both sets with the mean/std learnt on train; the index is kept and named 'Date'."""
    scaler = StandardScaler().fit(data_train)

    def _scaled(frame):
        # Build the frame directly on the original index instead of going through
        # a temporary 'Date' column (which would overwrite a feature named 'Date').
        return pd.DataFrame(scaler.transform(frame),
                            columns=frame.columns,
                            index=frame.index.rename('Date'))

    return _scaled(data_train), _scaled(data_test)


def apply_transformation(data_train, data_test, transformation):
    """
    Applies dispersion and scale transformations on data split in train and test.

    All parameters (Yeo-Johnson lambdas, scaler mean and standard deviation) are
    fitted on the train set only and then re-used on the test set.
    Rows containing missing values are dropped from both sets beforehand.
    The input dataframes are not modified.

    Parameters
    ----------
    data_train: pandas dataframe
        Train set.
    data_test: pandas dataframe
        Test set.
    transformation: str
        One of 'dispersion', 'scale' or 'dispersion_and_scale'.
        'dispersion' applies a Yeo-Johnson power transform column by column,
        'scale' applies a StandardScaler, 'dispersion_and_scale' applies both in that order.

    Returns
    -------
    Tuple with train set and test set.
    When scaling is applied the index of both sets is named 'Date'.

    Raises
    ------
    ValueError
        If `transformation` is not one of the supported values, or if the train set
        has no complete rows left after dropping missing values.
    """
    supported = ('dispersion', 'scale', 'dispersion_and_scale')
    if transformation not in supported:
        # Previously an unknown value silently returned the untransformed data.
        raise ValueError("transformation must be one of {}, got {!r}".format(supported, transformation))

    # Explicit copies: the column assignments below must never write into (a view of) the caller's data.
    data_train = data_train.dropna().copy()
    data_test = data_test.dropna().copy()

    if len(data_train) == 0:
        raise ValueError("data_train has no complete rows left after dropping missing values")

    if transformation in ('dispersion', 'dispersion_and_scale'):
        data_train, data_test = _fit_apply_yeojohnson(data_train, data_test)

    if transformation in ('scale', 'dispersion_and_scale'):
        data_train, data_test = _fit_apply_standard_scaler(data_train, data_test)

    return data_train, data_test
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
buzz_US500sentiment_US500optimism_US500joy_US500loveHate_US500trust_US500anger_US500conflict_US500fear_US500gloom_US500...bondUncertainty_USAbondDefault_USAbondPriceDirection_USAbondPriceForecast_USAbondVolatility_USAcentralBank_USAdebtDefault_USAinterestRates_USAinterestRatesForecast_USAmonetaryPolicyLooseVsTight_USA
count3.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+03...3.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+033.910000e+03
mean6.803878e-16-4.974424e-16-5.251554e-17-1.275252e-152.198156e-16-4.195280e-171.171271e-159.881269e-181.374348e-156.720399e-16...-3.112884e-164.515002e-16-2.040851e-191.891070e-176.554291e-16-1.608120e-161.097218e-152.304494e-165.224579e-18-3.448790e-16
std1.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+00...1.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+001.000128e+00
min-3.064271e+00-3.836744e+00-4.284522e+00-3.013036e+00-4.381088e+00-6.928434e+00-2.586858e+00-4.609965e+00-3.552208e+00-2.670639e+00...-3.163638e+00-8.062726e+00-6.847632e+00-6.703713e+00-4.173702e+00-3.048743e+00-2.270924e+00-9.733690e+00-1.687397e+01-9.594321e+00
25%-8.493998e-01-6.976976e-01-6.582821e-01-7.451503e-01-6.698294e-01-6.259648e-01-7.581757e-01-6.460022e-01-6.559033e-01-7.677362e-01...-6.686969e-01-6.951505e-01-4.799653e-01-4.732337e-01-7.013733e-01-6.927469e-01-7.495620e-01-5.990254e-01-4.879330e-01-6.143743e-01
50%3.815230e-02-7.880494e-02-1.911985e-02-7.581875e-022.395684e-03-3.272897e-02-8.365807e-021.615158e-021.833695e-02-2.910251e-02...-6.556898e-03-4.066074e-02-4.698007e-02-5.073219e-02-3.039403e-024.215866e-034.375376e-03-1.634409e-01-2.458350e-01-2.346027e-01
75%7.401832e-017.451540e-016.455215e-017.962038e-016.810202e-015.983299e-018.139464e-016.483610e-016.645912e-017.996133e-01...6.920137e-016.463170e-015.109784e-014.678733e-017.241609e-016.946301e-017.372704e-014.860234e-012.338346e-016.134070e-01
max2.942192e+003.112603e+003.886110e+003.362441e+004.069919e+005.643069e+002.700716e+004.335172e+003.704308e+003.576385e+00...3.779286e+002.888767e+004.780882e+005.367399e+002.825161e+002.955895e+003.014466e+002.787403e+005.097762e+002.267225e+00
\n", "

8 rows × 88 columns

\n", "
" ], "text/plain": [ " buzz_US500 sentiment_US500 optimism_US500 joy_US500 \\\n", "count 3.910000e+03 3.910000e+03 3.910000e+03 3.910000e+03 \n", "mean 6.803878e-16 -4.974424e-16 -5.251554e-17 -1.275252e-15 \n", "std 1.000128e+00 1.000128e+00 1.000128e+00 1.000128e+00 \n", "min -3.064271e+00 -3.836744e+00 -4.284522e+00 -3.013036e+00 \n", "25% -8.493998e-01 -6.976976e-01 -6.582821e-01 -7.451503e-01 \n", "50% 3.815230e-02 -7.880494e-02 -1.911985e-02 -7.581875e-02 \n", "75% 7.401832e-01 7.451540e-01 6.455215e-01 7.962038e-01 \n", "max 2.942192e+00 3.112603e+00 3.886110e+00 3.362441e+00 \n", "\n", " loveHate_US500 trust_US500 anger_US500 conflict_US500 \\\n", "count 3.910000e+03 3.910000e+03 3.910000e+03 3.910000e+03 \n", "mean 2.198156e-16 -4.195280e-17 1.171271e-15 9.881269e-18 \n", "std 1.000128e+00 1.000128e+00 1.000128e+00 1.000128e+00 \n", "min -4.381088e+00 -6.928434e+00 -2.586858e+00 -4.609965e+00 \n", "25% -6.698294e-01 -6.259648e-01 -7.581757e-01 -6.460022e-01 \n", "50% 2.395684e-03 -3.272897e-02 -8.365807e-02 1.615158e-02 \n", "75% 6.810202e-01 5.983299e-01 8.139464e-01 6.483610e-01 \n", "max 4.069919e+00 5.643069e+00 2.700716e+00 4.335172e+00 \n", "\n", " fear_US500 gloom_US500 ... bondUncertainty_USA bondDefault_USA \\\n", "count 3.910000e+03 3.910000e+03 ... 3.910000e+03 3.910000e+03 \n", "mean 1.374348e-15 6.720399e-16 ... -3.112884e-16 4.515002e-16 \n", "std 1.000128e+00 1.000128e+00 ... 1.000128e+00 1.000128e+00 \n", "min -3.552208e+00 -2.670639e+00 ... -3.163638e+00 -8.062726e+00 \n", "25% -6.559033e-01 -7.677362e-01 ... -6.686969e-01 -6.951505e-01 \n", "50% 1.833695e-02 -2.910251e-02 ... -6.556898e-03 -4.066074e-02 \n", "75% 6.645912e-01 7.996133e-01 ... 6.920137e-01 6.463170e-01 \n", "max 3.704308e+00 3.576385e+00 ... 
3.779286e+00 2.888767e+00 \n", "\n", " bondPriceDirection_USA bondPriceForecast_USA bondVolatility_USA \\\n", "count 3.910000e+03 3.910000e+03 3.910000e+03 \n", "mean -2.040851e-19 1.891070e-17 6.554291e-16 \n", "std 1.000128e+00 1.000128e+00 1.000128e+00 \n", "min -6.847632e+00 -6.703713e+00 -4.173702e+00 \n", "25% -4.799653e-01 -4.732337e-01 -7.013733e-01 \n", "50% -4.698007e-02 -5.073219e-02 -3.039403e-02 \n", "75% 5.109784e-01 4.678733e-01 7.241609e-01 \n", "max 4.780882e+00 5.367399e+00 2.825161e+00 \n", "\n", " centralBank_USA debtDefault_USA interestRates_USA \\\n", "count 3.910000e+03 3.910000e+03 3.910000e+03 \n", "mean -1.608120e-16 1.097218e-15 2.304494e-16 \n", "std 1.000128e+00 1.000128e+00 1.000128e+00 \n", "min -3.048743e+00 -2.270924e+00 -9.733690e+00 \n", "25% -6.927469e-01 -7.495620e-01 -5.990254e-01 \n", "50% 4.215866e-03 4.375376e-03 -1.634409e-01 \n", "75% 6.946301e-01 7.372704e-01 4.860234e-01 \n", "max 2.955895e+00 3.014466e+00 2.787403e+00 \n", "\n", " interestRatesForecast_USA monetaryPolicyLooseVsTight_USA \n", "count 3.910000e+03 3.910000e+03 \n", "mean 5.224579e-18 -3.448790e-16 \n", "std 1.000128e+00 1.000128e+00 \n", "min -1.687397e+01 -9.594321e+00 \n", "25% -4.879330e-01 -6.143743e-01 \n", "50% -2.458350e-01 -2.346027e-01 \n", "75% 2.338346e-01 6.134070e-01 \n", "max 5.097762e+00 2.267225e+00 \n", "\n", "[8 rows x 88 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# We'll also apply standardization to even scales. 
This will make distributions more comparable\n", "sentiments_train, sentiments_test = apply_transformation(sentiments_train, sentiments_test, 'dispersion_and_scale')\n", "\n", "display(sentiments_train.describe())" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0.9131197876553399" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# checking linear correlation after transformation\n", "sentiments_train.optimism_US500.ewm(90).mean().corr(sp500_yahoo.Close, method='pearson') \n", "# correlation has improved a little" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Smoothing" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
def filter_signal(signal, threshold=1e8):
    """
    Low-pass filters a real-valued signal with the Fast Fourier Transform.

    The signal is taken to the frequency domain, every component whose frequency
    is above `threshold` is zeroed, and the result is transformed back.

    Parameters
    ----------
    signal: numpy array (or any 1-D array-like, e.g. a pandas Series or a list)
        Samples assumed to be equally spaced one unit apart (d=1).
    threshold: double
        Cut-off frequency in cycles per sample. With unit spacing the frequencies
        range from 0 to 0.5, so the default of 1e8 leaves the signal unfiltered.

    Returns
    -------
    Numpy array with the filtered signal, with the same number of samples as the input.
    """
    signal = np.asarray(signal)
    fourier = np.fft.rfft(signal)
    frequencies = np.fft.rfftfreq(signal.size, d=1.)
    fourier[frequencies > threshold] = 0
    # irfft assumes an even output length unless told otherwise; passing n keeps
    # odd-length signals from coming back one sample short.
    return np.fft.irfft(fourier, n=signal.size)
# Function for selecting smoothing technique
def select_smoothing(data, technique, span0=22, threshold=0.1):
    """
    Performs a smoothing (EWMA) or filtering (FFT) technique over data.

    Parameters
    ----------
    data: pandas dataframe column
    technique: str
        One of 'fft' for Fast Fourier Transform or 'ewma' for Exponentially Weighted Moving Average.
        Calls function filter_signal() for FFT.
    span0: int
        Decay in terms of span (only used by 'ewma').
    threshold: double
        Cut-off frequency handed to filter_signal() (only used by 'fft').
        Defaults to 0.1, the value the notebook previously read from a global variable.

    Returns
    -------
    For 'ewma', a pandas dataframe column.
    For 'fft', a list with exactly as many values as `data`.

    Raises
    ------
    ValueError
        If `technique` is neither 'fft' nor 'ewma'.
    """
    if technique == 'fft':
        values = np.asarray(filter_signal(data, threshold=threshold))
        # The filtered signal can be shorter than the input; zero-pad (or trim)
        # so the result can always be assigned back to the column it came from.
        n_samples = len(data)
        padded = np.zeros(n_samples)
        n_copied = min(n_samples, values.size)
        padded[:n_copied] = values[:n_copied]
        filtered = list(padded)
    elif technique == 'ewma':
        filtered = data.ewm(span=span0).mean()
    else:
        raise ValueError("technique must be 'fft' or 'ewma', got {!r}".format(technique))
    return filtered
'ewma')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Feature engineering" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### PCA" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "%%capture\n", "\"\"\"\n", "def apply_pca(data, scale=False):\n", " # Separating out the features\n", " # already separated\n", " data = data.dropna()\n", " x = data\n", " \n", " # Standardizing the features\n", " if scale is True:\n", " x = StandardScaler().fit_transform(x) #already transformed\n", " pca = PCA(n_components=3)\n", " x_pca = pca.fit_transform(x)\n", " pDf = pd.DataFrame(data = x_pca, \n", " columns = ['pc1', 'pc2', 'pc3'])\n", "\n", " data.reset_index(inplace=True)\n", " data['pc1'] = pDf.pc1\n", " data['pc2'] = pDf.pc2\n", " data['pc3'] = pDf.pc3\n", " data.set_index('Date', inplace=True)\n", "\n", " print(pca.explained_variance_ratio_)\n", " \n", " plt.figure()\n", " plt.plot(np.cumsum(pca.explained_variance_ratio_))\n", " plt.xlabel('Number of Components')\n", " plt.ylabel('Variance (%)') \n", " plt.title('Explained Variance')\n", " plt.show()\n", "\n", " return data\n", "\n", "sentiments_train = apply_pca(sentiments_train, scale=False)\n", "sentiments_test = apply_pca(sentiments_test, scale=False)\"\"\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Technical indicators" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[TA Library Documentation](https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
# We'll add some technical indicators used in trading for evaluating momentum and trends
def add_technical_indicators(data):
    """
    Adds technical indicators widely used by traders when checking for bearish or bullish signals.

    The following columns are written into `data`:
    'ROC', 'RSI', 'Stoch', 'Williams', 'MACD', 'ADX' (from the `ta` indicator classes),
    'Close/Open' (1 when Close > Open, else 0), 'Daily Return' (percentage change of
    'Adj Close', in %), 'Daily Volatility' (EWM standard deviation of 'Daily Return',
    span=22) and 'sp_cross_Adj Close' (1 when 'Adj Close' is >= its slow EWMA, -1 when
    it is below).

    Parameters
    ----------
    data: pandas dataframe
        Must contain 'High', 'Low', 'Open', 'Close' and 'Adj Close'.
        It is modified in place: rows with NaNs are dropped first and the new
        columns are added to the same object.

    Returns
    -------
    Pandas dataframe (the same object that was passed in).
    """
    # Rows with NaNs are removed up front (in place, so the caller's frame changes too).
    data.dropna(inplace=True)

    high, low, close = data['High'], data['Low'], data['Close']
    adj_close = data['Adj Close']

    data['ROC'] = ROCIndicator(adj_close, 10).roc()
    data['RSI'] = RSIIndicator(adj_close, 10).rsi()
    data['Stoch'] = StochasticOscillator(high=high,
                                         low=low,
                                         close=close,
                                         n=10).stoch()
    data['Williams'] = WilliamsRIndicator(high=high,
                                          low=low,
                                          close=close,
                                          lbp=10).wr()
    data['MACD'] = MACD(close,
                        n_slow=22,
                        n_fast=8,
                        n_sign=5).macd()
    data['ADX'] = ADXIndicator(high=high,
                               low=low,
                               close=close, n=10).adx()

    # Vectorised comparison; the previous per-row loop indexed a date-indexed Series
    # with integer keys, which relies on pandas' deprecated positional fallback.
    data['Close/Open'] = (data['Close'] > data['Open']).astype(int)

    data['Daily Return'] = data['Adj Close'].pct_change(periods=1) * 100
    data['Daily Volatility'] = data['Daily Return'].ewm(span=22).std()  # exponential moving std

    ### only compute this cross if it won't later be the primary model!!
    slow_window = 60
    col = 'Adj Close'
    cross_col = 'sp_cross_{}'.format(col)

    # The raw price acts as the "fast" series. The slow average passes slow_window as
    # the first positional argument of ewm(), i.e. as centre of mass (com), not as span.
    fast = data[col]
    slow = data[col].ewm(com=slow_window).mean()

    # Compute sides (rows where the comparison is undefined stay NaN)
    data[cross_col] = np.nan
    data.loc[fast >= slow, cross_col] = 1
    data.loc[fast < slow, cross_col] = -1

    return data
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HighLowOpenAdj CloseROCRSIStochWilliamsMACDADXClose/OpenDaily ReturnDaily Volatilitysp_cross_Adj Close
Date
2001-08-31NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2001-09-031141.8299561126.3800051129.0300291133.579956NaNNaNNaNNaNNaN0.01.0NaNNaN1.0
2001-09-041185.4174361146.5632541142.7526761123.104688NaNNaNNaNNaNNaN0.00.0-0.924087NaN-1.0
2001-09-051155.4000241129.0600591133.5799561132.939941NaNNaNNaNNaNNaN0.00.00.8757201.2726561.0
2001-09-061135.5200201114.8599851132.9399411131.739990NaNNaNNaNNaNNaN0.00.0-0.1059150.8821871.0
\n", "
# we'll add these ewmas as predictor or explanatory variables
def add_crossing_ewmas(data, fast_window, slow_window,
                       cols=('sentiment_US500', 'stockIndexSentiment_USA')):
    """
    Adds, for each selected column, a crossover signal between the column itself
    (treated as the fast average) and a slower EWMA of the same column.

    For every column `c` in `cols` a column 'cross_<c>' is written into `data`:
    1 where `c` >= its slow EWMA, -1 where `c` is below it, NaN where neither
    comparison holds (e.g. NaN input).

    Parameters
    ----------
    data: pandas dataframe
        Must contain every column listed in `cols`. Modified in place.
    fast_window: int
        Kept for backward compatibility. It is not used: the input columns are
        taken as already averaged.
    slow_window: int
        Decay of the slow average. It is passed to pandas `ewm` as centre of mass
        (`com`), not as span.
    cols: iterable of str
        Columns to build crossover signals for. Defaults to the two sentiment
        indices this notebook has always used.

    Returns
    -------
    Pandas dataframe (the same object that was passed in).
    """
    for col in cols:
        fast = data[col]  # already averaged
        slow = data[col].ewm(com=slow_window).mean()
        cross_col = 'cross_{}'.format(col)

        # Compute sides
        data[cross_col] = np.nan
        data.loc[fast >= slow, cross_col] = 1
        data.loc[fast < slow, cross_col] = -1

    return data


# overriding function get_bins
# we want this to return the sign of the return when the vertical barrier is touched first
# instead of what it's currently implemented (0 if vertical barrier is touched first)
def get_bins(triple_barrier_events, close):
    """
    Variant of the labeling routine from Advances in Financial Machine Learning,
    Snippet 3.7, page 51, where the label is the sign of the realised return
    between the event start and events['t1'] (no special 0 label for the vertical barrier).

    :param triple_barrier_events: (pd.DataFrame)
                -events.index is event's starttime
                -events['t1'] is event's endtime (rows with missing t1 are ignored)
                -events['trgt'] is event's target
                -events['side'] (optional) implies the algo's position side
                Case 1: ('side' not in events): bin is np.sign(ret), i.e. in (-1, 0, 1)
                Case 2: ('side' in events): ret is multiplied by side and bin is
                        0 where that return is <= 0, 1 otherwise (meta-labeling)
    :param close: (pd.Series) Close prices
    :return: (pd.DataFrame) indexed like the valid events, with columns
             'ret', 'trgt', 'bin' and, when provided, 'side'
    """

    # 1) Align prices with their respective events
    events_ = triple_barrier_events.dropna(subset=['t1'])
    all_dates = events_.index.union(other=events_['t1'].array).drop_duplicates()
    # dates missing from `close` take the next available price
    prices = close.reindex(all_dates, method='bfill')

    # 2) Create out DataFrame
    out_df = pd.DataFrame(index=events_.index)
    # simple (not log) return between event start and t1
    out_df['ret'] = prices.loc[events_['t1'].values].values / prices.loc[events_.index] - 1
    out_df['trgt'] = events_['trgt']

    has_side = 'side' in events_

    # Meta labeling: Events that were correct will have pos returns
    if has_side:
        out_df['ret'] = out_df['ret'] * events_['side']  # meta-labeling

    # the outcome is the sign of the return, whichever barrier was touched
    out_df['bin'] = np.sign(out_df['ret'])

    # Meta labeling: label incorrect events with a 0
    if has_side:
        out_df.loc[out_df['ret'] <= 0, 'bin'] = 0

    # Add the side to the output. This is useful for when a meta label model must be fit
    if 'side' in triple_barrier_events.columns:
        out_df['side'] = triple_barrier_events['side']

    return out_df


# Now we'll apply the labeling
def apply_trading_labeling(data,
                           compute_side=False,
                           horizon=14,
                           pt_sl=(1, 2),  # multipliers for daily vol (contribution to horizontal barriers)
                           min_ret=0.005,
                           primary_model=False):
    """
    Applies selected trading strategy with the given parameters. Labeling uses Triple Barrier Method.

    Parameters
    __________
    data: pandas dataframe
        Data to use. Must contain 'Adj Close' and 'Daily Volatility' (in %, it is
        divided by 100 here). When compute_side is True the frame is modified in
        place: 'Fast EWMA', 'Slow EWMA' and 'Side' are added and rows with NaNs dropped.
    compute_side: boolean
        Whether to use a primary model that tells the side (buy or sell).
        When True, a trend-following strategy will be applied as primary model.
        Default is False.
    horizon: int
        Prediction horizon in natural days.
    pt_sl: sequence of two numbers
        Profit taking and stop loss multipliers to the volatility. Width of the TBM box.
        A fresh list copy is handed to mlfinlab on every call.
    min_ret: float
        Minimum target return to run the search for triple barriers.
    primary_model: boolean
        Whether a primary model computed by the user has already decided the side.
        In that case `data` must already contain a 'Side' column.

    Returns
    -------
    Labels dataframe and triple-barrier events dataframe.
    Without a side (both flags False) the labels frame carries the sign of the
    return in 'side' and a constant 1 in 'bin'.

    Raises
    ------
    ValueError
        If a side is required (primary_model=True) but `data` has no 'Side' column.
    """

    ####--------------------- Primary models ------------------------####
    if compute_side is True:
        # compute exponentially moving averages
        fast_window = 20
        slow_window = 90  # optimize the span for fast and slow averages

        # windows are passed positionally in the original code, i.e. as centre of mass
        data['Fast EWMA'] = data['Adj Close'].ewm(com=fast_window).mean()
        data['Slow EWMA'] = data['Adj Close'].ewm(com=slow_window).mean()

        # Compute sides
        data['Side'] = np.nan

        long_signals = data['Fast EWMA'] >= data['Slow EWMA']
        short_signals = data['Fast EWMA'] < data['Slow EWMA']
        data.loc[long_signals, 'Side'] = 1
        data.loc[short_signals, 'Side'] = -1

        # NOTE(review): this shifts the two EWMA columns by one day, not 'Side' itself,
        # so the side used below is NOT lagged. Confirm whether 'Side' should be shifted.
        data[['Fast EWMA', 'Slow EWMA']] = data[['Fast EWMA', 'Slow EWMA']].shift(1)

        data[['Fast EWMA', 'Slow EWMA']].plot();

        data.dropna(inplace=True)

    use_side = (compute_side is True) or (primary_model is True)
    if use_side and 'Side' not in data.columns:
        raise ValueError("primary_model=True requires a 'Side' column in data")

    close = data['Adj Close']
    target = data['Daily Volatility'] / 100

    ####--------------------- CUSUM filters ------------------------####
    # Apply Symmetric CUSUM Filter and get timestamps for events
    cusum_events = mlfin.filters.cusum_filter(close, threshold=target)

    ####--------------------- Vertical barriers ------------------------####
    # Compute vertical barrier
    vertical_barriers = mlfin.labeling.add_vertical_barrier(t_events=cusum_events,
                                                            close=close,
                                                            num_days=horizon)  # this is the length of the tbm box

    ####--------------------- Triple barriers ------------------------####
    # Computing triple barriers (identical call for both modes, only the side differs)
    triple_barrier_events = mlfin.labeling.get_events(close=close,
                                                      t_events=cusum_events,
                                                      pt_sl=list(pt_sl),  # profit taking and stop loss multiples
                                                      target=target,  # values in conjunction with pt_sl for width of barrier
                                                      min_ret=min_ret,
                                                      num_threads=3,  # num of parallel tasks
                                                      vertical_barrier_times=vertical_barriers,
                                                      side_prediction=data['Side'] if use_side else None)

    ####--------------------- Meta-labels / Side ------------------------####
    meta_labeled_events = get_bins(triple_barrier_events, close)
    if not use_side:
        # no side prediction came first: the sign of the return becomes the side
        # and every event is kept as a bet
        meta_labeled_events['side'] = meta_labeled_events['bin']
        meta_labeled_events['bin'] = 1

    return meta_labeled_events, triple_barrier_events
# computing labels for side (+1, -1) 
# side will tell the sign of the bet
# Both flags are False, so apply_trading_labeling() takes its "no side prediction" path:
# 'side' is the sign of the return between the event start and t1, and 'bin' is set to 1.
# pt_sl=[0, 0] shows up as pt=0 / sl=0 in the returned events; presumably this turns the
# horizontal barriers off in mlfinlab so only the 14-day vertical barrier acts -- TODO confirm.
# NOTE: the same settings are used for train and test on purpose; keep them in sync.
train_labels, tbm_train = apply_trading_labeling(X_train, 
                                                 primary_model=False, 
                                                 compute_side=False,
                                                 horizon=14,
                                                 pt_sl=[0, 0],
                                                 min_ret=0.005)
test_labels, tbm_test = apply_trading_labeling(X_test, 
                                               primary_model=False, 
                                               compute_side=False,
                                               horizon=14,
                                               pt_sl=[0, 0],
                                               min_ret=0.005)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rettrgtbinside
2001-10-040.0044860.01796711.0
2001-10-110.0038950.01589211.0
2001-10-17-0.0343950.0138101-1.0
2001-10-18-0.0160710.0146031-1.0
2001-10-230.0118730.01364811.0
\n", "
" ], "text/plain": [ " ret trgt bin side\n", "2001-10-04 0.004486 0.017967 1 1.0\n", "2001-10-11 0.003895 0.015892 1 1.0\n", "2001-10-17 -0.034395 0.013810 1 -1.0\n", "2001-10-18 -0.016071 0.014603 1 -1.0\n", "2001-10-23 0.011873 0.013648 1 1.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
t1trgtptsl
2001-10-042001-10-180.01796700
2001-10-112001-10-250.01589200
2001-10-172001-10-310.01381000
2001-10-182001-11-010.01460300
2001-10-232001-11-060.01364800
\n", "
# plotting labels to see outcome
#configure_plotly_browser_state()
@interact
def plot_sentiment_index(data=widgets.RadioButtons(
        options=['Train', 'Test'],
        value='Test',
        # rows=10,
        description='Data',
        disabled=False)):
    """
    Interactive plot of the S&P 500 'Adj Close' with the labeled Buy / Sell events on top.

    Parameters
    ----------
    data: str (chosen through the radio buttons)
        'Test' plots X_test with test_labels, anything else plots X_train with train_labels.
        Both pairs are read from the notebook's global namespace.

    Notes
    -----
    Buy markers are events with bin == 1 and side == 1, Sell markers events with
    bin == 1 and side == -1. The fast / slow EWMA lines are only drawn when both
    'Fast EWMA' and 'Slow EWMA' columns exist in the merged frame.
    """
    if data == 'Test':
        features, labels = X_test, test_labels
    else:
        features, labels = X_train, train_labels

    # left join keeps every price row; rows without an event get NaN labels
    dataset = features.merge(labels,
                             left_index=True,
                             right_index=True,
                             how='left')

    figura = make_subplots(specs=[[{"secondary_y": False}]])
    figura.add_trace(go.Scatter(y=features['Adj Close'],
                                x=features.index,
                                mode='lines',
                                name='S&P 500 Close Price'),
                     secondary_y=False,)

    if ('Fast EWMA' in dataset.columns) and ('Slow EWMA' in dataset.columns):
        for ewma_col in ('Fast EWMA', 'Slow EWMA'):
            figura.add_trace(go.Scatter(y=dataset[ewma_col],
                                        x=dataset.index,
                                        mode='lines',
                                        name='SP500 {}'.format(ewma_col)),
                             secondary_y=False,)

    # (side value, legend name, marker colour, plotly marker symbol)
    marker_specs = ((1, 'Buy', '#008000', 5),
                    (-1, 'Sell', '#FF0000', 6))
    for side_value, trace_name, color, symbol in marker_specs:
        picked = dataset[(dataset.bin == 1) & (dataset.side == side_value)]
        figura.add_trace(
            go.Scatter(y=picked['Adj Close'],
                       x=picked.index,
                       mode='markers',
                       name=trace_name,
                       marker=dict(size=8, color=color),
                       marker_symbol=symbol),
            secondary_y=False,)

    figura.update_layout(
        title_text='S&P 500 Index and labeled positions | {}'.format(data),
        colorway=bbva)

    figura.update_yaxes(title_text="S&P 500 Close Price", secondary_y=False)

    figura.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            font=dict(color="black"),
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(count=3, label="3y", step="year", stepmode="backward"),
                dict(count=5, label="5y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        )
    )

    figura.update_layout(template='simple_white', hovermode='x')
    iplot(figura)
# number of observations per label
# label = bin * side. With the labeling applied above 'bin' is a constant 1,
# so 'label' equals 'side' (the sign of the return): -1 or 1.
# Note: this adds a 'label' column to train_labels / test_labels in place.
train_labels['label'] = train_labels['bin'] * train_labels['side']
print('Train:\n', train_labels.groupby('label').count()[['ret']])

test_labels['label'] = test_labels['bin'] * test_labels['side']
print('\nTest:\n', test_labels.groupby('label').count()[['ret']])

# scaling price variables since we've already computed our sign labels
# Only the columns that came from the S&P 500 frames are transformed.
# apply_transformation() is defined earlier in the notebook (not visible here); it is
# called with train and test together -- presumably fitted on train only, TODO confirm.
X_train[sp500_train.columns], X_test[sp500_test.columns] = apply_transformation(X_train[sp500_train.columns],
                                                                                X_test[sp500_test.columns],
                                                                                'dispersion_and_scale')
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HighLowOpenAdj CloseROCRSIStochWilliamsMACDADX...bondPriceDirection_USAbondPriceForecast_USAbondVolatility_USAcentralBank_USAdebtDefault_USAinterestRates_USAinterestRatesForecast_USAmonetaryPolicyLooseVsTight_USAcross_sentiment_US500cross_stockIndexSentiment_USA
2001-10-04-0.807969-0.873574-0.871239-0.7861301.8085820.4225971.1650981.293337-1.006822-1.353784...1.1664410.3179780.8917990.537591-0.174197-0.519892-0.3406900.1613531.01.0
2001-10-11-0.782429-0.827435-0.848776-0.7511632.5139950.5516181.1284321.220651-0.226302-1.530948...0.0713500.2107730.600524-0.1291030.380515-0.996558-0.762076-0.4167521.01.0
2001-10-17-0.701903-0.691118-0.714693-0.6858551.3788150.8513561.0359131.0591610.705309-0.859799...0.3310340.6417300.572074-0.3704640.629447-0.831857-0.514771-0.3779191.01.0
2001-10-18-0.680350-0.732335-0.685032-0.766768-0.016683-0.079297-0.407315-0.5295350.534852-0.897141...0.4456000.6900180.573608-0.3571950.643653-0.797990-0.467564-0.3784491.01.0
2001-10-23-0.746187-0.755141-0.780502-0.7158210.7146420.3518890.3073520.1460690.269745-1.549311...0.3062660.5271520.434079-0.6046990.790327-0.724182-0.378894-0.3863941.01.0
\n", "

5 rows × 104 columns

\n", "
" ], "text/plain": [ " High Low Open Adj Close ROC RSI \\\n", "2001-10-04 -0.807969 -0.873574 -0.871239 -0.786130 1.808582 0.422597 \n", "2001-10-11 -0.782429 -0.827435 -0.848776 -0.751163 2.513995 0.551618 \n", "2001-10-17 -0.701903 -0.691118 -0.714693 -0.685855 1.378815 0.851356 \n", "2001-10-18 -0.680350 -0.732335 -0.685032 -0.766768 -0.016683 -0.079297 \n", "2001-10-23 -0.746187 -0.755141 -0.780502 -0.715821 0.714642 0.351889 \n", "\n", " Stoch Williams MACD ADX ... \\\n", "2001-10-04 1.165098 1.293337 -1.006822 -1.353784 ... \n", "2001-10-11 1.128432 1.220651 -0.226302 -1.530948 ... \n", "2001-10-17 1.035913 1.059161 0.705309 -0.859799 ... \n", "2001-10-18 -0.407315 -0.529535 0.534852 -0.897141 ... \n", "2001-10-23 0.307352 0.146069 0.269745 -1.549311 ... \n", "\n", " bondPriceDirection_USA bondPriceForecast_USA bondVolatility_USA \\\n", "2001-10-04 1.166441 0.317978 0.891799 \n", "2001-10-11 0.071350 0.210773 0.600524 \n", "2001-10-17 0.331034 0.641730 0.572074 \n", "2001-10-18 0.445600 0.690018 0.573608 \n", "2001-10-23 0.306266 0.527152 0.434079 \n", "\n", " centralBank_USA debtDefault_USA interestRates_USA \\\n", "2001-10-04 0.537591 -0.174197 -0.519892 \n", "2001-10-11 -0.129103 0.380515 -0.996558 \n", "2001-10-17 -0.370464 0.629447 -0.831857 \n", "2001-10-18 -0.357195 0.643653 -0.797990 \n", "2001-10-23 -0.604699 0.790327 -0.724182 \n", "\n", " interestRatesForecast_USA monetaryPolicyLooseVsTight_USA \\\n", "2001-10-04 -0.340690 0.161353 \n", "2001-10-11 -0.762076 -0.416752 \n", "2001-10-17 -0.514771 -0.377919 \n", "2001-10-18 -0.467564 -0.378449 \n", "2001-10-23 -0.378894 -0.386394 \n", "\n", " cross_sentiment_US500 cross_stockIndexSentiment_USA \n", "2001-10-04 1.0 1.0 \n", "2001-10-11 1.0 1.0 \n", "2001-10-17 1.0 1.0 \n", "2001-10-18 1.0 1.0 \n", "2001-10-23 1.0 1.0 \n", "\n", "[5 rows x 104 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# eliminating label 0 (0 would mean that pct change 
between days is null)\n", "train_labels = train_labels[train_labels.label != 0]\n", "test_labels = test_labels[test_labels.label != 0]\n", "\n", "# downsampling with events\n", "X_train = X_train.reindex(train_labels.index)\n", "X_test = X_test.reindex(test_labels.index)\n", "\n", "X_train.head()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "#X_train.to_csv('x_train_pt_sl.csv', index=False)\n", "#X_test.to_csv('x_test_pt_sl.csv', index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Model" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# applying our model\n", "def apply_model(train_labels, test_labels, X_train_, X_test_, model, scoring, sfs=True):\n", " \"\"\"\n", " Applies model RF or XGB to data with Cross-Validation. It may perform variable selection on demand.\n", " \n", " Parameters\n", " ----------\n", " train_labels: pandas dataframe\n", " Dataframe containing train labels as returned by apply_trading_labeling.\n", " test_labels: pandas dataframe\n", " Dataframe containing test labels as returned by apply_trading_labeling.\n", " X_train_: pandas dataframe\n", " Training data.\n", " X_test_: pandas dataframe\n", " Test data.\n", " model: str\n", " One of 'RF' for Random Forest or 'XGB' for Extreme Gradient Boosting.\n", " scoring: str\n", " One of the admitted possibilities in sklearn's GridSearchCV .\n", " sfs: boolean\n", " Whether to perfrom Sequential Forward Selection with a simple RF for variable selection.\n", " It will select from 10 to 30 total variables.\n", " \n", " Returns\n", " -------\n", " Best estimator from CV and list of selected variables.\n", " \"\"\"\n", " \n", " y_test = test_labels.label\n", " y_train = train_labels.label\n", " \n", " pos_ratio = round(train_labels[train_labels.label <= 0][['label']].count() / \n", " train_labels[train_labels.label > 0][['label']].count(), 1)[0]\n", " \n", " if sfs is True:\n", " 
# Sequential Forward Floating Selection\n", " sffs = SFS(RandomForestClassifier(n_jobs=-1, random_state=1, n_estimators=40, max_depth=2,\n", " class_weight='balanced_subsample'), \n", " k_features=(10, 30), \n", " forward=True, \n", " floating=True, \n", " scoring=scoring,\n", " cv=5,\n", " n_jobs=-1,\n", " verbose=0)\n", "\n", " sffs = sffs.fit(X_train_, y_train)\n", "\n", " print('\\nSequential Forward Floating Selection (k=30):')\n", " sffs_score = sffs.k_score_\n", " print('CV Score: %.2f' % sffs_score)\n", "\n", " fig1 = plot_sfs(sffs.get_metric_dict(), kind='std_dev')\n", "\n", " #plt.ylim([0.8, 1])\n", " plt.title('Sequential Forward Selection (w. StdDev)')\n", " #plt.grid()\n", " plt.show()\n", "\n", " selected_cols = []\n", " for i, col in enumerate(X_train_.columns):\n", " if i in sffs.k_feature_idx_:\n", " selected_cols.append(col)\n", "\n", " X_train_ = X_train_[selected_cols]\n", " X_test_ = X_test_[selected_cols]\n", " else:\n", " selected_cols = X_train_.columns\n", " \n", " # Fitting model:\n", " if model == 'RF': \n", " parameters = {'n_estimators': [40, 60, 100, 150, 180],\n", " 'max_depth':[2, 3, 4, 5],\n", " 'min_samples_split': [4, 6],\n", " 'min_samples_leaf': [1, 2],\n", " 'ccp_alpha': [0, 0.01, 0.02]}\n", " \n", " clf = RandomForestClassifier(n_jobs=-1, oob_score=True, criterion='gini', \n", " random_state=1, class_weight='balanced_subsample')\n", " \n", " gridcv = GridSearchCV(clf, parameters, cv=TimeSeriesSplit(max_train_size=None, n_splits=5), \n", " scoring=scoring, verbose=1, n_jobs=-1, refit=True)\n", " elif model == 'XGB':\n", " parameters = {'n_estimators': [40, 60, 100, 180, 1000],\n", " 'max_depth':[2, 3, 4, 5],\n", " 'eta': [0.00005, 0.0005],\n", " 'base_score': [0, 0.5, 1],\n", " 'early_stopping_rounds':[5, 10]}#,\n", " #'scale_pos_weight': [0.5, 0.8, 1]}\n", " \n", " clf = XGBClassifier(objective='binary:logistic', predictor='gpu_predictor',\n", " random_state=1, min_child_weight=2, scale_pos_weight=pos_ratio)\n", " \n", " # As 
this is a time-series problem, we do not shuffle samples for Cross-Validation,\n", " # and test always with newer registers:\n", " gridcv = GridSearchCV(clf, parameters, cv=TimeSeriesSplit(max_train_size=None, n_splits=3), \n", " verbose=1, n_jobs=-1, refit=True, scoring=scoring, return_train_score=True)\n", "\n", " gridcv.fit(X_train_, y_train)\n", "\n", " # Results:\n", " best_estimator = gridcv.best_estimator_\n", " #cv_results, cv_results_index = gridcv.cv_results_, gridcv.best_index_\n", " \n", " scores = cross_val_score(best_estimator, X_train_, y_train,\n", " cv=TimeSeriesSplit(max_train_size=None, n_splits=3), scoring='f1')\n", " print(\"Train F1-score: \", scores.mean())\n", " scores = cross_val_score(best_estimator, X_train_, y_train,\n", " cv=TimeSeriesSplit(max_train_size=None, n_splits=3), scoring='accuracy')\n", " print(\"Train accuracy: \", scores.mean())\n", " scores = cross_val_score(best_estimator, X_train_, y_train,\n", " cv=TimeSeriesSplit(max_train_size=None, n_splits=3), scoring='roc_auc')\n", " print(\"Train AUC: \", scores.mean())\n", "\n", "\n", " print(\"Train best score %.2f\" % gridcv.best_score_)\n", " print('Best parameters: {}'.format(gridcv.best_params_))\n", "\n", " predictions = best_estimator.predict(X_test_)\n", " accuracy = accuracy_score(y_test, predictions)*100\n", " print(\"Accuracy: %.2f%%\" % accuracy)\n", " y_pred = gridcv.predict_proba(X_test_)[:,1]\n", " print(\"AUC: %.2f\" % roc_auc_score(y_test, y_pred))\n", " if model == 'RF':\n", " print(\"OOB Score: %.2f\" % best_estimator.oob_score_)\n", " print(\"Classification report:\")\n", " print(classification_report(y_test, predictions))\n", "\n", " conf_mat = confusion_matrix(y_test, predictions)\n", " print(\"Confusion matrix:\")\n", " print(conf_mat)\n", " #print(gridcv.cv_results_)\n", " \n", " with plt.style.context('seaborn-poster'):\n", " features = X_train_.columns\n", " plt.title('Feature Importances')\n", " pd.Series(best_estimator.feature_importances_, 
def choose_variables(X_train, X_test, var_type='technical'):
    """
    Selects variables distinguishing between technical indicators and sentiment indices.

    A column counts as a sentiment variable when its name ends with 'USA',
    'US500' or 'USD', or starts with 'pc' or 'cross'. The list of sentiment
    columns is derived from ``X_test`` and then applied to both dataframes.

    Parameters
    ----------
    X_train: pandas dataframe
        Train dataframe.
    X_test: pandas dataframe
        Test dataframe.
    var_type: str
        One of 'technical', 'sentiment' or 'all'.

    Returns
    -------
    Train and test dataframes with selected variables. With 'all', the two
    input dataframes are returned as they are (same objects, no copy).

    Raises
    ------
    ValueError
        If ``var_type`` is not one of the supported options.
    """
    valid_types = ('technical', 'sentiment', 'all')
    # Fail early with a clear message: previously an unknown option fell
    # through every branch and crashed with UnboundLocalError at the return.
    if var_type not in valid_types:
        raise ValueError(
            "var_type must be one of {}, got {!r}".format(valid_types, var_type))

    if var_type == 'all':
        return X_train, X_test

    sentiment_cols = [col for col in X_test.columns
                      if col.endswith(('USA', 'US500', 'USD'))
                      or col.startswith(('pc', 'cross'))]

    if var_type == 'sentiment':
        # only sentiment variables
        return X_train[sentiment_cols], X_test[sentiment_cols]

    # only technical indicators or financial variables
    sentiment_set = set(sentiment_cols)
    technical_train = [col for col in X_train.columns if col not in sentiment_set]
    technical_test = [col for col in X_test.columns if col not in sentiment_set]
    return X_train[technical_train], X_test[technical_test]
'bondPriceForecast_USA',\n", " 'interestRates_USA',\n", " 'cross_stockIndexSentiment_USA' ] #all selection\n", "features_sent = ['fear_US500',\n", " 'fundamentalStrength_US500',\n", " 'optimism_USD',\n", " 'surprise_USD',\n", " 'longShort_USD',\n", " 'priceForecast_USD',\n", " 'stockIndexStress_USA',\n", " 'bondUncertainty_USA',\n", " 'bondPriceForecast_USA',\n", " 'interestRates_USA',\n", " 'cross_stockIndexSentiment_USA'] #sentiment selection\n", "\n", "# with stop loss and profit taking limits\n", "features_ptsl_all = ['Stoch',\n", " 'Williams',\n", " 'MACD',\n", " 'ADX',\n", " 'Close/Open',\n", " 'sp_cross_Adj Close',\n", " 'longShortForecast_US500',\n", " 'analystRating_US500',\n", " 'dividends_US500',\n", " 'stress_USD',\n", " 'stockIndexPriceDirection_USA',\n", " 'stockIndexPriceForecast_USA',\n", " 'cross_stockIndexSentiment_USA'] # pt sl all selection\n", "features_ptsl_sent = ['longShortForecast_US500',\n", " 'priceForecast_US500',\n", " 'analystRating_US500',\n", " 'trust_USD',\n", " 'stress_USD',\n", " 'surprise_USD',\n", " 'timeUrgency_USD',\n", " 'longShort_USD',\n", " 'volatility_USD',\n", " 'stockIndexPriceDirection_USA',\n", " 'bondUncertainty_USA',\n", " 'interestRates_USA',\n", " 'cross_stockIndexSentiment_USA'] # pt sl sentiment selection" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 3 folds for each of 240 candidates, totalling 720 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n", "[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 3.7s\n", "[Parallel(n_jobs=-1)]: Done 720 out of 720 | elapsed: 38.7s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Train F1-score: 0.471397248125701\n", "Train accuracy: 0.5008960573476703\n", "Train AUC: 0.5531392883621549\n", "Train best score -0.72\n", "Best parameters: 
{'base_score': 0.5, 'early_stopping_rounds': 5, 'eta': 5e-05, 'max_depth': 2, 'n_estimators': 40}\n", "Accuracy: 49.80%\n", "AUC: 0.53\n", "Classification report:\n", " precision recall f1-score support\n", "\n", " -1.0 0.32 0.61 0.42 77\n", " 1.0 0.73 0.45 0.56 178\n", "\n", " accuracy 0.50 255\n", " macro avg 0.53 0.53 0.49 255\n", "weighted avg 0.61 0.50 0.52 255\n", "\n", "Confusion matrix:\n", "[[47 30]\n", " [98 80]]\n" ] }, { "data": { "image/png": "", "text/plain": [ "
# --- cell [49]: primary model ---
# Fits the first model, whose prediction sets the side (sign) of the bet:
# buy or sell. Only the pre-selected `features_all` columns are passed in.
first_model, features = apply_model(
    train_labels,
    test_labels,
    X_train_[features_all],
    X_test_[features_all],
    'XGB',
    'neg_log_loss',
    sfs=False,
)
# --- cell [50]: only for xgb ---
# Plot the first decision tree of the fitted model. Pass num_trees=1, 2, ...
# (optionally with rankdir='LR') to inspect the following trees.
fig, ax = plt.subplots(figsize=(15, 15))
plot_tree(first_model, num_trees=0, ax=ax); #first

# --- cell [51]: meta-labels from the first model's forecast ---
# These will be the labels for the next (second) model.
#
# A later cell appends 'side' to `features_all` for the second model. The
# first model must never receive that column, so it is filtered out here;
# this keeps the cell re-runnable after that append has happened.
first_model_features = [col for col in features_all if col != 'side']

tbm_train = tbm_train.reindex(X_train_.index)
tbm_test = tbm_test.reindex(X_test_.index)

# outcome of the first model becomes the side of the bet
tbm_train['side'] = first_model.predict(X_train_[first_model_features])
tbm_test['side'] = first_model.predict(X_test_[first_model_features])

metalabels_train = get_bins(tbm_train, sp500_train['Adj Close'])
metalabels_test = get_bins(tbm_test, sp500_test['Adj Close'])
metalabels_train['label'] = metalabels_train.bin
metalabels_test['label'] = metalabels_test.bin
# Reassign instead of inplace=True: same rows are dropped, without
# mutating whatever object get_bins handed back.
metalabels_train = metalabels_train.dropna()
metalabels_test = metalabels_test.dropna()

# Keep only the observations that still have a meta-label and expose the
# side as an explanatory variable.
X_train_ = X_train_.reindex(metalabels_train.index)
X_test_ = X_test_.reindex(metalabels_test.index)
X_train_['side'] = metalabels_train.side
X_test_['side'] = metalabels_test.side

# --- cell [52]: distinct sides predicted on the test set ---
tbm_test.side.unique()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rettrgtbinside
label
0.0451451451451
1.01037103710371037
\n", "
" ], "text/plain": [ " ret trgt bin side\n", "label \n", "0.0 451 451 451 451\n", "1.0 1037 1037 1037 1037" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rettrgtbinside
label
0.0128128128128
1.0127127127127
\n", "
# --- cell [53]: number of observations per meta-label (train, then test) ---
display(metalabels_train.groupby('label').count())
display(metalabels_test.groupby('label').count())

# --- cell [54] ---
# Append the side (previous model's outcome) to the explanatory variables of
# the second model. Guarded so that re-running the cell does not add the
# column name twice, which would duplicate the column when selecting
# X[features_all].
if 'side' not in features_all:
    features_all.append('side')
# --- cell [55]: secondary (meta) model ---
# Trains and tests the second model, which sets the size of the bet
# (here, it only tells whether to take the bet or not).
second_model, _ = apply_model(
    metalabels_train,
    metalabels_test,
    X_train_[features_all],
    X_test_[features_all],
    'XGB',
    'neg_log_loss',
    sfs=False,
)

# --- cell [56]: final predictions ---
# Second model's prediction multiplied by the side chosen by the first model.
final_predictions = X_test_['side'] * second_model.predict(X_test_[features_all])

# --- cell [57] ---
# Check whether the outcome looks reasonable/profitable during the first
# stage of the covid-19 bear market.
final_predictions.loc['2020-02-10':'2020-04-30']
"2016-11-10T00:00:00", "2016-11-16T00:00:00", "2016-11-22T00:00:00", "2016-11-28T00:00:00", "2016-12-01T00:00:00", "2016-12-08T00:00:00", "2016-12-12T00:00:00", "2016-12-14T00:00:00", "2016-12-15T00:00:00", "2016-12-29T00:00:00", "2017-03-22T00:00:00", "2017-04-25T00:00:00", "2017-04-26T00:00:00", "2017-05-18T00:00:00", "2017-05-22T00:00:00", "2017-05-24T00:00:00", "2017-05-26T00:00:00", "2017-06-30T00:00:00", "2017-07-07T00:00:00", "2017-07-10T00:00:00", "2017-07-13T00:00:00", "2017-08-11T00:00:00", "2017-08-15T00:00:00", "2017-08-18T00:00:00", "2017-08-23T00:00:00", "2017-08-31T00:00:00", "2017-09-01T00:00:00", "2017-09-05T00:00:00", "2017-09-12T00:00:00", "2018-01-30T00:00:00", "2018-01-31T00:00:00", "2018-02-05T00:00:00", "2018-02-06T00:00:00", "2018-02-07T00:00:00", "2018-02-09T00:00:00", "2018-02-13T00:00:00", "2018-02-16T00:00:00", "2018-02-26T00:00:00", "2018-03-01T00:00:00", "2018-03-02T00:00:00", "2018-03-06T00:00:00", "2018-03-12T00:00:00", "2018-03-15T00:00:00", "2018-03-20T00:00:00", "2018-03-23T00:00:00", "2018-03-26T00:00:00", "2018-03-27T00:00:00", "2018-03-28T00:00:00", "2018-04-02T00:00:00", "2018-04-03T00:00:00", "2018-04-05T00:00:00", "2018-04-09T00:00:00", "2018-04-11T00:00:00", "2018-04-17T00:00:00", "2018-04-23T00:00:00", "2018-04-25T00:00:00", "2018-04-27T00:00:00", "2018-05-03T00:00:00", "2018-05-07T00:00:00", "2018-05-10T00:00:00", "2018-05-11T00:00:00", "2018-05-22T00:00:00", "2018-05-29T00:00:00", "2018-05-31T00:00:00", "2018-06-04T00:00:00", "2018-06-07T00:00:00", "2018-06-20T00:00:00", "2018-06-22T00:00:00", "2018-06-26T00:00:00", "2018-06-28T00:00:00", "2018-07-02T00:00:00", "2018-07-06T00:00:00", "2018-07-09T00:00:00", "2018-07-10T00:00:00", "2018-07-12T00:00:00", "2018-07-13T00:00:00", "2018-07-19T00:00:00", "2018-07-26T00:00:00", "2018-07-30T00:00:00", "2018-07-31T00:00:00", "2018-08-03T00:00:00", "2018-08-16T00:00:00", "2018-08-17T00:00:00", "2018-08-21T00:00:00", "2018-10-11T00:00:00", "2018-10-12T00:00:00", 
"2018-10-15T00:00:00", "2018-10-17T00:00:00", "2018-10-19T00:00:00", "2018-10-25T00:00:00", "2018-10-26T00:00:00", "2018-10-29T00:00:00", "2018-10-31T00:00:00", "2018-11-02T00:00:00", "2018-11-08T00:00:00", "2018-11-13T00:00:00", "2018-11-19T00:00:00", "2018-11-20T00:00:00", "2018-11-21T00:00:00", "2018-11-27T00:00:00", "2018-11-29T00:00:00", "2018-12-04T00:00:00", "2018-12-05T00:00:00", "2018-12-10T00:00:00", "2018-12-17T00:00:00", "2018-12-18T00:00:00", "2018-12-20T00:00:00", "2018-12-21T00:00:00", "2018-12-24T00:00:00", "2018-12-25T00:00:00", "2018-12-26T00:00:00", "2018-12-27T00:00:00", "2019-01-02T00:00:00", "2019-01-04T00:00:00", "2019-01-07T00:00:00", "2019-01-10T00:00:00", "2019-01-18T00:00:00", "2019-01-21T00:00:00", "2019-01-23T00:00:00", "2019-01-28T00:00:00", "2019-01-31T00:00:00", "2019-02-05T00:00:00", "2019-02-08T00:00:00", "2019-02-13T00:00:00", "2019-02-18T00:00:00", "2019-02-26T00:00:00", "2019-03-04T00:00:00", "2019-03-07T00:00:00", "2019-03-08T00:00:00", "2019-03-12T00:00:00", "2019-03-14T00:00:00", "2019-03-19T00:00:00", "2019-03-22T00:00:00", "2019-03-25T00:00:00", "2019-04-01T00:00:00", "2019-04-02T00:00:00", "2019-04-08T00:00:00", "2019-04-15T00:00:00", "2019-05-06T00:00:00", "2019-05-08T00:00:00", "2019-05-14T00:00:00", "2019-05-16T00:00:00", "2019-05-21T00:00:00", "2019-05-24T00:00:00", "2019-05-29T00:00:00", "2019-06-03T00:00:00", "2019-06-05T00:00:00", "2019-06-07T00:00:00", "2019-06-10T00:00:00", "2019-06-19T00:00:00", "2019-06-21T00:00:00", "2019-06-26T00:00:00", "2019-07-01T00:00:00", "2019-07-02T00:00:00", "2019-07-04T00:00:00", "2019-07-09T00:00:00", "2019-07-11T00:00:00", "2019-07-15T00:00:00", "2019-07-18T00:00:00", "2019-07-22T00:00:00", "2019-07-24T00:00:00", "2019-07-26T00:00:00", "2019-07-29T00:00:00", "2019-08-01T00:00:00", "2019-08-02T00:00:00", "2019-08-05T00:00:00", "2019-08-06T00:00:00", "2019-08-07T00:00:00", "2019-08-09T00:00:00", "2019-08-13T00:00:00", "2019-08-14T00:00:00", "2019-08-15T00:00:00", 
"2019-08-19T00:00:00", "2019-08-26T00:00:00", "2019-08-29T00:00:00", "2019-09-02T00:00:00", "2019-09-05T00:00:00", "2019-09-06T00:00:00", "2019-09-13T00:00:00", "2019-09-25T00:00:00", "2019-09-30T00:00:00", "2019-10-02T00:00:00", "2019-10-03T00:00:00", "2019-10-07T00:00:00", "2019-10-09T00:00:00", "2019-10-11T00:00:00", "2019-10-14T00:00:00", "2019-10-16T00:00:00", "2019-10-25T00:00:00", "2019-10-29T00:00:00", "2019-11-04T00:00:00", "2019-11-08T00:00:00", "2019-12-05T00:00:00", "2019-12-09T00:00:00", "2019-12-13T00:00:00", "2020-01-28T00:00:00", "2020-01-29T00:00:00", "2020-02-03T00:00:00", "2020-02-05T00:00:00", "2020-02-06T00:00:00", "2020-02-12T00:00:00", "2020-02-20T00:00:00", "2020-02-24T00:00:00", "2020-02-25T00:00:00", "2020-02-26T00:00:00", "2020-02-28T00:00:00", "2020-03-03T00:00:00", "2020-03-04T00:00:00", "2020-03-05T00:00:00", "2020-03-06T00:00:00", "2020-03-10T00:00:00", "2020-03-11T00:00:00", "2020-03-12T00:00:00", "2020-03-13T00:00:00", "2020-03-16T00:00:00", "2020-03-17T00:00:00", "2020-03-23T00:00:00", "2020-03-25T00:00:00", "2020-03-27T00:00:00", "2020-04-02T00:00:00", "2020-04-07T00:00:00", "2020-04-10T00:00:00", "2020-04-20T00:00:00", "2020-04-22T00:00:00", "2020-04-27T00:00:00", "2020-04-30T00:00:00", "2020-05-04T00:00:00", "2020-05-11T00:00:00", "2020-05-14T00:00:00", "2020-05-19T00:00:00", "2020-05-27T00:00:00", "2020-06-01T00:00:00", "2020-06-04T00:00:00", "2020-06-08T00:00:00", "2020-06-12T00:00:00", "2020-06-16T00:00:00", "2020-06-24T00:00:00", "2020-06-25T00:00:00", "2020-06-29T00:00:00", "2020-07-01T00:00:00", "2020-07-06T00:00:00", "2020-07-15T00:00:00", "2020-07-21T00:00:00", "2020-07-24T00:00:00", "2020-07-30T00:00:00", "2020-08-04T00:00:00", "2020-08-06T00:00:00", "2020-08-11T00:00:00", "2020-08-12T00:00:00", "2020-08-13T00:00:00" ], "xaxis": "x", "y": [ 1.775862193759583, 1.7559295821283578, 1.7669044485223506, 1.7598556666462009, 1.7483328122830732, 1.7941925101118694, 1.818530976722427, 1.8435095389024307, 1.8645743482836645, 
1.8443887939649266, 1.902898360067524, 1.9274040893213034, 1.9436727004801757, 1.9190123979109754, 1.91448662216106, 2.037574005835401, 2.075449889550896, 2.093375872223454, 2.0540166466187704, 2.084866833367072, 2.1054456595441873, 2.1257652286077375, 2.131378641964228, 2.119295307031279, 2.1380022241683667, 2.159687202863789, 2.153662841446222, 2.1864647619718443, 2.1438219306507103, 2.1707078797209, 2.1767277319269507, 2.193292876177753, 2.176987062769343, 2.212509740847792, 2.5958512503745683, 2.5661155096934833, 2.507055004703301, 2.3908655672182757, 2.4391619483286378, 2.3175314144753254, 2.398326428407332, 2.4760145804014537, 2.4922367674399215, 2.4583534453371563, 2.4210443591764186, 2.465602707449793, 2.531220777702435, 2.4944224114294027, 2.4574234137411266, 2.3852984077688055, 2.3255036185876117, 2.4010140300278757, 2.3520132844728967, 2.3851226221455715, 2.31849937952855, 2.3863600805386254, 2.343184839919537, 2.3992439100641785, 2.4212216768913244, 2.4131811155545044, 2.3755778855456877, 2.4098296133786787, 2.376762210573199, 2.406136000095508, 2.441895150600946, 2.46776914046249, 2.477845422034579, 2.414457691048197, 2.468724304558778, 2.479472508445129, 2.5171988373629945, 2.507512964732948, 2.494702988552028, 2.4616604434457527, 2.4437903182827747, 2.462985738226229, 2.4814815247090123, 2.504754750638326, 2.5288614970526617, 2.5188510170612473, 2.5426981274645915, 2.559539626112753, 2.588762441353397, 2.56263258215833, 2.546901076766248, 2.570726943829121, 2.5621979899666996, 2.5836328903251715, 2.5991866187692776, 2.530346147948126, 2.4731486514458907, 2.5120245676047293, 2.554017111201135, 2.513661751798842, 2.3984319974907695, 2.4498960927053073, 2.401161358138082, 2.4262059421839113, 2.4852715849119797, 2.5578650852564477, 2.470968135276078, 2.4811383385591963, 2.434604997650178, 2.3833858456504564, 2.4166416880514947, 2.4887121585317837, 2.534949978681769, 2.444233153583558, 2.373997461996785, 2.338270986221647, 2.2785594875944417, 
2.2342865591635976, 2.188323853389332, 2.1276463288262466, 2.046538838295945, 2.116565946279799, 2.188653197846902, 2.263197258546287, 2.1652170247726574, 2.2627746635981247, 2.3218839505974436, 2.377071586929335, 2.4137775585222783, 2.3738049682634887, 2.407543027547659, 2.4245634226666724, 2.469597914230462, 2.4503885863676076, 2.489656663697386, 2.520412952165625, 2.540568717388234, 2.547962321101266, 2.516307643172054, 2.4938711633296555, 2.5280056911584805, 2.5549871394954775, 2.5762185030499167, 2.597131013870178, 2.5450590909173383, 2.5776175350316093, 2.608760685114831, 2.6326642259400654, 2.6462479872201023, 2.6811754868232347, 2.624569481635533, 2.555908382513759, 2.5934121508175703, 2.5831936843059213, 2.5659324117363855, 2.5466963175410324, 2.4970059372230184, 2.5475534910606448, 2.586304322082383, 2.6145432508888513, 2.655761670630701, 2.6888861963767767, 2.6554219959378815, 2.6776614377018246, 2.698007838873062, 2.7260151471681855, 2.708393670907265, 2.723586688479715, 2.741785620741309, 2.715926362089121, 2.7089819150652836, 2.734510696791031, 2.7329290424498063, 2.752329565012722, 2.7123373441319254, 2.6883276310385473, 2.668836870411125, 2.5874956475207482, 2.6224396342910916, 2.6743312308857243, 2.6233086139964756, 2.6636090434526523, 2.583547108890156, 2.6288867393462296, 2.589752385444303, 2.6281974038841094, 2.6637368568543027, 2.674049591436159, 2.7084382745890037, 2.7381081113927688, 2.7000415136447486, 2.695729522337985, 2.676291950827968, 2.6278900638494225, 2.6869301276572792, 2.6329616843341426, 2.6743673751243913, 2.70332446965937, 2.7258914756378094, 2.738739062759903, 2.764081309583493, 2.7876692253856854, 2.803173440522439, 2.8263225474739637, 2.853747930341794, 2.87224999562486, 2.9321631163780384, 2.957556946076665, 2.917896896469523, 2.9739806650731007, 3.002148708999684, 3.019424290578397, 3.0404603629674103, 3.0044513362657557, 2.9181894533195787, 2.839158053128113, 2.710896286218737, 2.8074350254256997, 2.7326654758073685, 
2.8407384524185724, 2.7506591305364587, 2.4914942542315948, 2.6228694991910007, 2.486288109354795, 2.2038118577299106, 2.455480533109273, 2.090312626929054, 1.9873332691682966, 2.164550661707284, 2.370779107908172, 2.1919433151482925, 2.406409079720038, 2.5344106505188404, 2.615688128723679, 2.481431023890419, 2.579857739014421, 2.6756204444714062, 2.5740795250642607, 2.666785857072889, 2.5637717494004377, 2.6886429040988795, 2.722437496257588, 2.7683003651193236, 2.8347324540908305, 2.8927254254521104, 2.731548534087053, 2.7873966002371446, 2.841705701574125, 2.7734804874070784, 2.7376522496035665, 2.81589375278397, 2.8663291965974755, 2.8956045973939792, 2.9385919910370286, 2.925899262986658, 2.943742504013843, 2.9716977606021158, 2.996930062017773, 3.0214503905391417, 3.0013955524090314, 3.0361856680715213 ], "yaxis": "y" }, { "marker": { "color": "#2DCCCD", "size": 8, "symbol": 19 }, "mode": "markers", "name": "Real Buy", "type": "scatter", "x": [ "2016-10-12T00:00:00", "2016-10-27T00:00:00", "2016-11-08T00:00:00", "2016-11-10T00:00:00", "2016-11-16T00:00:00", "2016-11-22T00:00:00", "2016-11-28T00:00:00", "2016-12-01T00:00:00", "2016-12-08T00:00:00", "2016-12-12T00:00:00", "2016-12-29T00:00:00", "2017-03-22T00:00:00", "2017-04-25T00:00:00", "2017-04-26T00:00:00", "2017-05-18T00:00:00", "2017-05-22T00:00:00", "2017-05-24T00:00:00", "2017-05-26T00:00:00", "2017-06-30T00:00:00", "2017-07-07T00:00:00", "2017-07-10T00:00:00", "2017-07-13T00:00:00", "2017-08-11T00:00:00", "2017-08-18T00:00:00", "2017-08-23T00:00:00", "2017-08-31T00:00:00", "2017-09-01T00:00:00", "2017-09-05T00:00:00", "2017-09-12T00:00:00", "2018-02-06T00:00:00", "2018-02-07T00:00:00", "2018-02-09T00:00:00", "2018-02-13T00:00:00", "2018-02-26T00:00:00", "2018-03-01T00:00:00", "2018-03-02T00:00:00", "2018-03-23T00:00:00", "2018-03-26T00:00:00", "2018-03-28T00:00:00", "2018-04-02T00:00:00", "2018-04-03T00:00:00", "2018-04-05T00:00:00", "2018-04-09T00:00:00", "2018-04-25T00:00:00", 
"2018-04-27T00:00:00", "2018-05-03T00:00:00", "2018-05-07T00:00:00", "2018-05-10T00:00:00", "2018-05-11T00:00:00", "2018-05-22T00:00:00", "2018-05-29T00:00:00", "2018-05-31T00:00:00", "2018-06-04T00:00:00", "2018-06-26T00:00:00", "2018-06-28T00:00:00", "2018-07-02T00:00:00", "2018-07-06T00:00:00", "2018-07-09T00:00:00", "2018-07-10T00:00:00", "2018-07-12T00:00:00", "2018-07-13T00:00:00", "2018-07-26T00:00:00", "2018-07-30T00:00:00", "2018-07-31T00:00:00", "2018-08-03T00:00:00", "2018-08-16T00:00:00", "2018-08-17T00:00:00", "2018-08-21T00:00:00", "2018-10-25T00:00:00", "2018-10-26T00:00:00", "2018-10-29T00:00:00", "2018-10-31T00:00:00", "2018-11-19T00:00:00", "2018-11-20T00:00:00", "2018-11-21T00:00:00", "2018-12-20T00:00:00", "2018-12-24T00:00:00", "2018-12-25T00:00:00", "2018-12-26T00:00:00", "2018-12-27T00:00:00", "2019-01-02T00:00:00", "2019-01-04T00:00:00", "2019-01-07T00:00:00", "2019-01-10T00:00:00", "2019-01-18T00:00:00", "2019-01-21T00:00:00", "2019-01-23T00:00:00", "2019-01-28T00:00:00", "2019-01-31T00:00:00", "2019-02-05T00:00:00", "2019-02-08T00:00:00", "2019-02-13T00:00:00", "2019-02-18T00:00:00", "2019-03-04T00:00:00", "2019-03-07T00:00:00", "2019-03-08T00:00:00", "2019-03-12T00:00:00", "2019-03-19T00:00:00", "2019-03-22T00:00:00", "2019-03-25T00:00:00", "2019-04-01T00:00:00", "2019-04-02T00:00:00", "2019-04-08T00:00:00", "2019-04-15T00:00:00", "2019-05-14T00:00:00", "2019-05-24T00:00:00", "2019-05-29T00:00:00", "2019-06-03T00:00:00", "2019-06-05T00:00:00", "2019-06-07T00:00:00", "2019-06-10T00:00:00", "2019-06-19T00:00:00", "2019-06-21T00:00:00", "2019-06-26T00:00:00", "2019-07-01T00:00:00", "2019-07-02T00:00:00", "2019-07-09T00:00:00", "2019-07-11T00:00:00", "2019-07-15T00:00:00", "2019-08-06T00:00:00", "2019-08-07T00:00:00", "2019-08-15T00:00:00", "2019-08-19T00:00:00", "2019-08-26T00:00:00", "2019-08-29T00:00:00", "2019-09-02T00:00:00", "2019-09-05T00:00:00", "2019-09-06T00:00:00", "2019-09-30T00:00:00", "2019-10-02T00:00:00", 
"2019-10-03T00:00:00", "2019-10-07T00:00:00", "2019-10-09T00:00:00", "2019-10-11T00:00:00", "2019-10-14T00:00:00", "2019-10-16T00:00:00", "2019-10-25T00:00:00", "2019-10-29T00:00:00", "2019-11-04T00:00:00", "2019-11-08T00:00:00", "2019-12-05T00:00:00", "2019-12-09T00:00:00", "2019-12-13T00:00:00", "2020-01-28T00:00:00", "2020-01-29T00:00:00", "2020-02-03T00:00:00", "2020-02-05T00:00:00", "2020-02-06T00:00:00", "2020-03-13T00:00:00", "2020-03-17T00:00:00", "2020-03-23T00:00:00", "2020-03-25T00:00:00", "2020-03-27T00:00:00", "2020-04-02T00:00:00", "2020-04-07T00:00:00", "2020-04-10T00:00:00", "2020-04-22T00:00:00", "2020-04-27T00:00:00", "2020-05-04T00:00:00", "2020-05-11T00:00:00", "2020-05-14T00:00:00", "2020-05-19T00:00:00", "2020-05-27T00:00:00", "2020-06-12T00:00:00", "2020-06-24T00:00:00", "2020-06-25T00:00:00", "2020-06-29T00:00:00", "2020-07-01T00:00:00", "2020-07-06T00:00:00", "2020-07-15T00:00:00", "2020-07-21T00:00:00", "2020-07-24T00:00:00", "2020-07-30T00:00:00", "2020-08-04T00:00:00", "2020-08-06T00:00:00", "2020-08-11T00:00:00", "2020-08-12T00:00:00", "2020-08-13T00:00:00" ], "xaxis": "x", "y": [ 1.7559295821283578, 1.7598556666462009, 1.7483328122830732, 1.7941925101118694, 1.818530976722427, 1.8435095389024307, 1.8645743482836645, 1.8443887939649266, 1.902898360067524, 1.9274040893213034, 1.91448662216106, 2.037574005835401, 2.075449889550896, 2.093375872223454, 2.0540166466187704, 2.084866833367072, 2.1054456595441873, 2.1257652286077375, 2.131378641964228, 2.119295307031279, 2.1380022241683667, 2.159687202863789, 2.153662841446222, 2.1438219306507103, 2.1707078797209, 2.1767277319269507, 2.193292876177753, 2.176987062769343, 2.212509740847792, 2.3908655672182757, 2.4391619483286378, 2.3175314144753254, 2.398326428407332, 2.4922367674399215, 2.4583534453371563, 2.4210443591764186, 2.3852984077688055, 2.3255036185876117, 2.3520132844728967, 2.3851226221455715, 2.31849937952855, 2.3863600805386254, 2.343184839919537, 2.3755778855456877, 
2.4098296133786787, 2.376762210573199, 2.406136000095508, 2.441895150600946, 2.46776914046249, 2.477845422034579, 2.414457691048197, 2.468724304558778, 2.479472508445129, 2.4616604434457527, 2.4437903182827747, 2.462985738226229, 2.4814815247090123, 2.504754750638326, 2.5288614970526617, 2.5188510170612473, 2.5426981274645915, 2.588762441353397, 2.56263258215833, 2.546901076766248, 2.570726943829121, 2.5621979899666996, 2.5836328903251715, 2.5991866187692776, 2.3984319974907695, 2.4498960927053073, 2.401161358138082, 2.4262059421839113, 2.4811383385591963, 2.434604997650178, 2.3833858456504564, 2.2342865591635976, 2.1276463288262466, 2.046538838295945, 2.116565946279799, 2.188653197846902, 2.263197258546287, 2.1652170247726574, 2.2627746635981247, 2.3218839505974436, 2.377071586929335, 2.4137775585222783, 2.3738049682634887, 2.407543027547659, 2.4245634226666724, 2.469597914230462, 2.4503885863676076, 2.489656663697386, 2.520412952165625, 2.547962321101266, 2.516307643172054, 2.4938711633296555, 2.5280056911584805, 2.5762185030499167, 2.597131013870178, 2.5450590909173383, 2.5776175350316093, 2.608760685114831, 2.6326642259400654, 2.6462479872201023, 2.555908382513759, 2.5659324117363855, 2.5466963175410324, 2.4970059372230184, 2.5475534910606448, 2.586304322082383, 2.6145432508888513, 2.655761670630701, 2.6888861963767767, 2.6554219959378815, 2.6776614377018246, 2.698007838873062, 2.708393670907265, 2.723586688479715, 2.741785620741309, 2.5874956475207482, 2.6224396342910916, 2.583547108890156, 2.6288867393462296, 2.589752385444303, 2.6281974038841094, 2.6637368568543027, 2.674049591436159, 2.7084382745890037, 2.695729522337985, 2.676291950827968, 2.6278900638494225, 2.6869301276572792, 2.6329616843341426, 2.6743673751243913, 2.70332446965937, 2.7258914756378094, 2.738739062759903, 2.764081309583493, 2.7876692253856854, 2.803173440522439, 2.8263225474739637, 2.853747930341794, 2.87224999562486, 2.9321631163780384, 2.957556946076665, 2.917896896469523, 
2.9739806650731007, 3.002148708999684, 2.2038118577299106, 2.090312626929054, 1.9873332691682966, 2.164550661707284, 2.370779107908172, 2.1919433151482925, 2.406409079720038, 2.5344106505188404, 2.481431023890419, 2.579857739014421, 2.5740795250642607, 2.666785857072889, 2.5637717494004377, 2.6886429040988795, 2.722437496257588, 2.731548534087053, 2.841705701574125, 2.7734804874070784, 2.7376522496035665, 2.81589375278397, 2.8663291965974755, 2.8956045973939792, 2.9385919910370286, 2.925899262986658, 2.943742504013843, 2.9716977606021158, 2.996930062017773, 3.0214503905391417, 3.0013955524090314, 3.0361856680715213 ], "yaxis": "y" }, { "marker": { "color": "#D8BE75", "size": 8, "symbol": 20 }, "mode": "markers", "name": "Real Sell", "type": "scatter", "x": [ "2016-10-05T00:00:00", "2016-10-20T00:00:00", "2016-12-14T00:00:00", "2016-12-15T00:00:00", "2017-08-15T00:00:00", "2018-01-30T00:00:00", "2018-01-31T00:00:00", "2018-02-05T00:00:00", "2018-02-16T00:00:00", "2018-03-06T00:00:00", "2018-03-12T00:00:00", "2018-03-15T00:00:00", "2018-03-20T00:00:00", "2018-03-27T00:00:00", "2018-04-11T00:00:00", "2018-04-17T00:00:00", "2018-04-23T00:00:00", "2018-06-07T00:00:00", "2018-06-20T00:00:00", "2018-06-22T00:00:00", "2018-07-19T00:00:00", "2018-10-11T00:00:00", "2018-10-12T00:00:00", "2018-10-15T00:00:00", "2018-10-17T00:00:00", "2018-10-19T00:00:00", "2018-11-02T00:00:00", "2018-11-08T00:00:00", "2018-11-13T00:00:00", "2018-11-27T00:00:00", "2018-11-29T00:00:00", "2018-12-04T00:00:00", "2018-12-05T00:00:00", "2018-12-10T00:00:00", "2018-12-17T00:00:00", "2018-12-18T00:00:00", "2018-12-21T00:00:00", "2019-02-26T00:00:00", "2019-03-14T00:00:00", "2019-05-06T00:00:00", "2019-05-08T00:00:00", "2019-05-16T00:00:00", "2019-05-21T00:00:00", "2019-07-04T00:00:00", "2019-07-18T00:00:00", "2019-07-22T00:00:00", "2019-07-24T00:00:00", "2019-07-26T00:00:00", "2019-07-29T00:00:00", "2019-08-01T00:00:00", "2019-08-02T00:00:00", "2019-08-05T00:00:00", "2019-08-09T00:00:00", 
"2019-08-13T00:00:00", "2019-08-14T00:00:00", "2019-09-13T00:00:00", "2019-09-25T00:00:00", "2020-02-12T00:00:00", "2020-02-20T00:00:00", "2020-02-24T00:00:00", "2020-02-25T00:00:00", "2020-02-26T00:00:00", "2020-02-28T00:00:00", "2020-03-03T00:00:00", "2020-03-04T00:00:00", "2020-03-05T00:00:00", "2020-03-06T00:00:00", "2020-03-10T00:00:00", "2020-03-11T00:00:00", "2020-03-12T00:00:00", "2020-03-16T00:00:00", "2020-04-20T00:00:00", "2020-04-30T00:00:00", "2020-06-01T00:00:00", "2020-06-04T00:00:00", "2020-06-08T00:00:00", "2020-06-16T00:00:00" ], "xaxis": "x", "y": [ 1.775862193759583, 1.7669044485223506, 1.9436727004801757, 1.9190123979109754, 2.1864647619718443, 2.5958512503745683, 2.5661155096934833, 2.507055004703301, 2.4760145804014537, 2.465602707449793, 2.531220777702435, 2.4944224114294027, 2.4574234137411266, 2.4010140300278757, 2.3992439100641785, 2.4212216768913244, 2.4131811155545044, 2.5171988373629945, 2.507512964732948, 2.494702988552028, 2.559539626112753, 2.530346147948126, 2.4731486514458907, 2.5120245676047293, 2.554017111201135, 2.513661751798842, 2.4852715849119797, 2.5578650852564477, 2.470968135276078, 2.4166416880514947, 2.4887121585317837, 2.534949978681769, 2.444233153583558, 2.373997461996785, 2.338270986221647, 2.2785594875944417, 2.188323853389332, 2.540568717388234, 2.5549871394954775, 2.6811754868232347, 2.624569481635533, 2.5934121508175703, 2.5831936843059213, 2.7260151471681855, 2.715926362089121, 2.7089819150652836, 2.734510696791031, 2.7329290424498063, 2.752329565012722, 2.7123373441319254, 2.6883276310385473, 2.668836870411125, 2.6743312308857243, 2.6233086139964756, 2.6636090434526523, 2.7381081113927688, 2.7000415136447486, 3.019424290578397, 3.0404603629674103, 3.0044513362657557, 2.9181894533195787, 2.839158053128113, 2.710896286218737, 2.8074350254256997, 2.7326654758073685, 2.8407384524185724, 2.7506591305364587, 2.4914942542315948, 2.6228694991910007, 2.486288109354795, 2.455480533109273, 2.615688128723679, 
2.6756204444714062, 2.7683003651193236, 2.8347324540908305, 2.8927254254521104, 2.7873966002371446 ], "yaxis": "y" }, { "marker": { "color": "#bfbfbf", "size": 5, "symbol": 0 }, "mode": "markers", "name": "Predicted Do Not Take Bet", "type": "scatter", "x": [ "2016-10-05T00:00:00", "2016-10-12T00:00:00", "2016-10-20T00:00:00", "2016-10-27T00:00:00", "2016-11-08T00:00:00", "2016-11-10T00:00:00", "2016-12-08T00:00:00", "2016-12-14T00:00:00", "2017-03-22T00:00:00", "2017-04-25T00:00:00", "2017-04-26T00:00:00", "2017-05-24T00:00:00", "2017-05-26T00:00:00", "2017-06-30T00:00:00", "2017-07-07T00:00:00", "2017-07-10T00:00:00", "2017-07-13T00:00:00", "2017-08-23T00:00:00", "2018-01-31T00:00:00", "2018-02-07T00:00:00", "2018-02-13T00:00:00", "2018-02-26T00:00:00", "2018-03-12T00:00:00", "2018-03-15T00:00:00", "2018-04-11T00:00:00", "2018-04-17T00:00:00", "2018-04-23T00:00:00", "2018-04-27T00:00:00", "2018-05-11T00:00:00", "2018-05-22T00:00:00", "2018-05-31T00:00:00", "2018-06-04T00:00:00", "2018-06-07T00:00:00", "2018-06-20T00:00:00", "2018-06-22T00:00:00", "2018-07-09T00:00:00", "2018-07-12T00:00:00", "2018-07-19T00:00:00", "2018-08-03T00:00:00", "2018-08-16T00:00:00", "2018-08-17T00:00:00", "2018-08-21T00:00:00", "2018-10-12T00:00:00", "2018-10-15T00:00:00", "2018-10-17T00:00:00", "2018-10-31T00:00:00", "2018-11-02T00:00:00", "2018-11-08T00:00:00", "2018-11-27T00:00:00", "2018-11-29T00:00:00", "2018-12-04T00:00:00", "2019-01-07T00:00:00", "2019-01-10T00:00:00", "2019-01-18T00:00:00", "2019-01-21T00:00:00", "2019-01-23T00:00:00", "2019-01-28T00:00:00", "2019-01-31T00:00:00", "2019-02-05T00:00:00", "2019-02-08T00:00:00", "2019-02-13T00:00:00", "2019-02-18T00:00:00", "2019-02-26T00:00:00", "2019-03-04T00:00:00", "2019-03-07T00:00:00", "2019-03-08T00:00:00", "2019-03-12T00:00:00", "2019-03-14T00:00:00", "2019-03-19T00:00:00", "2019-03-22T00:00:00", "2019-04-01T00:00:00", "2019-04-02T00:00:00", "2019-04-08T00:00:00", "2019-04-15T00:00:00", "2019-05-06T00:00:00", 
"2019-05-14T00:00:00", "2019-05-16T00:00:00", "2019-05-21T00:00:00", "2019-05-24T00:00:00", "2019-05-29T00:00:00", "2019-06-03T00:00:00", "2019-06-05T00:00:00", "2019-06-07T00:00:00", "2019-06-10T00:00:00", "2019-06-19T00:00:00", "2019-06-21T00:00:00", "2019-06-26T00:00:00", "2019-07-01T00:00:00", "2019-07-02T00:00:00", "2019-07-04T00:00:00", "2019-07-09T00:00:00", "2019-07-11T00:00:00", "2019-07-15T00:00:00", "2019-07-18T00:00:00", "2019-07-22T00:00:00", "2019-07-24T00:00:00", "2019-07-26T00:00:00", "2019-07-29T00:00:00", "2019-08-01T00:00:00", "2019-08-02T00:00:00", "2019-08-05T00:00:00", "2019-08-06T00:00:00", "2019-08-07T00:00:00", "2019-08-09T00:00:00", "2019-08-13T00:00:00", "2019-08-14T00:00:00", "2019-08-15T00:00:00", "2019-08-19T00:00:00", "2019-08-26T00:00:00", "2019-08-29T00:00:00", "2019-09-02T00:00:00", "2019-09-05T00:00:00", "2019-09-06T00:00:00", "2019-09-13T00:00:00", "2019-09-25T00:00:00", "2019-09-30T00:00:00", "2019-10-02T00:00:00", "2019-10-03T00:00:00", "2019-10-07T00:00:00", "2019-10-09T00:00:00", "2019-10-11T00:00:00", "2019-10-14T00:00:00", "2019-10-16T00:00:00", "2019-10-25T00:00:00", "2019-11-04T00:00:00", "2019-12-13T00:00:00", "2020-02-03T00:00:00", "2020-02-12T00:00:00", "2020-02-20T00:00:00", "2020-02-24T00:00:00", "2020-02-25T00:00:00", "2020-02-26T00:00:00", "2020-02-28T00:00:00", "2020-03-04T00:00:00", "2020-03-06T00:00:00", "2020-03-10T00:00:00", "2020-03-12T00:00:00", "2020-03-23T00:00:00", "2020-04-02T00:00:00", "2020-04-10T00:00:00", "2020-04-20T00:00:00", "2020-04-27T00:00:00", "2020-04-30T00:00:00", "2020-05-04T00:00:00", "2020-05-11T00:00:00", "2020-05-14T00:00:00", "2020-05-19T00:00:00", "2020-05-27T00:00:00", "2020-06-04T00:00:00", "2020-06-08T00:00:00", "2020-06-12T00:00:00", "2020-06-16T00:00:00", "2020-07-01T00:00:00", "2020-07-24T00:00:00", "2020-07-30T00:00:00", "2020-08-04T00:00:00", "2020-08-06T00:00:00", "2020-08-11T00:00:00", "2020-08-12T00:00:00", "2020-08-13T00:00:00" ], "xaxis": "x", "y": [ 1.775862193759583, 
1.7559295821283578, 1.7669044485223506, 1.7598556666462009, 1.7483328122830732, 1.7941925101118694, 1.902898360067524, 1.9436727004801757, 2.037574005835401, 2.075449889550896, 2.093375872223454, 2.1054456595441873, 2.1257652286077375, 2.131378641964228, 2.119295307031279, 2.1380022241683667, 2.159687202863789, 2.1707078797209, 2.5661155096934833, 2.4391619483286378, 2.398326428407332, 2.4922367674399215, 2.531220777702435, 2.4944224114294027, 2.3992439100641785, 2.4212216768913244, 2.4131811155545044, 2.4098296133786787, 2.46776914046249, 2.477845422034579, 2.468724304558778, 2.479472508445129, 2.5171988373629945, 2.507512964732948, 2.494702988552028, 2.504754750638326, 2.5188510170612473, 2.559539626112753, 2.570726943829121, 2.5621979899666996, 2.5836328903251715, 2.5991866187692776, 2.4731486514458907, 2.5120245676047293, 2.554017111201135, 2.4262059421839113, 2.4852715849119797, 2.5578650852564477, 2.4166416880514947, 2.4887121585317837, 2.534949978681769, 2.2627746635981247, 2.3218839505974436, 2.377071586929335, 2.4137775585222783, 2.3738049682634887, 2.407543027547659, 2.4245634226666724, 2.469597914230462, 2.4503885863676076, 2.489656663697386, 2.520412952165625, 2.540568717388234, 2.547962321101266, 2.516307643172054, 2.4938711633296555, 2.5280056911584805, 2.5549871394954775, 2.5762185030499167, 2.597131013870178, 2.5776175350316093, 2.608760685114831, 2.6326642259400654, 2.6462479872201023, 2.6811754868232347, 2.555908382513759, 2.5934121508175703, 2.5831936843059213, 2.5659324117363855, 2.5466963175410324, 2.4970059372230184, 2.5475534910606448, 2.586304322082383, 2.6145432508888513, 2.655761670630701, 2.6888861963767767, 2.6554219959378815, 2.6776614377018246, 2.698007838873062, 2.7260151471681855, 2.708393670907265, 2.723586688479715, 2.741785620741309, 2.715926362089121, 2.7089819150652836, 2.734510696791031, 2.7329290424498063, 2.752329565012722, 2.7123373441319254, 2.6883276310385473, 2.668836870411125, 2.5874956475207482, 2.6224396342910916, 
2.6743312308857243, 2.6233086139964756, 2.6636090434526523, 2.583547108890156, 2.6288867393462296, 2.589752385444303, 2.6281974038841094, 2.6637368568543027, 2.674049591436159, 2.7084382745890037, 2.7381081113927688, 2.7000415136447486, 2.695729522337985, 2.676291950827968, 2.6278900638494225, 2.6869301276572792, 2.6329616843341426, 2.6743673751243913, 2.70332446965937, 2.7258914756378094, 2.738739062759903, 2.7876692253856854, 2.87224999562486, 2.917896896469523, 3.019424290578397, 3.0404603629674103, 3.0044513362657557, 2.9181894533195787, 2.839158053128113, 2.710896286218737, 2.7326654758073685, 2.7506591305364587, 2.4914942542315948, 2.486288109354795, 1.9873332691682966, 2.1919433151482925, 2.5344106505188404, 2.615688128723679, 2.579857739014421, 2.6756204444714062, 2.5740795250642607, 2.666785857072889, 2.5637717494004377, 2.6886429040988795, 2.722437496257588, 2.8347324540908305, 2.8927254254521104, 2.731548534087053, 2.7873966002371446, 2.81589375278397, 2.925899262986658, 2.943742504013843, 2.9716977606021158, 2.996930062017773, 3.0214503905391417, 3.0013955524090314, 3.0361856680715213 ], "yaxis": "y" }, { "marker": { "color": "#008000", "size": 8, "symbol": 5 }, "mode": "markers", "name": "Predicted Buy", "type": "scatter", "x": [ "2016-11-16T00:00:00", "2016-11-22T00:00:00", "2016-11-28T00:00:00", "2016-12-01T00:00:00", "2016-12-12T00:00:00", "2016-12-15T00:00:00", "2016-12-29T00:00:00", "2017-05-18T00:00:00", "2017-05-22T00:00:00", "2017-08-11T00:00:00", "2017-08-15T00:00:00", "2017-08-18T00:00:00", "2017-08-31T00:00:00", "2017-09-01T00:00:00", "2017-09-05T00:00:00", "2017-09-12T00:00:00", "2018-01-30T00:00:00", "2018-02-05T00:00:00", "2018-02-06T00:00:00", "2018-02-09T00:00:00", "2018-02-16T00:00:00", "2018-03-01T00:00:00", "2018-03-02T00:00:00", "2018-03-06T00:00:00", "2018-03-20T00:00:00", "2018-03-23T00:00:00", "2018-03-26T00:00:00", "2018-03-27T00:00:00", "2018-03-28T00:00:00", "2018-04-02T00:00:00", "2018-04-03T00:00:00", "2018-04-05T00:00:00", 
"2018-04-09T00:00:00", "2018-04-25T00:00:00", "2018-05-03T00:00:00", "2018-05-07T00:00:00", "2018-05-10T00:00:00", "2018-05-29T00:00:00", "2018-06-26T00:00:00", "2018-06-28T00:00:00", "2018-07-02T00:00:00", "2018-07-06T00:00:00", "2018-07-10T00:00:00", "2018-07-13T00:00:00", "2018-07-26T00:00:00", "2018-07-30T00:00:00", "2018-10-11T00:00:00", "2018-10-19T00:00:00", "2018-10-25T00:00:00", "2018-10-26T00:00:00", "2018-10-29T00:00:00", "2018-11-13T00:00:00", "2018-11-19T00:00:00", "2018-11-20T00:00:00", "2018-11-21T00:00:00", "2018-12-05T00:00:00", "2018-12-10T00:00:00", "2018-12-17T00:00:00", "2018-12-18T00:00:00", "2018-12-20T00:00:00", "2018-12-21T00:00:00", "2018-12-24T00:00:00", "2018-12-25T00:00:00", "2018-12-26T00:00:00", "2018-12-27T00:00:00", "2019-01-02T00:00:00", "2019-01-04T00:00:00", "2019-03-25T00:00:00", "2019-05-08T00:00:00", "2019-10-29T00:00:00", "2019-11-08T00:00:00", "2019-12-05T00:00:00", "2019-12-09T00:00:00", "2020-01-28T00:00:00", "2020-01-29T00:00:00", "2020-02-05T00:00:00", "2020-02-06T00:00:00", "2020-06-01T00:00:00", "2020-06-24T00:00:00", "2020-06-25T00:00:00", "2020-06-29T00:00:00", "2020-07-06T00:00:00", "2020-07-15T00:00:00", "2020-07-21T00:00:00" ], "xaxis": "x", "y": [ 1.818530976722427, 1.8435095389024307, 1.8645743482836645, 1.8443887939649266, 1.9274040893213034, 1.9190123979109754, 1.91448662216106, 2.0540166466187704, 2.084866833367072, 2.153662841446222, 2.1864647619718443, 2.1438219306507103, 2.1767277319269507, 2.193292876177753, 2.176987062769343, 2.212509740847792, 2.5958512503745683, 2.507055004703301, 2.3908655672182757, 2.3175314144753254, 2.4760145804014537, 2.4583534453371563, 2.4210443591764186, 2.465602707449793, 2.4574234137411266, 2.3852984077688055, 2.3255036185876117, 2.4010140300278757, 2.3520132844728967, 2.3851226221455715, 2.31849937952855, 2.3863600805386254, 2.343184839919537, 2.3755778855456877, 2.376762210573199, 2.406136000095508, 2.441895150600946, 2.414457691048197, 2.4616604434457527, 
2.4437903182827747, 2.462985738226229, 2.4814815247090123, 2.5288614970526617, 2.5426981274645915, 2.588762441353397, 2.56263258215833, 2.530346147948126, 2.513661751798842, 2.3984319974907695, 2.4498960927053073, 2.401161358138082, 2.470968135276078, 2.4811383385591963, 2.434604997650178, 2.3833858456504564, 2.444233153583558, 2.373997461996785, 2.338270986221647, 2.2785594875944417, 2.2342865591635976, 2.188323853389332, 2.1276463288262466, 2.046538838295945, 2.116565946279799, 2.188653197846902, 2.263197258546287, 2.1652170247726574, 2.5450590909173383, 2.624569481635533, 2.764081309583493, 2.803173440522439, 2.8263225474739637, 2.853747930341794, 2.9321631163780384, 2.957556946076665, 2.9739806650731007, 3.002148708999684, 2.7683003651193236, 2.841705701574125, 2.7734804874070784, 2.7376522496035665, 2.8663291965974755, 2.8956045973939792, 2.9385919910370286 ], "yaxis": "y" }, { "marker": { "color": "#FF0000", "size": 8, "symbol": 6 }, "mode": "markers", "name": "Predicted Sell", "type": "scatter", "x": [ "2018-07-31T00:00:00", "2020-03-03T00:00:00", "2020-03-05T00:00:00", "2020-03-11T00:00:00", "2020-03-13T00:00:00", "2020-03-16T00:00:00", "2020-03-17T00:00:00", "2020-03-25T00:00:00", "2020-03-27T00:00:00", "2020-04-07T00:00:00", "2020-04-22T00:00:00" ], "xaxis": "x", "y": [ 2.546901076766248, 2.8074350254256997, 2.8407384524185724, 2.6228694991910007, 2.2038118577299106, 2.455480533109273, 2.090312626929054, 2.164550661707284, 2.370779107908172, 2.406409079720038, 2.481431023890419 ], "yaxis": "y" } ], "layout": { "colorway": [ "#004481", "#2DCCCD", "#D8BE75", "#1973B8", "#5BBEFF", "#F7893B", "#02A5A5", "#48AE64", "#F8CD51", "#F78BE8" ], "hovermode": "x", "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "white", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "white", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { 
"endlinecolor": "#2a3f5f", "gridcolor": "#C8D4E3", "linecolor": "#C8D4E3", "minorgridcolor": "#C8D4E3", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "#C8D4E3", "linecolor": "#C8D4E3", "minorgridcolor": "#C8D4E3", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 
0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" 
} ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "white", "showlakes": true, "showland": true, "subunitcolor": "#C8D4E3" }, 
"hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "white", "polar": { "angularaxis": { "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "" }, "bgcolor": "white", "radialaxis": { "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "white", "gridcolor": "#DFE8F3", "gridwidth": 2, "linecolor": "#EBF0F8", "showbackground": true, "ticks": "", "zerolinecolor": "#EBF0F8" }, "yaxis": { "backgroundcolor": "white", "gridcolor": "#DFE8F3", "gridwidth": 2, "linecolor": "#EBF0F8", "showbackground": true, "ticks": "", "zerolinecolor": "#EBF0F8" }, "zaxis": { "backgroundcolor": "white", "gridcolor": "#DFE8F3", "gridwidth": 2, "linecolor": "#EBF0F8", "showbackground": true, "ticks": "", "zerolinecolor": "#EBF0F8" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "#DFE8F3", "linecolor": "#A2B1C6", "ticks": "" }, "baxis": { "gridcolor": "#DFE8F3", "linecolor": "#A2B1C6", "ticks": "" }, "bgcolor": "white", "caxis": { "gridcolor": "#DFE8F3", "linecolor": "#A2B1C6", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "#EBF0F8", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "#EBF0F8", "zerolinewidth": 2 } } }, "title": { "text": "SP500 Index and predicted positions | Test" }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "rangeselector": { "buttons": [ { "count": 1, "label": "1m", "step": "month", "stepmode": "backward" }, { "count": 6, "label": "6m", "step": "month", "stepmode": "backward" }, { "count": 1, "label": "YTD", "step": "year", "stepmode": "todate" }, { "count": 1, "label": "1y", "step": "year", "stepmode": "backward" }, { "count": 3, "label": "3y", 
"step": "year", "stepmode": "backward" }, { "count": 5, "label": "5y", "step": "year", "stepmode": "backward" }, { "step": "all" } ], "font": { "color": "black" } }, "rangeslider": { "visible": true } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Price" } } } }, "text/html": [ "
\n", " \n", " \n", "
\n", " \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plotting labels to see outcome\n", "#configure_plotly_browser_state()\n", "figura = make_subplots(specs=[[{\"secondary_y\": False}]])\n", "\n", "dataset = X_test_\n", "dataset['Predictions'] = final_predictions\n", "dataset['Labels'] = test_labels['label']\n", "\n", "figura.add_trace(go.Scatter(y=X_test['Adj Close'],\n", " x=X_test.index,\n", " mode='lines',\n", " name='SP500 Close Price'),\n", " secondary_y=False,)\n", "figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.Labels == 1)]['Adj Close'],\n", " x=dataset[(dataset.Labels == 1)].index,\n", " mode='markers',\n", " name='Real Buy',\n", " marker=dict(size=8, color='#2DCCCD'),\n", " marker_symbol=19),\n", " secondary_y=False,)\n", "figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.Labels == -1)]['Adj Close'],\n", " x=dataset[(dataset.Labels == -1)].index,\n", " mode='markers',\n", " name='Real Sell',\n", " marker=dict(size=8, color='#D8BE75'),\n", " marker_symbol=20),\n", " secondary_y=False,)\n", "figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.Predictions == 0)]['Adj Close'],\n", " x=dataset[(dataset.Predictions == 0)].index,\n", " mode='markers',\n", " name='Predicted Do Not Take Bet',\n", " marker=dict(size=5, color='#bfbfbf'),\n", " marker_symbol=0),\n", " secondary_y=False,)\n", "figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.Predictions == 1)]['Adj Close'],\n", " x=dataset[(dataset.Predictions == 1)].index,\n", " mode='markers',\n", " name='Predicted Buy',\n", " marker=dict(size=8, color='#008000'),\n", " marker_symbol=5),\n", " secondary_y=False,)\n", "figura.add_trace(\n", " go.Scatter(y=dataset[(dataset.Predictions == -1)]['Adj Close'],\n", " x=dataset[(dataset.Predictions == -1)].index,\n", " mode='markers',\n", " name='Predicted Sell',\n", " marker=dict(size=8, color='#FF0000'),\n", " marker_symbol=6),\n", " secondary_y=False,)\n", "\n", "figura.update_layout(\n", " title_text='SP500 Index and predicted 
positions | Test',\n", " colorway = bbva)\n", "\n", "figura.update_xaxes(rangeslider_visible=True)\n", "figura.update_yaxes(title_text=\"Price\", secondary_y=False)\n", "\n", "figura.update_xaxes(\n", " rangeslider_visible=True,\n", " rangeselector=dict(\n", " dict(font = dict(color = \"black\")),\n", " buttons=list([\n", " dict(count=1, label=\"1m\", step=\"month\", stepmode=\"backward\"),\n", " dict(count=6, label=\"6m\", step=\"month\", stepmode=\"backward\"),\n", " dict(count=1, label=\"YTD\", step=\"year\", stepmode=\"todate\"),\n", " dict(count=1, label=\"1y\", step=\"year\", stepmode=\"backward\"),\n", " dict(count=3, label=\"3y\", step=\"year\", stepmode=\"backward\"),\n", " dict(count=5, label=\"5y\", step=\"year\", stepmode=\"backward\"),\n", " dict(step=\"all\"),\n", " ])\n", " )\n", ")\n", "\n", "figura.update_layout(template='plotly_white', hovermode='x')\n", "iplot(figura)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }