{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "LaTeX macros (hidden cell)\n", "$\n", "\\newcommand{\\Q}{\\mathcal{Q}}\n", "\\newcommand{\\ECov}{\\boldsymbol{\\Sigma}}\n", "\\newcommand{\\EMean}{\\boldsymbol{\\mu}}\n", "\\newcommand{\\EAlpha}{\\boldsymbol{\\alpha}}\n", "\\newcommand{\\EBeta}{\\boldsymbol{\\beta}}\n", "$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Imports and configuration" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": false }, "outputs": [], "source": [ "import sys\n", "import os\n", "import re\n", "import datetime as dt\n", "\n", "import numpy as np\n", "import pandas as pd\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "from matplotlib.colors import LinearSegmentedColormap\n", "\n", "from mosek.fusion import *\n", "\n", "from notebook.services.config import ConfigManager\n", "\n", "from portfolio_tools import data_download, DataReader, compute_inputs" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.9.7 (default, Sep 16 2021, 13:09:58) \n", "[GCC 7.5.0]\n", "matplotlib: 3.4.3\n" ] } ], "source": [ "# Version checks\n", "print(sys.version)\n", "print('matplotlib: {}'.format(matplotlib.__version__))\n", "\n", "# Jupyter configuration\n", "c = ConfigManager()\n", "c.update('notebook', {\"CodeCell\": {\"cm_config\": {\"autoCloseBrackets\": False}}}) \n", "\n", "# Numpy options\n", "np.set_printoptions(precision=5, linewidth=120, suppress=True)\n", "\n", "# Pandas options\n", "pd.set_option('display.max_rows', None)\n", "\n", "# Matplotlib options\n", "plt.rcParams['figure.figsize'] = [12, 8]\n", "plt.rcParams['figure.dpi'] = 200" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Prepare input data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we load the raw data that will be used to compute the optimization input variables, the vector $\\EMean$ of expected returns and the covariance matrix $\\ECov$. The data consists of daily stock prices of $8$ stocks from the US market. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Data downloading:\n", "# If the user has an API key for alphavantage.co, then this code part will download the data. \n", "# The code can be modified to download from other sources. To be able to run the examples, \n", "# and reproduce results in the cookbook, the files have to have the following format and content:\n", "# - File name pattern: \"daily_adjusted_[TICKER].csv\", where TICKER is the symbol of a stock. \n", "# - The file contains at least columns \"timestamp\", \"adjusted_close\", and \"volume\".\n", "# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18, \n", "# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.\n", "list_stocks = [\"PM\", \"LMT\", \"MCD\", \"MMM\", \"AAPL\", \"MSFT\", \"TXN\", \"CSCO\"]\n", "list_factors = []\n", "alphaToken = None\n", " \n", "list_tickers = list_stocks + list_factors\n", "if alphaToken is not None:\n", " data_download(list_tickers, alphaToken) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "investment_start = \"2016-03-18\"\n", "investment_end = \"2021-03-18\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found data files: \n", "stock_data/daily_adjusted_AAPL.csv\n", "stock_data/daily_adjusted_PM.csv\n", "stock_data/daily_adjusted_CSCO.csv\n", "stock_data/daily_adjusted_TXN.csv\n", "stock_data/daily_adjusted_MMM.csv\n", "stock_data/daily_adjusted_IWM.csv\n", "stock_data/daily_adjusted_MCD.csv\n", "stock_data/daily_adjusted_SPY.csv\n", "stock_data/daily_adjusted_MSFT.csv\n", "stock_data/daily_adjusted_LMT.csv\n", "\n", "Using data files: \n", "stock_data/daily_adjusted_PM.csv\n", "stock_data/daily_adjusted_LMT.csv\n", "stock_data/daily_adjusted_MCD.csv\n", "stock_data/daily_adjusted_MMM.csv\n", "stock_data/daily_adjusted_AAPL.csv\n", "stock_data/daily_adjusted_MSFT.csv\n", "stock_data/daily_adjusted_TXN.csv\n", "stock_data/daily_adjusted_CSCO.csv\n", "\n" ] } ], "source": [ "# The files are in \"stock_data\" folder, named as \"daily_adjusted_[TICKER].csv\"\n", "dr = DataReader(folder_path=\"stock_data\", symbol_list=list_tickers)\n", "dr.read_data()\n", "df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Run the optimization" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the optimization model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# |x| <= t\n", "def absval(M, x, t):\n", " M.constraint(Expr.add(t, x), Domain.greaterThan(0.0))\n", " M.constraint(Expr.sub(t, x), Domain.greaterThan(0.0))\n", "\n", "# ||x||_1 <= t\n", "def norm1(M, x, t):\n", " u = M.variable(x.getShape(), Domain.unbounded())\n", " absval(M, x, u)\n", " M.constraint(Expr.sub(Expr.sum(u), t), Domain.equalsTo(0.0))\n", "\n", "def EfficientFrontier(N, m, G, deltas):\n", "\n", " with Model(\"Case study\") as M:\n", " # Settings\n", " #M.setLogHandler(sys.stdout)\n", " \n", " # Variables \n", " # The variable x is the fraction of holdings relative to the initial capital. \n", " # It is a free variable, allowing long and short positions.\n", " x = M.variable(\"x\", N, Domain.unbounded())\n", " \n", " # The variable s models the portfolio variance term in the objective.\n", " s = M.variable(\"s\", 1, Domain.unbounded())\n", " \n", " # Gross exposure constraint (allows 2 times the initial capital)\n", " norm1(M, x, 2.0)\n", " \n", " # Dollar neutrality constraint\n", " M.constraint('neutrality', Expr.sum(x), Domain.equalsTo(0.0))\n", " \n", " # Objective (quadratic utility version)\n", " delta = M.parameter()\n", " M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, s)))\n", "\n", " # Conic constraint for the portfolio variance \n", " M.constraint('risk', Expr.vstack(s, 0.5, Expr.mul(G.transpose(), x)), Domain.inRotatedQCone())\n", " \n", " # Create DataFrame to store the results. Last security name (the SPY) is removed.\n", " columns = [\"delta\", \"obj\", \"return\", \"risk\", \"g. exp.\"] + df_prices.columns.tolist()\n", " df_result = pd.DataFrame(columns=columns)\n", " for d in deltas:\n", " # Update parameter\n", " delta.setValue(d)\n", " \n", " # Solve optimization\n", " M.solve()\n", " \n", " # Check if the solution is an optimal point\n", " solsta = M.getPrimalSolutionStatus()\n", " if (solsta != SolutionStatus.Optimal):\n", " # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.\n", " raise Exception(\"Unexpected solution status!\") \n", " \n", " # Save results\n", " portfolio_return = m @ x.level()\n", " portfolio_risk = np.sqrt(s.level()[0])\n", " gross_exp = sum(np.absolute(x.level()))\n", " row = pd.Series([d, M.primalObjValue(), portfolio_return, portfolio_risk, gross_exp] + list(x.level()), index=columns)\n", " df_result = pd.concat([df_result, pd.DataFrame([row])], ignore_index=True)\n", "\n", " return df_result" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compute optimization input variables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Number of securities\n", "N = df_prices.shape[1]\n", "\n", "# Get optimization parameters\n", "m, S = compute_inputs(df_prices)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next we compute the matrix $G$ such that $\\ECov=GG^\\mathsf{T}$, this is the input of the conic form of the optimization problem. Here we use Cholesky factorization." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "G = np.linalg.cholesky(S)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Call the optimizer function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We run the optimization for a range of risk aversion parameter values: $\\delta = 10^{-1},\\dots,10^{1.5}$. We compute the efficient frontier this way both with and without using shrinkage estimation. " ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Compute efficient frontier with and without shrinkage\n", "deltas = np.logspace(start=-1, stop=1.5, num=20)[::-1]\n", "df_result = EfficientFrontier(N, m, G, deltas)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the results." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | delta | \n", "obj | \n", "return | \n", "risk | \n", "g. exp. | \n", "PM | \n", "LMT | \n", "MCD | \n", "MMM | \n", "AAPL | \n", "MSFT | \n", "TXN | \n", "CSCO | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "31.622777 | \n", "0.012181 | \n", "0.024362 | \n", "0.019627 | \n", "0.174698 | \n", "-0.029866 | \n", "-1.813651e-03 | \n", "-3.327063e-03 | \n", "-3.965543e-02 | \n", "0.017223 | \n", "4.664747e-02 | \n", "2.347876e-02 | \n", "-1.268697e-02 | \n", "
1 | \n", "23.357215 | \n", "0.016492 | \n", "0.032983 | \n", "0.026572 | \n", "0.236517 | \n", "-0.040435 | \n", "-2.455977e-03 | \n", "-4.504609e-03 | \n", "-5.368734e-02 | \n", "0.023318 | \n", "6.315380e-02 | \n", "3.178662e-02 | \n", "-1.717610e-02 | \n", "
2 | \n", "17.252105 | \n", "0.022328 | \n", "0.044655 | \n", "0.035975 | \n", "0.320215 | \n", "-0.054743 | \n", "-3.324976e-03 | \n", "-6.098627e-03 | \n", "-7.268608e-02 | \n", "0.031570 | \n", "8.550237e-02 | \n", "4.303523e-02 | \n", "-2.325436e-02 | \n", "
3 | \n", "12.742750 | \n", "0.030229 | \n", "0.060457 | \n", "0.048705 | \n", "0.433530 | \n", "-0.074115 | \n", "-4.501704e-03 | \n", "-8.256829e-03 | \n", "-9.840758e-02 | \n", "0.042741 | \n", "1.157593e-01 | \n", "5.826417e-02 | \n", "-3.148341e-02 | \n", "
4 | \n", "9.412050 | \n", "0.040926 | \n", "0.081850 | \n", "0.065940 | \n", "0.586933 | \n", "-0.100340 | \n", "-6.094617e-03 | \n", "-1.117865e-02 | \n", "-1.332288e-01 | \n", "0.057866 | \n", "1.567183e-01 | \n", "7.888206e-02 | \n", "-4.262430e-02 | \n", "
5 | \n", "6.951928 | \n", "0.055409 | \n", "0.110821 | \n", "0.089279 | \n", "0.794681 | \n", "-0.135856 | \n", "-8.250688e-03 | \n", "-1.513487e-02 | \n", "-1.803870e-01 | \n", "0.078346 | \n", "2.121911e-01 | \n", "1.068038e-01 | \n", "-5.771226e-02 | \n", "
6 | \n", "5.134833 | \n", "0.075016 | \n", "0.150038 | \n", "0.120873 | \n", "1.075906 | \n", "-0.183931 | \n", "-1.117064e-02 | \n", "-2.049049e-02 | \n", "-2.442259e-01 | \n", "0.106069 | \n", "2.872825e-01 | \n", "1.446021e-01 | \n", "-7.813552e-02 | \n", "
7 | \n", "3.792690 | \n", "0.101563 | \n", "0.203125 | \n", "0.163641 | \n", "1.456578 | \n", "-0.249013 | \n", "-1.512627e-02 | \n", "-2.774053e-02 | \n", "-3.306328e-01 | \n", "0.143605 | \n", "3.889271e-01 | \n", "1.957575e-01 | \n", "-1.057763e-01 | \n", "
8 | \n", "2.801357 | \n", "0.137504 | \n", "0.275011 | \n", "0.221554 | \n", "1.972071 | \n", "-0.337138 | \n", "-2.047841e-02 | \n", "-3.756367e-02 | \n", "-4.476385e-01 | \n", "0.194424 | \n", "5.265729e-01 | \n", "2.650385e-01 | \n", "-1.432172e-01 | \n", "
9 | \n", "2.069138 | \n", "0.177107 | \n", "0.298429 | \n", "0.242145 | \n", "2.000000 | \n", "-0.415083 | \n", "-1.588155e-07 | \n", "-3.462223e-08 | \n", "-5.158595e-01 | \n", "0.246408 | \n", "5.503360e-01 | \n", "2.032559e-01 | \n", "-6.905687e-02 | \n", "
10 | \n", "1.528307 | \n", "0.211422 | \n", "0.315446 | \n", "0.260893 | \n", "2.000000 | \n", "-0.476219 | \n", "-2.838718e-08 | \n", "-1.277994e-08 | \n", "-5.237810e-01 | \n", "0.309753 | \n", "5.874499e-01 | \n", "1.027971e-01 | \n", "-6.044888e-08 | \n", "
11 | \n", "1.128838 | \n", "0.240334 | \n", "0.326192 | \n", "0.275787 | \n", "2.000000 | \n", "-0.523018 | \n", "-1.064179e-08 | \n", "-6.128413e-09 | \n", "-4.769823e-01 | \n", "0.381652 | \n", "6.183482e-01 | \n", "9.539798e-08 | \n", "-1.080083e-08 | \n", "
12 | \n", "0.833782 | \n", "0.263130 | \n", "0.328904 | \n", "0.280866 | \n", "2.000000 | \n", "-0.559561 | \n", "-1.468358e-08 | \n", "-7.858693e-09 | \n", "-4.404388e-01 | \n", "0.438818 | \n", "5.611816e-01 | \n", "4.608019e-08 | \n", "-1.012394e-08 | \n", "
13 | \n", "0.615848 | \n", "0.280802 | \n", "0.332577 | \n", "0.289949 | \n", "2.000000 | \n", "-0.609080 | \n", "-1.087698e-08 | \n", "-6.424718e-09 | \n", "-3.909199e-01 | \n", "0.516232 | \n", "4.837682e-01 | \n", "1.275609e-08 | \n", "-7.333844e-09 | \n", "
14 | \n", "0.454878 | \n", "0.294985 | \n", "0.337553 | \n", "0.305909 | \n", "2.000000 | \n", "-0.676100 | \n", "-1.154550e-09 | \n", "-8.503845e-10 | \n", "-3.239002e-01 | \n", "0.621124 | \n", "3.788759e-01 | \n", "1.145439e-09 | \n", "-6.673766e-10 | \n", "
15 | \n", "0.335982 | \n", "0.306991 | \n", "0.344282 | \n", "0.333155 | \n", "2.000000 | \n", "-0.766835 | \n", "-2.036140e-10 | \n", "-1.237167e-10 | \n", "-2.331652e-01 | \n", "0.762936 | \n", "2.370640e-01 | \n", "1.872415e-10 | \n", "-1.152393e-10 | \n", "
16 | \n", "0.248163 | \n", "0.317929 | \n", "0.353408 | \n", "0.378110 | \n", "2.000000 | \n", "-0.889826 | \n", "-4.466695e-08 | \n", "-2.863282e-08 | \n", "-1.101740e-01 | \n", "0.955296 | \n", "4.470383e-02 | \n", "4.271174e-08 | \n", "-2.692994e-08 | \n", "
17 | \n", "0.183298 | \n", "0.327910 | \n", "0.356984 | \n", "0.398270 | \n", "2.000000 | \n", "-0.999999 | \n", "-5.917077e-09 | \n", "-3.883621e-09 | \n", "-9.958170e-07 | \n", "1.000000 | \n", "6.649493e-08 | \n", "4.721761e-09 | \n", "-3.563641e-09 | \n", "
18 | \n", "0.135388 | \n", "0.335509 | \n", "0.356984 | \n", "0.398270 | \n", "2.000000 | \n", "-1.000000 | \n", "-6.606806e-09 | \n", "-4.545752e-09 | \n", "-4.995802e-08 | \n", "1.000000 | \n", "2.714103e-08 | \n", "4.472662e-09 | \n", "-4.067312e-09 | \n", "
19 | \n", "0.100000 | \n", "0.341122 | \n", "0.356984 | \n", "0.398270 | \n", "2.000000 | \n", "-1.000000 | \n", "-1.669964e-09 | \n", "-1.164704e-09 | \n", "-9.496848e-09 | \n", "1.000000 | \n", "9.899558e-09 | \n", "1.368620e-09 | \n", "-1.014214e-09 | \n", "