{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LaTeX macros (hidden cell)\n",
    "$\n",
    "\\newcommand{\\Q}{\\mathcal{Q}}\n",
    "\\newcommand{\\ECov}{\\boldsymbol{\\Sigma}}\n",
    "\\newcommand{\\EMean}{\\boldsymbol{\\mu}}\n",
    "\\newcommand{\\EAlpha}{\\boldsymbol{\\alpha}}\n",
    "\\newcommand{\\EBeta}{\\boldsymbol{\\beta}}\n",
    "$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports and configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "import re\n",
    "import datetime as dt\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.colors import LinearSegmentedColormap\n",
    "\n",
    "from mosek.fusion import *\n",
    "import mosek.fusion.pythonic    # Requires MOSEK >= 10.2\n",
    "\n",
    "from notebook.services.config import ConfigManager\n",
    "\n",
    "from portfolio_tools import data_download, DataReader, compute_inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.9.7 (default, Sep 16 2021, 13:09:58) \n",
      "[GCC 7.5.0]\n",
      "matplotlib: 3.7.2\n"
     ]
    }
   ],
   "source": [
    "# Version checks\n",
    "print(sys.version)\n",
    "print('matplotlib: {}'.format(matplotlib.__version__))\n",
    "\n",
    "# Jupyter configuration\n",
    "c = ConfigManager()\n",
    "c.update('notebook', {\"CodeCell\": {\"cm_config\": {\"autoCloseBrackets\": False}}})  \n",
    "\n",
    "# Numpy options\n",
    "np.set_printoptions(precision=5, linewidth=120, suppress=True)\n",
    "\n",
    "# Pandas options\n",
    "pd.set_option('display.max_rows', None)\n",
    "\n",
    "# Matplotlib options\n",
    "plt.rcParams['figure.figsize'] = [12, 8]\n",
    "plt.rcParams['figure.dpi'] = 200"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare input data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we load the raw data that will be used to compute the yearly return observation series used for the optimization. The data consists of daily stock prices of $8$ stocks from the US market, and SPY as the benchmark."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data downloading:\n",
    "# If the user has an API key for alphavantage.co, then this code part will download the data. \n",
    "# The code can be modified to download from other sources. To be able to run the examples, \n",
    "# and reproduce results in the cookbook, the files have to have the following format and content:\n",
    "# - File name pattern: \"daily_adjusted_[TICKER].csv\", where TICKER is the symbol of a stock. \n",
    "# - The file contains at least columns \"timestamp\", \"adjusted_close\", and \"volume\".\n",
    "# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18, \n",
    "# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.\n",
    "list_stocks = [\"PM\", \"LMT\", \"MCD\", \"MMM\", \"AAPL\", \"MSFT\", \"TXN\", \"CSCO\"]\n",
    "list_factors = [\"SPY\"]\n",
    "alphaToken = None\n",
    " \n",
    "list_tickers = list_stocks + list_factors\n",
    "if alphaToken is not None:\n",
    "    data_download(list_tickers, alphaToken)  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {},
   "outputs": [],
   "source": [
    "investment_start = \"2016-03-18\"\n",
    "investment_end = \"2021-03-18\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found data files: \n",
      "stock_data/daily_adjusted_AAPL.csv\n",
      "stock_data/daily_adjusted_PM.csv\n",
      "stock_data/daily_adjusted_CSCO.csv\n",
      "stock_data/daily_adjusted_TXN.csv\n",
      "stock_data/daily_adjusted_MMM.csv\n",
      "stock_data/daily_adjusted_IWM.csv\n",
      "stock_data/daily_adjusted_MCD.csv\n",
      "stock_data/daily_adjusted_SPY.csv\n",
      "stock_data/daily_adjusted_MSFT.csv\n",
      "stock_data/daily_adjusted_LMT.csv\n",
      "\n",
      "Using data files: \n",
      "stock_data/daily_adjusted_PM.csv\n",
      "stock_data/daily_adjusted_LMT.csv\n",
      "stock_data/daily_adjusted_MCD.csv\n",
      "stock_data/daily_adjusted_MMM.csv\n",
      "stock_data/daily_adjusted_AAPL.csv\n",
      "stock_data/daily_adjusted_MSFT.csv\n",
      "stock_data/daily_adjusted_TXN.csv\n",
      "stock_data/daily_adjusted_CSCO.csv\n",
      "stock_data/daily_adjusted_SPY.csv\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# The files are in \"stock_data\" folder, named as \"daily_adjusted_[TICKER].csv\"\n",
    "dr = DataReader(folder_path=\"stock_data\", symbol_list=list_tickers)\n",
    "dr.read_data()\n",
    "df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run the optimization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define the optimization model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [],
   "source": [
    "def absval(M, x, z):\n",
    "    M.constraint(z >= x)\n",
    "    M.constraint(z >= -x)\n",
    "    \n",
    "def norm1(M, x, t):\n",
    "    z = M.variable(x.getSize(), Domain.greaterThan(0.0))\n",
    "    absval(M, x, z)\n",
    "    M.constraint(Expr.sum(z) == t)\n",
    "\n",
    "def MinTrackingError(N, R, r_bm, x0, lambda_1, lambda_2, beta=1.5):\n",
    "\n",
    "    with Model(\"Case study\") as M:\n",
    "        # Settings\n",
    "        M.setLogHandler(sys.stdout)\n",
    "        \n",
    "        # Variables \n",
    "        # The variable x is the fraction of holdings in each security. \n",
    "        # It is restricted to be positive, which imposes the constraint of no short-selling.   \n",
    "        x = M.variable(\"x\", N, Domain.greaterThan(0.0))\n",
    "        xt = x - x0\n",
    "        \n",
    "        # The variable t models the OLS objective function term (tracking error).\n",
    "        t = M.variable(\"t\", 1, Domain.unbounded())\n",
    "        # The variables u and v model the regularization terms (transaction cost penalties).\n",
    "        u = M.variable(\"u\", 1, Domain.unbounded())\n",
    "        v = M.variable(\"v\", N, Domain.unbounded())   \n",
    "    \n",
    "        # Budget constraint\n",
    "        M.constraint('budget', Expr.sum(x) == 1.0)\n",
    "        \n",
    "        # Objective \n",
    "        penalty_lin = lambda_1 * u\n",
    "        penalty_32 = lambda_2 * Expr.sum(v)\n",
    "        M.objective('obj', ObjectiveSense.Minimize, t + penalty_lin + penalty_32)\n",
    "        \n",
    "        # Constraints for the penalties\n",
    "        norm1(M, xt, u)\n",
    "        M.constraint('market_impact', Expr.hstack(v, Expr.constTerm(N, 1.0), xt), Domain.inPPowerCone(1.0 / beta))\n",
    "\n",
    "        # Constraint for the tracking error \n",
    "        residual = R.T @ x - r_bm\n",
    "        M.constraint('tracking_error', Expr.vstack(t, 0.5, residual), Domain.inRotatedQCone())\n",
    "        \n",
    "        # Create DataFrame to store the results. Last security name (the SPY) is removed.\n",
    "        columns = [\"track_err\", \"lin_tcost\", \"mkt_tcost\"] + df_prices.columns[:N].tolist()\n",
    "        df_result = pd.DataFrame(columns=columns)\n",
    "        \n",
    "        # Solve optimization\n",
    "        M.solve()\n",
    "            \n",
    "        # Check if the solution is an optimal point\n",
    "        solsta = M.getPrimalSolutionStatus()\n",
    "        if (solsta != SolutionStatus.Optimal):\n",
    "            # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.\n",
    "            raise Exception(\"Unexpected solution status!\") \n",
    "\n",
    "        # Save results\n",
    "        tracking_error = t.level()[0]\n",
    "        linear_tcost = u.level()[0]\n",
    "        market_impact_tcost = np.sum(v.level())\n",
    "        row = pd.Series([tracking_error, linear_tcost, market_impact_tcost] + list(x.level()), index=columns)\n",
    "        df_result = pd.concat([df_result, pd.DataFrame([row])], ignore_index=True)\n",
    "\n",
    "        return df_result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute optimization input variables"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix for logarithmic returns, and compute linear return observations from that. The benchmark will be the last data series, corresponding to SPY."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Number of securities and observations\n",
    "N = df_prices.shape[1] - 1\n",
    "T = 1000\n",
    "\n",
    "# Mean and covariance of historical yearly log-returns.  \n",
    "m_log, S_log = compute_inputs(df_prices, return_log=True)\n",
    "\n",
    "# Generate logarithmic return observations assuming normal distribution\n",
    "scenarios_log = np.random.default_rng().multivariate_normal(m_log, S_log, T)\n",
    "    \n",
    "# Convert logarithmic return observations to linear return observations \n",
    "scenarios_lin = np.exp(scenarios_log) - 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We center and normalize the data matrices."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Center the return data\n",
    "centered_return = scenarios_lin - scenarios_lin.mean(axis=0)\n",
    "\n",
    "# Security return scenarios\n",
    "security_return = scenarios_lin[:, :N] / np.sqrt(T - 1)\n",
    "\n",
    "# Benchmark return scenarios\n",
    "benchmark_return = scenarios_lin[:, -1] / np.sqrt(T - 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Call the optimizer function"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We run the optimization for the given penalty coefficients, and initial portfolio."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Problem\n",
      "  Name                   : Case study      \n",
      "  Objective sense        : minimize        \n",
      "  Type                   : CONIC (conic optimization problem)\n",
      "  Constraints            : 18              \n",
      "  Affine conic cons.     : 9 (1026 rows)\n",
      "  Disjunctive cons.      : 0               \n",
      "  Cones                  : 0               \n",
      "  Scalar variables       : 27              \n",
      "  Matrix variables       : 0               \n",
      "  Integer variables      : 0               \n",
      "\n",
      "Optimizer started.\n",
      "Presolve started.\n",
      "Linear dependency checker started.\n",
      "Linear dependency checker terminated.\n",
      "Eliminator started.\n",
      "Freed constraints in eliminator : 0\n",
      "Eliminator terminated.\n",
      "Eliminator - tries                  : 1                 time                   : 0.00            \n",
      "Lin. dep.  - tries                  : 1                 time                   : 0.00            \n",
      "Lin. dep.  - primal attempts        : 1                 successes              : 1               \n",
      "Lin. dep.  - dual attempts          : 0                 successes              : 0               \n",
      "Lin. dep.  - primal deps.           : 0                 dual deps.             : 0               \n",
      "Presolve terminated. Time: 0.00    \n",
      "Optimizer  - threads                : 64              \n",
      "Optimizer  - solved problem         : the dual        \n",
      "Optimizer  - Constraints            : 16              \n",
      "Optimizer  - Cones                  : 10              \n",
      "Optimizer  - Scalar variables       : 1060              conic                  : 1028            \n",
      "Optimizer  - Semi-definite variables: 0                 scalarized             : 0               \n",
      "Factor     - setup time             : 0.00            \n",
      "Factor     - dense det. time        : 0.00              GP order time          : 0.00            \n",
      "Factor     - nonzeros before factor : 52                after factor           : 52              \n",
      "Factor     - dense dim.             : 0                 flops                  : 7.25e+04        \n",
      "ITE PFEAS    DFEAS    GFEAS    PRSTATUS   POBJ              DOBJ              MU       TIME  \n",
      "0   1.0e+00  1.3e+00  5.1e+00  0.00e+00   7.086559745e-01   -3.432754826e+00  1.0e+00  0.01  \n",
      "1   4.8e-01  6.2e-01  8.7e-01  2.71e+00   4.120265400e-01   -5.852995592e-01  4.8e-01  0.01  \n",
      "2   9.4e-02  1.2e-01  5.8e-02  1.65e+00   6.234989703e-02   -8.267879019e-02  9.4e-02  0.01  \n",
      "3   1.2e-02  1.6e-02  2.6e-03  1.16e+00   1.277717889e-02   -4.930056889e-03  1.2e-02  0.01  \n",
      "4   4.6e-03  6.0e-03  5.9e-04  1.01e+00   7.690849598e-03   1.100286126e-03   4.6e-03  0.01  \n",
      "5   1.5e-03  1.9e-03  8.8e-05  1.17e+00   7.162884540e-03   5.228693323e-03   1.5e-03  0.01  \n",
      "6   3.2e-04  4.2e-04  4.8e-06  1.43e+00   6.614174541e-03   6.288874887e-03   3.2e-04  0.01  \n",
      "7   5.7e-05  7.4e-05  3.2e-07  1.93e+00   6.352699725e-03   6.316106202e-03   5.7e-05  0.01  \n",
      "8   1.3e-05  1.7e-05  3.6e-08  1.11e+00   6.338763707e-03   6.330495870e-03   1.3e-05  0.01  \n",
      "9   8.8e-07  1.1e-06  5.7e-10  1.02e+00   6.335649771e-03   6.335109751e-03   8.8e-07  0.01  \n",
      "10  2.4e-08  2.4e-08  1.8e-12  1.00e+00   6.335321684e-03   6.335310105e-03   1.9e-08  0.01  \n",
      "11  7.2e-10  9.4e-10  1.3e-14  1.00e+00   6.335315628e-03   6.335315186e-03   7.3e-10  0.01  \n",
      "Optimizer terminated. Time: 0.02    \n",
      "\n",
      "\n",
      "Interior-point solution summary\n",
      "  Problem status  : PRIMAL_AND_DUAL_FEASIBLE\n",
      "  Solution status : OPTIMAL\n",
      "  Primal.  obj: 6.3353156286e-03    nrm: 1e+00    Viol.  con: 1e-10    var: 0e+00    acc: 0e+00  \n",
      "  Dual.    obj: 6.3353151856e-03    nrm: 1e+00    Viol.  con: 2e-11    var: 2e-11    acc: 0e+00  \n"
     ]
    }
   ],
   "source": [
    "lambda_1 = 0.0001 \n",
    "lambda_2 = 0.0001 \n",
    "x0 = np.ones(N) / N\n",
    "\n",
    "df_result = MinTrackingError(N, security_return.T, benchmark_return, x0, lambda_1, lambda_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>track_err</th>\n",
       "      <th>lin_tcost</th>\n",
       "      <th>mkt_tcost</th>\n",
       "      <th>PM</th>\n",
       "      <th>LMT</th>\n",
       "      <th>MCD</th>\n",
       "      <th>MMM</th>\n",
       "      <th>AAPL</th>\n",
       "      <th>MSFT</th>\n",
       "      <th>TXN</th>\n",
       "      <th>CSCO</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.006301</td>\n",
       "      <td>0.286418</td>\n",
       "      <td>0.060916</td>\n",
       "      <td>0.090031</td>\n",
       "      <td>0.166661</td>\n",
       "      <td>0.168084</td>\n",
       "      <td>0.171201</td>\n",
       "      <td>0.048057</td>\n",
       "      <td>0.101428</td>\n",
       "      <td>0.117276</td>\n",
       "      <td>0.137262</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   track_err  lin_tcost  mkt_tcost        PM       LMT       MCD       MMM  \\\n",
       "0   0.006301   0.286418   0.060916  0.090031  0.166661  0.168084  0.171201   \n",
       "\n",
       "       AAPL      MSFT       TXN      CSCO  \n",
       "0  0.048057  0.101428  0.117276  0.137262  "
      ]
     },
     "execution_count": 207,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}