{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LaTeX macros (hidden cell)\n",
    "$\n",
    "\\newcommand{\\Q}{\\mathcal{Q}}\n",
    "\\newcommand{\\ECov}{\\boldsymbol{\\Sigma}}\n",
    "\\newcommand{\\EMean}{\\boldsymbol{\\mu}}\n",
    "\\newcommand{\\EAlpha}{\\boldsymbol{\\alpha}}\n",
    "\\newcommand{\\EBeta}{\\boldsymbol{\\beta}}\n",
    "$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports and configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "FILE=/content/portfolio_tools.py\n",
    "if [[ ! -f $FILE ]]; then\n",
    "    wget https://raw.githubusercontent.com/MOSEK/PortfolioOptimization/main/python/notebooks/portfolio_tools.py\n",
    "fi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install mosek \n",
    "%env PYTHONPATH /env/python:/content\n",
    "%env MOSEKLM_LICENSE_FILE /content/mosek.lic:/root/mosek/mosek.lic\n",
    "\n",
    "# To execute the notebook directly in colab make sure your MOSEK license file is in one of the locations\n",
    "#\n",
    "# /content/mosek.lic   or   /root/mosek/mosek.lic\n",
    "#\n",
    "# inside this notebook's internal filesystem. \n",
    "#\n",
    "# You will also need an API key from a stock data provider, or ready data files in a \"stock_data\" folder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import sys\n",
    "import datetime as dt\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "import scipy.stats as stats\n",
    "from scipy.optimize import brentq\n",
    "from scipy.linalg import sqrtm, inv\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.colors import LinearSegmentedColormap\n",
    "\n",
    "from mosek.fusion import *\n",
    "import mosek.fusion.pythonic    # From Mosek >= 10.2\n",
    "\n",
    "from notebook.services.config import ConfigManager\n",
    "\n",
    "# portfolio_tools.py is a Mosek helper file distributed together with the notebooks\n",
    "from portfolio_tools import data_download, DataReader, compute_inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Version checks\n",
    "print(sys.version)\n",
    "print('matplotlib: {}'.format(matplotlib.__version__))\n",
    "\n",
    "# Jupyter configuration\n",
    "c = ConfigManager()\n",
    "c.update('notebook', {\"CodeCell\": {\"cm_config\": {\"autoCloseBrackets\": False}}})  \n",
    "\n",
    "# Numpy options\n",
    "np.set_printoptions(precision=5, linewidth=120, suppress=True)\n",
    "\n",
    "# Pandas options\n",
    "pd.set_option('display.max_rows', None)\n",
    "\n",
    "# Matplotlib options\n",
    "plt.rcParams['figure.figsize'] = [12, 8]\n",
    "plt.rcParams['figure.dpi'] = 200"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare input data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we load the raw data that will be used to compute the optimization input variables, the vector $\\EMean$ of expected returns and the covariance matrix $\\ECov$. The data consists of daily stock prices of $8$ stocks from the US market. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data downloading:\n",
    "# If the user has an API key for alphavantage.co, then this code part will download the data. \n",
    "# The code can be modified to download from other sources. To be able to run the examples, \n",
    "# and reproduce results in the cookbook, the files have to have the following format and content:\n",
    "# - File name pattern: \"daily_adjusted_[TICKER].csv\", where TICKER is the symbol of a stock. \n",
    "# - The file contains at least columns \"timestamp\", \"adjusted_close\", and \"volume\".\n",
    "# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18, \n",
    "# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.\n",
    "list_stocks = [\"PM\", \"LMT\", \"MCD\", \"MMM\", \"AAPL\", \"MSFT\", \"TXN\", \"CSCO\"]\n",
    "list_factors = []\n",
    "alphaToken = None\n",
    " \n",
    "list_tickers = list_stocks + list_factors\n",
    "if alphaToken is not None:\n",
    "    data_download(list_tickers, alphaToken)  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "investment_start = \"2016-03-18\"\n",
    "investment_end = \"2021-03-18\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# The files are in \"stock_data\" folder, named as \"daily_adjusted_[TICKER].csv\"\n",
    "dr = DataReader(folder_path=\"stock_data\", symbol_list=list_tickers)\n",
    "dr.read_data()\n",
    "df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run the optimization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define the optimization model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def EfficientFrontier(N, mu0, gamma, GQ, G, deltas):\n",
    "    \"\"\"Trace the efficient frontier of the (robust) mean-variance model.\n",
    "\n",
    "    For every value in `deltas` the model maximizes the worst-case return\n",
    "    `mu0^T x - gamma * sq` minus the penalty `delta * s` over long-only,\n",
    "    fully invested portfolios `x`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    N : int\n",
    "        Number of securities (length of the variable `x`).\n",
    "    mu0 : array-like of length N\n",
    "        Nominal expected returns.\n",
    "    gamma : float\n",
    "        Weight of the robustness term; `gamma = 0` gives the non-robust problem.\n",
    "    GQ : matrix with N rows\n",
    "        Defines the robustness term through `sq >= ||GQ^T x||_2`.\n",
    "    G : matrix with N rows\n",
    "        Defines the risk term through `2 * s >= ||G^T x||_2^2`.\n",
    "    deltas : iterable of float\n",
    "        Risk aversion values; the model is re-solved once per value.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per delta with the columns 'delta', 'obj', 'return', 'risk',\n",
    "        followed by one weight column per security. The security column names\n",
    "        are read from the notebook-level `df_prices`, which must already exist.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    Exception\n",
    "        If a solve does not finish with an optimal primal solution status.\n",
    "    \"\"\"\n",
    "    with Model(\"Case study\") as M:\n",
    "        # Settings\n",
    "        #M.setLogHandler(sys.stdout)\n",
    "\n",
    "        # Variables\n",
    "        # The variable x is the fraction of holdings in each security.\n",
    "        # It is restricted to be nonnegative, which imposes the constraint of no short-selling.\n",
    "        x = M.variable(\"x\", N, Domain.greaterThan(0.0))\n",
    "\n",
    "        # The variable s models the portfolio risk term.\n",
    "        s = M.variable(\"s\", 1, Domain.greaterThan(0.0))\n",
    "\n",
    "        # The variable sq models the robustness term.\n",
    "        sq = M.variable(\"sq\", 1, Domain.greaterThan(0.0))\n",
    "\n",
    "        # Budget constraint\n",
    "        M.constraint('budget', Expr.sum(x) == 1.0)\n",
    "\n",
    "        # Objective\n",
    "        delta = M.parameter()\n",
    "        wc_return = x.T @ mu0 - gamma * sq\n",
    "        M.objective('obj', ObjectiveSense.Maximize, wc_return - delta * s)\n",
    "\n",
    "        # Robustness\n",
    "        M.constraint('robustness', Expr.vstack(sq, GQ.T @ x), Domain.inQCone())\n",
    "\n",
    "        # Risk constraint\n",
    "        M.constraint('risk', Expr.vstack(s, 1, G.T @ x), Domain.inRotatedQCone())\n",
    "\n",
    "        # Result columns: four summary values followed by one weight per security.\n",
    "        columns = [\"delta\", \"obj\", \"return\", \"risk\"] + df_prices.columns.tolist()\n",
    "\n",
    "        # Collect plain rows and build the DataFrame once at the end. Growing a frame\n",
    "        # with pd.concat inside the loop is quadratic, and concatenating onto an empty\n",
    "        # frame is deprecated in recent pandas versions.\n",
    "        rows = []\n",
    "        for d in deltas:\n",
    "            # Update parameter\n",
    "            delta.setValue(d)\n",
    "\n",
    "            # Solve optimization\n",
    "            M.solve()\n",
    "\n",
    "            # Check if the solution is an optimal point\n",
    "            solsta = M.getPrimalSolutionStatus()\n",
    "            if (solsta != SolutionStatus.Optimal):\n",
    "                # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.\n",
    "                raise Exception(\"Unexpected solution status!\")\n",
    "\n",
    "            # Save results\n",
    "            weights = x.level()\n",
    "            portfolio_return = mu0 @ weights - gamma * sq.level()[0]\n",
    "            # The rotated cone gives 2 * s >= ||G^T x||^2, so sqrt(2 * s) is the standard deviation.\n",
    "            portfolio_risk = np.sqrt(2 * s.level()[0])\n",
    "            rows.append([d, M.primalObjValue(), portfolio_return, portfolio_risk] + list(weights))\n",
    "\n",
    "        return pd.DataFrame(rows, columns=columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute optimization input variables"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Number of securities = number of columns of df_prices\n",
    "# (list_factors is empty in this notebook, so there are no factor columns to subtract)\n",
    "N = df_prices.shape[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We estimate the vector of expected returns $\\EMean$ and the covariance matrix $\\ECov$ from the price data using the `compute_inputs` helper function from `portfolio_tools.py`. No factor model is used in this notebook. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "m, S = compute_inputs(df_prices)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next we compute the matrix $G$ such that $\\ECov=GG^\\mathsf{T}$, this is the input of the conic form of the optimization problem. Here we use Cholesky factorization."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "G = np.linalg.cholesky(S)  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We compute the parameters of the uncertainty sets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncertainty set parameters\n",
    "gamma = 0.02\n",
    "GQ = G\n",
    "mu0 = m\n",
    "\n",
    "# To get back the non_robust case, we have to zero the bounds\n",
    "gamma_z = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Call the optimizer function"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We run the optimization for a range of risk aversion parameter values: $\\delta = 10^{-1},\\dots,10^{2}$. We compute the efficient frontier this way both without the robustness term ($\\gamma = 0$) and with it ($\\gamma = 0.02$). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the efficient frontier without (gamma_z = 0) and with (gamma) the robustness term.\n",
    "# The risk aversion values are passed in decreasing order.\n",
    "deltas = np.logspace(start=-1, stop=2, num=20)[::-1]\n",
    "df_result_orig = EfficientFrontier(N, mu0, gamma_z, GQ, G, deltas)\n",
    "df_result_robust = EfficientFrontier(N, mu0, gamma, GQ, G, deltas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Set small negatives to zero to make plotting work.\n",
    "# Only the last N columns (the portfolio weights) are cleaned; the leading summary\n",
    "# columns (delta, obj, return, risk) are left untouched. Using N instead of a\n",
    "# hard-coded 8 keeps this cell correct when the list of stocks changes.\n",
    "for df_result in (df_result_orig, df_result_robust):\n",
    "    mask = df_result < 0\n",
    "    mask.iloc[:, :-N] = False\n",
    "    df_result[mask] = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize the results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot the efficient frontier for both cases."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "ax = df_result_robust.plot(x=\"risk\", y=\"return\", style=\"-o\", xlabel=\"portfolio risk (std. dev.)\", ylabel=\"portfolio return\", grid=True)\n",
    "df_result_orig.plot(ax=ax, x=\"risk\", y=\"return\", style=\"-o\", xlabel=\"portfolio risk (std. dev.)\", ylabel=\"portfolio return\", grid=True)   \n",
    "ax.legend([\"robust return\", \"return\"]);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot the portfolio composition for both cases."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Plot portfolio composition\n",
    "my_cmap = LinearSegmentedColormap.from_list(\"non-extreme gray\", [\"#111111\", \"#eeeeee\"], N=256, gamma=1.0)\n",
    "ax1 = df_result_robust.set_index('risk').iloc[:, 3:].plot.area(colormap=my_cmap, xlabel='portfolio risk (std. dev.)', ylabel=\"x\")\n",
    "ax1.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)\n",
    "ax2 = df_result_orig.set_index('risk').iloc[:, 3:].plot.area(colormap=my_cmap, xlabel='portfolio risk (std. dev.)', ylabel=\"x\") \n",
    "ax2.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}