{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Ledoit-Wolf vs OAS estimation\n\nThe usual covariance maximum likelihood estimate can be regularized\nusing shrinkage. Ledoit and Wolf proposed a close formula to compute\nthe asymptotically optimal shrinkage parameter (minimizing a MSE\ncriterion), yielding the Ledoit-Wolf covariance estimate.\n\nChen et al. proposed an improvement of the Ledoit-Wolf shrinkage\nparameter, the OAS coefficient, whose convergence is significantly\nbetter under the assumption that the data are Gaussian.\n\nThis example, inspired from Chen's publication [1], shows a comparison\nof the estimated MSE of the LW and OAS methods, using Gaussian\ndistributed data.\n\n[1] \"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom scipy.linalg import cholesky, toeplitz\n\nfrom sklearn.covariance import OAS, LedoitWolf\n\nnp.random.seed(0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "n_features = 100\n# simulation covariance matrix (AR(1) process)\nr = 0.1\nreal_cov = toeplitz(r ** np.arange(n_features))\ncoloring_matrix = cholesky(real_cov)\n\nn_samples_range = np.arange(6, 31, 1)\nrepeat = 100\nlw_mse = np.zeros((n_samples_range.size, repeat))\noa_mse = np.zeros((n_samples_range.size, repeat))\nlw_shrinkage = np.zeros((n_samples_range.size, repeat))\noa_shrinkage = np.zeros((n_samples_range.size, repeat))\nfor i, n_samples in enumerate(n_samples_range):\n for j in range(repeat):\n X = np.dot(np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)\n\n lw = LedoitWolf(store_precision=False, assume_centered=True)\n lw.fit(X)\n lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)\n lw_shrinkage[i, j] = lw.shrinkage_\n\n oa = OAS(store_precision=False, assume_centered=True)\n oa.fit(X)\n oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)\n oa_shrinkage[i, j] = oa.shrinkage_\n\n# plot MSE\nplt.subplot(2, 1, 1)\nplt.errorbar(\n n_samples_range,\n lw_mse.mean(1),\n yerr=lw_mse.std(1),\n label=\"Ledoit-Wolf\",\n color=\"navy\",\n lw=2,\n)\nplt.errorbar(\n n_samples_range,\n oa_mse.mean(1),\n yerr=oa_mse.std(1),\n label=\"OAS\",\n color=\"darkorange\",\n lw=2,\n)\nplt.ylabel(\"Squared error\")\nplt.legend(loc=\"upper right\")\nplt.title(\"Comparison of covariance estimators\")\nplt.xlim(5, 31)\n\n# plot shrinkage coefficient\nplt.subplot(2, 1, 2)\nplt.errorbar(\n n_samples_range,\n lw_shrinkage.mean(1),\n yerr=lw_shrinkage.std(1),\n label=\"Ledoit-Wolf\",\n color=\"navy\",\n lw=2,\n)\nplt.errorbar(\n n_samples_range,\n oa_shrinkage.mean(1),\n yerr=oa_shrinkage.std(1),\n label=\"OAS\",\n color=\"darkorange\",\n lw=2,\n)\nplt.xlabel(\"n_samples\")\nplt.ylabel(\"Shrinkage\")\nplt.legend(loc=\"lower right\")\nplt.ylim(plt.ylim()[0], 1.0 + (plt.ylim()[1] - plt.ylim()[0]) / 10.0)\nplt.xlim(5, 31)\n\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.21" } }, "nbformat": 4, "nbformat_minor": 0 }