{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## [02_Probabilistic.ipynb](https://github.com/raybellwaves/xskillscore-tutorial/blob/master/02_Probabilistic.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook shows how to use probabilistic metrics in a typical data science task where the data is a pandas.DataFrame.\n", "\n", "The metric Continuous Ranked Probability Score (CRPS) is used to verify multiple forecasts for the same target." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import pandas as pd\n", "import numpy as np\n", "import xskillscore as xs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the same data as in [01_Deterministic.ipynb](https://github.com/raybellwaves/xskillscore-tutorial/blob/master/01_Determinisitic.ipynb):" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | \n", " | \n", " | y | \n", "
|---|---|---|---|
| DATE | \n", "STORE | \n", "SKU | \n", "\n", " |
| 2020-01-01 | \n", "0 | \n", "0 | \n", "9 | \n", "
| 1 | \n", "2 | \n", "||
| 2 | \n", "4 | \n", "||
| 1 | \n", "0 | \n", "1 | \n", "|
| 1 | \n", "5 | \n", "
| \n", " | \n", " | \n", " | y | \n", "member | \n", "yhat | \n", "
|---|---|---|---|---|---|
| DATE | \n", "STORE | \n", "SKU | \n", "\n", " | \n", " | \n", " |
| 2020-01-01 | \n", "0 | \n", "0 | \n", "9 | \n", "1 | \n", "10 | \n", "
| 1 | \n", "2 | \n", "1 | \n", "0 | \n", "||
| 2 | \n", "4 | \n", "1 | \n", "2 | \n", "||
| 1 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "|
| 1 | \n", "5 | \n", "1 | \n", "5 | \n", "||
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 2020-01-05 | \n", "2 | \n", "1 | \n", "8 | \n", "6 | \n", "7 | \n", "
| 2 | \n", "2 | \n", "6 | \n", "0 | \n", "||
| 3 | \n", "0 | \n", "8 | \n", "6 | \n", "8 | \n", "|
| 1 | \n", "3 | \n", "6 | \n", "1 | \n", "||
| 2 | \n", "5 | \n", "6 | \n", "9 | \n", "
360 rows × 3 columns
\n", "| \n", " | \n", " | \n", " | \n", " | yhat | \n", "
|---|---|---|---|---|
| DATE | \n", "STORE | \n", "SKU | \n", "member | \n", "\n", " |
| 2020-01-01 | \n", "0 | \n", "0 | \n", "1 | \n", "10 | \n", "
| 1 | \n", "1 | \n", "0 | \n", "||
| 2 | \n", "1 | \n", "2 | \n", "||
| 1 | \n", "0 | \n", "1 | \n", "0 | \n", "|
| 1 | \n", "1 | \n", "5 | \n", "||
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 2020-01-05 | \n", "2 | \n", "1 | \n", "6 | \n", "7 | \n", "
| 2 | \n", "6 | \n", "0 | \n", "||
| 3 | \n", "0 | \n", "6 | \n", "8 | \n", "|
| 1 | \n", "6 | \n", "1 | \n", "||
| 2 | \n", "6 | \n", "9 | \n", "
360 rows × 1 columns
\n", "array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',\n",
" '2020-01-03T00:00:00.000000000', '2020-01-04T00:00:00.000000000',\n",
" '2020-01-05T00:00:00.000000000'], dtype='datetime64[ns]')array([0, 1, 2, 3])
array([0, 1, 2])
array([[[9, 2, 4],\n",
" [1, 5, 8],\n",
" [6, 3, 1],\n",
" [6, 4, 5]],\n",
"\n",
" [[3, 4, 5],\n",
" [5, 6, 8],\n",
" [5, 4, 1],\n",
" [8, 2, 7]],\n",
"\n",
" [[8, 3, 2],\n",
" [4, 4, 2],\n",
" [2, 3, 5],\n",
" [1, 4, 6]],\n",
"\n",
" [[2, 3, 9],\n",
" [2, 7, 1],\n",
" [4, 1, 9],\n",
" [4, 3, 9]],\n",
"\n",
" [[2, 8, 7],\n",
" [7, 3, 4],\n",
" [8, 8, 2],\n",
" [8, 3, 5]]])array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',\n",
" '2020-01-03T00:00:00.000000000', '2020-01-04T00:00:00.000000000',\n",
" '2020-01-05T00:00:00.000000000'], dtype='datetime64[ns]')array([0, 1, 2, 3])
array([0, 1, 2])
array([1, 2, 3, 4, 5, 6])
array([[[9, 2, 4],\n",
" [1, 5, 8],\n",
" [6, 3, 1],\n",
" [6, 4, 5]],\n",
"\n",
" [[3, 4, 5],\n",
" [5, 6, 8],\n",
" [5, 4, 1],\n",
" [8, 2, 7]],\n",
"\n",
" [[8, 3, 2],\n",
" [4, 4, 2],\n",
" [2, 3, 5],\n",
" [1, 4, 6]],\n",
"\n",
" [[2, 3, 9],\n",
" [2, 7, 1],\n",
" [4, 1, 9],\n",
" [4, 3, 9]],\n",
"\n",
" [[2, 8, 7],\n",
" [7, 3, 4],\n",
" [8, 8, 2],\n",
" [8, 3, 5]]])array([[[[10, 10, 1, 10, 16, 11],\n",
" [ 0, 3, 1, 0, 2, 0],\n",
" [ 2, 5, 7, 2, 0, 1]],\n",
"\n",
" [[ 0, 1, 0, 0, 0, 0],\n",
" [ 5, 0, 8, 8, 9, 1],\n",
" [ 1, 4, 15, 2, 1, 10]],\n",
"\n",
" [[ 0, 3, 5, 4, 5, 5],\n",
" [ 3, 2, 0, 1, 4, 1],\n",
" [ 1, 1, 0, 0, 0, 1]],\n",
"\n",
" [[10, 3, 11, 10, 10, 0],\n",
" [ 3, 5, 3, 3, 1, 2],\n",
" [ 4, 3, 6, 4, 0, 2]]],\n",
"\n",
"\n",
" [[[ 4, 3, 0, 1, 0, 0],\n",
" [ 5, 3, 7, 6, 1, 1],\n",
" [ 0, 7, 8, 6, 5, 1]],\n",
"\n",
" [[ 5, 1, 8, 6, 3, 6],\n",
" [ 6, 9, 11, 4, 10, 6],\n",
" [ 0, 5, 5, 6, 3, 15]],\n",
"\n",
" [[ 2, 5, 9, 2, 4, 5],\n",
" [ 2, 3, 1, 3, 6, 6],\n",
" [ 0, 0, 1, 1, 0, 0]],\n",
"\n",
" [[15, 2, 13, 10, 4, 4],\n",
" [ 0, 2, 0, 0, 3, 2],\n",
" [ 6, 7, 8, 1, 10, 7]]],\n",
"\n",
"\n",
" [[[ 6, 1, 12, 7, 5, 10],\n",
" [ 3, 0, 0, 1, 1, 3],\n",
" [ 1, 2, 0, 2, 2, 3]],\n",
"\n",
" [[ 6, 5, 5, 7, 1, 1],\n",
" [ 6, 3, 2, 3, 1, 2],\n",
" [ 0, 0, 3, 0, 1, 3]],\n",
"\n",
" [[ 1, 3, 0, 1, 0, 3],\n",
" [ 0, 5, 0, 2, 2, 0],\n",
" [ 7, 9, 6, 5, 2, 1]],\n",
"\n",
" [[ 0, 1, 0, 1, 0, 0],\n",
" [ 4, 5, 6, 1, 7, 4],\n",
" [ 0, 7, 4, 6, 5, 0]]],\n",
"\n",
"\n",
" [[[ 0, 3, 0, 1, 1, 2],\n",
" [ 3, 4, 3, 5, 5, 0],\n",
" [ 2, 15, 11, 16, 11, 14]],\n",
"\n",
" [[ 3, 2, 2, 3, 1, 1],\n",
" [ 8, 9, 2, 6, 0, 12],\n",
" [ 0, 0, 0, 1, 0, 1]],\n",
"\n",
" [[ 5, 0, 2, 1, 3, 2],\n",
" [ 1, 0, 1, 0, 0, 0],\n",
" [ 7, 8, 4, 5, 6, 4]],\n",
"\n",
" [[ 7, 6, 5, 5, 3, 7],\n",
" [ 0, 0, 4, 2, 0, 3],\n",
" [ 5, 10, 1, 8, 0, 0]]],\n",
"\n",
"\n",
" [[[ 3, 1, 2, 3, 1, 3],\n",
" [ 8, 11, 15, 6, 11, 13],\n",
" [12, 7, 13, 5, 12, 8]],\n",
"\n",
" [[ 6, 6, 4, 1, 9, 13],\n",
" [ 3, 1, 4, 0, 4, 3],\n",
" [ 6, 7, 0, 7, 4, 3]],\n",
"\n",
" [[ 1, 7, 11, 0, 11, 2],\n",
" [ 7, 0, 3, 15, 6, 7],\n",
" [ 3, 1, 2, 2, 1, 0]],\n",
"\n",
" [[ 6, 2, 1, 2, 9, 8],\n",
" [ 5, 0, 4, 1, 2, 1],\n",
" [ 5, 4, 5, 8, 1, 9]]]])array([[[1.16666667, 0.72222222, 1.19444444],\n",
" [0.69444444, 1.25 , 2.75 ],\n",
" [1.44444444, 0.75 , 0.25 ],\n",
" [2.22222222, 0.86111111, 1.13888889]],\n",
"\n",
" [[1.16666667, 0.86111111, 0.86111111],\n",
" [0.58333333, 0.94444444, 2.33333333],\n",
" [0.58333333, 0.80555556, 0.44444444],\n",
" [1.94444444, 0.52777778, 0.41666667]],\n",
"\n",
" [[1.19444444, 1. , 0.16666667],\n",
" [0.91666667, 1.02777778, 0.80555556],\n",
" [0.66666667, 1.25 , 0.77777778],\n",
" [0.44444444, 0.47222222, 1.16666667]],\n",
"\n",
" [[0.58333333, 0.44444444, 2.47222222],\n",
" [0.22222222, 1.19444444, 0.44444444],\n",
" [1.30555556, 0.44444444, 2.5 ],\n",
" [1.08333333, 0.97222222, 3.16666667]],\n",
"\n",
" [[0.36111111, 1.66666667, 1.52777778],\n",
" [1.08333333, 0.36111111, 0.80555556],\n",
" [2.16666667, 1.55555556, 0.30555556],\n",
" [1.94444444, 0.86111111, 0.55555556]]])array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',\n",
" '2020-01-03T00:00:00.000000000', '2020-01-04T00:00:00.000000000',\n",
" '2020-01-05T00:00:00.000000000'], dtype='datetime64[ns]')array([0, 1, 2, 3])
array([0, 1, 2])
array(0.56481481)