{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## [02_Probabilistic.ipynb](https://github.com/raybellwaves/xskillscore-tutorial/blob/master/02_Probabilistic.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook shows how to use probabilistic metrics in a typical data science task where the data is held in a `pandas.DataFrame`.\n", "\n", "The Continuous Ranked Probability Score (CRPS) is used to verify multiple forecasts (ensemble members) made for the same target." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import pandas as pd\n", "import numpy as np\n", "import xskillscore as xs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the same data as in [01_Deterministic.ipynb](https://github.com/raybellwaves/xskillscore-tutorial/blob/master/01_Determinisitic.ipynb)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | \n", " | \n", " | y | \n", "
---|---|---|---|
DATE | \n", "STORE | \n", "SKU | \n", "\n", " |
2020-01-01 | \n", "0 | \n", "0 | \n", "6 | \n", "
1 | \n", "9 | \n", "||
2 | \n", "2 | \n", "||
1 | \n", "0 | \n", "6 | \n", "|
1 | \n", "8 | \n", "
\n", " | \n", " | \n", " | y | \n", "member | \n", "yhat | \n", "
---|---|---|---|---|---|
DATE | \n", "STORE | \n", "SKU | \n", "\n", " | \n", " | \n", " |
2020-01-01 | \n", "0 | \n", "0 | \n", "6 | \n", "1 | \n", "4 | \n", "
1 | \n", "9 | \n", "1 | \n", "7 | \n", "||
2 | \n", "2 | \n", "1 | \n", "1 | \n", "||
1 | \n", "0 | \n", "6 | \n", "1 | \n", "1 | \n", "|
1 | \n", "8 | \n", "1 | \n", "5 | \n", "||
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2020-01-05 | \n", "2 | \n", "1 | \n", "3 | \n", "6 | \n", "0 | \n", "
2 | \n", "1 | \n", "6 | \n", "1 | \n", "||
3 | \n", "0 | \n", "9 | \n", "6 | \n", "7 | \n", "|
1 | \n", "3 | \n", "6 | \n", "3 | \n", "||
2 | \n", "2 | \n", "6 | \n", "0 | \n", "
360 rows × 3 columns
\n", "\n", " | \n", " | \n", " | \n", " | yhat | \n", "
---|---|---|---|---|
DATE | \n", "STORE | \n", "SKU | \n", "member | \n", "\n", " |
2020-01-01 | \n", "0 | \n", "0 | \n", "1 | \n", "4 | \n", "
1 | \n", "1 | \n", "7 | \n", "||
2 | \n", "1 | \n", "1 | \n", "||
1 | \n", "0 | \n", "1 | \n", "1 | \n", "|
1 | \n", "1 | \n", "5 | \n", "||
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2020-01-05 | \n", "2 | \n", "1 | \n", "6 | \n", "0 | \n", "
2 | \n", "6 | \n", "1 | \n", "||
3 | \n", "0 | \n", "6 | \n", "7 | \n", "|
1 | \n", "6 | \n", "3 | \n", "||
2 | \n", "6 | \n", "0 | \n", "
360 rows × 1 columns
\n", "array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',\n", " '2020-01-03T00:00:00.000000000', '2020-01-04T00:00:00.000000000',\n", " '2020-01-05T00:00:00.000000000'], dtype='datetime64[ns]')
array([0, 1, 2, 3])
array([0, 1, 2])
array([[[6, 9, 2],\n", " [6, 8, 8],\n", " [2, 3, 8],\n", " [8, 2, 7]],\n", "\n", " [[2, 4, 9],\n", " [3, 4, 6],\n", " [7, 2, 2],\n", " [2, 9, 1]],\n", "\n", " [[6, 5, 8],\n", " [3, 1, 9],\n", " [6, 1, 9],\n", " [3, 9, 8]],\n", "\n", " [[4, 3, 7],\n", " [1, 1, 3],\n", " [8, 5, 7],\n", " [3, 7, 3]],\n", "\n", " [[1, 5, 6],\n", " [9, 2, 9],\n", " [7, 3, 1],\n", " [9, 3, 2]]])
array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',\n", " '2020-01-03T00:00:00.000000000', '2020-01-04T00:00:00.000000000',\n", " '2020-01-05T00:00:00.000000000'], dtype='datetime64[ns]')
array([0, 1, 2, 3])
array([0, 1, 2])
array([1, 2, 3, 4, 5, 6])
array([[[6, 9, 2],\n", " [6, 8, 8],\n", " [2, 3, 8],\n", " [8, 2, 7]],\n", "\n", " [[2, 4, 9],\n", " [3, 4, 6],\n", " [7, 2, 2],\n", " [2, 9, 1]],\n", "\n", " [[6, 5, 8],\n", " [3, 1, 9],\n", " [6, 1, 9],\n", " [3, 9, 8]],\n", "\n", " [[4, 3, 7],\n", " [1, 1, 3],\n", " [8, 5, 7],\n", " [3, 7, 3]],\n", "\n", " [[1, 5, 6],\n", " [9, 2, 9],\n", " [7, 3, 1],\n", " [9, 3, 2]]])
array([[[[ 4, 3, 3, 5, 2, 9],\n", " [ 7, 16, 14, 14, 7, 15],\n", " [ 1, 1, 0, 1, 0, 3]],\n", "\n", " [[ 1, 9, 11, 6, 10, 3],\n", " [ 5, 4, 1, 14, 5, 14],\n", " [ 0, 4, 12, 4, 8, 0]],\n", "\n", " [[ 3, 2, 0, 3, 3, 2],\n", " [ 5, 1, 0, 0, 2, 4],\n", " [ 7, 4, 1, 7, 9, 1]],\n", "\n", " [[ 6, 3, 4, 9, 12, 12],\n", " [ 3, 2, 1, 3, 2, 1],\n", " [ 6, 6, 2, 7, 10, 1]]],\n", "\n", "\n", " [[[ 1, 0, 1, 3, 1, 0],\n", " [ 0, 7, 3, 6, 3, 2],\n", " [11, 12, 11, 11, 6, 9]],\n", "\n", " [[ 3, 0, 3, 4, 5, 1],\n", " [ 5, 4, 7, 0, 6, 0],\n", " [ 1, 8, 2, 1, 2, 3]],\n", "\n", " [[ 4, 1, 9, 5, 12, 11],\n", " [ 1, 0, 3, 2, 1, 0],\n", " [ 2, 0, 1, 2, 2, 3]],\n", "\n", " [[ 3, 0, 0, 3, 2, 0],\n", " [14, 13, 5, 9, 17, 11],\n", " [ 1, 1, 1, 1, 0, 1]]],\n", "\n", "\n", " [[[10, 4, 0, 1, 7, 3],\n", " [ 3, 7, 3, 2, 2, 2],\n", " [ 4, 8, 11, 5, 8, 4]],\n", "\n", " [[ 1, 0, 3, 0, 0, 4],\n", " [ 1, 0, 0, 1, 1, 0],\n", " [13, 6, 7, 6, 3, 4]],\n", "\n", " [[ 3, 8, 2, 2, 11, 9],\n", " [ 0, 0, 0, 1, 0, 1],\n", " [14, 5, 5, 9, 4, 12]],\n", "\n", " [[ 4, 2, 3, 4, 0, 5],\n", " [15, 9, 6, 6, 16, 2],\n", " [ 7, 13, 0, 9, 4, 12]]],\n", "\n", "\n", " [[[ 5, 0, 1, 7, 3, 6],\n", " [ 3, 1, 3, 0, 1, 4],\n", " [ 1, 8, 4, 9, 4, 0]],\n", "\n", " [[ 0, 0, 1, 0, 0, 1],\n", " [ 0, 0, 0, 0, 1, 1],\n", " [ 1, 1, 5, 1, 5, 3]],\n", "\n", " [[ 4, 4, 3, 1, 7, 2],\n", " [ 2, 0, 2, 5, 5, 5],\n", " [ 0, 12, 12, 2, 6, 6]],\n", "\n", " [[ 3, 2, 0, 3, 5, 0],\n", " [ 4, 0, 4, 5, 1, 6],\n", " [ 1, 3, 0, 1, 4, 2]]],\n", "\n", "\n", " [[[ 0, 0, 1, 0, 0, 1],\n", " [ 6, 5, 2, 4, 2, 5],\n", " [ 3, 10, 5, 8, 8, 7]],\n", "\n", " [[ 5, 13, 16, 16, 2, 3],\n", " [ 0, 0, 3, 0, 0, 2],\n", " [13, 1, 8, 2, 0, 8]],\n", "\n", " [[10, 8, 11, 5, 1, 0],\n", " [ 0, 3, 2, 2, 4, 0],\n", " [ 0, 1, 1, 1, 0, 1]],\n", "\n", " [[ 8, 15, 14, 4, 5, 7],\n", " [ 2, 5, 3, 1, 2, 3],\n", " [ 1, 1, 3, 2, 2, 0]]]])
array([[[1.5 , 2.58333333, 0.83333333],\n", " [1.27777778, 2.19444444, 2.33333333],\n", " [0.30555556, 0.94444444, 1.80555556],\n", " [1.33333333, 0.22222222, 1. ]],\n", "\n", " [[0.83333333, 0.86111111, 1. ],\n", " [0.38888889, 0.83333333, 2.69444444],\n", " [1.44444444, 0.58333333, 0.16666667],\n", " [0.61111111, 1.69444444, 0.02777778]],\n", "\n", " [[1.58333333, 1.69444444, 0.94444444],\n", " [1.16666667, 0.25 , 2.19444444],\n", " [1.52777778, 0.44444444, 1.41666667],\n", " [0.44444444, 1.55555556, 1.30555556]],\n", "\n", " [[0.88888889, 0.55555556, 1.83333333],\n", " [0.44444444, 0.44444444, 0.72222222],\n", " [3.47222222, 0.80555556, 1.5 ],\n", " [0.52777778, 2.5 , 0.75 ]],\n", "\n", " [[0.44444444, 0.5 , 0.91666667],\n", " [2.58333333, 0.91666667, 2.44444444],\n", " [1.47222222, 0.69444444, 0.11111111],\n", " [1.52777778, 0.33333333, 0.30555556]]])
array(['2020-01-01T00:00:00.000000000', '2020-01-02T00:00:00.000000000',\n", " '2020-01-03T00:00:00.000000000', '2020-01-04T00:00:00.000000000',\n", " '2020-01-05T00:00:00.000000000'], dtype='datetime64[ns]')
array([0, 1, 2, 3])
array([0, 1, 2])
array(0.80694444)