{
"cells": [
{
"cell_type": "markdown",
"id": "b45644b4",
"metadata": {},
"source": [
"This notebook serves as explanation for a stackoverflow solution:\n",
"\n",
"\n",
"# See https://stackoverflow.com/a/64832684/333403\n",
"\n",
"
\n",
"\n",
"More answers can be found at .\n",
"\n",
"Please remember: **upvoting** answers keeps people motivated. (And reputation helps to place bounties and thus get answers to hard questions).\n",
"\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b45eff63",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sun Dec 25 10:54:25 2022\n"
]
}
],
"source": [
"import time\n",
"print(time.ctime())\n",
"\n",
"try:\n",
" import ipydex\n",
" # this optional and enables the magic comment `##:` to view intermediate results\n",
" %load_ext ipydex.displaytools\n",
"except ImportError:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "aa1fa3a7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Timestamp | \n",
" Austria | \n",
" Belgium | \n",
" France | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 1993-11-01 | \n",
" 6.18 | \n",
" 7.05 | \n",
" 7.69 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1993-11-02 | \n",
" 6.18 | \n",
" 7.05 | \n",
" 7.61 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1993-11-03 | \n",
" 6.17 | \n",
" 7.20 | \n",
" 7.67 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1993-11-04 | \n",
" 6.17 | \n",
" 7.50 | \n",
" 7.91 | \n",
"
\n",
" \n",
" | 5 | \n",
" 1993-11-15 | \n",
" 6.40 | \n",
" 7.60 | \n",
" 8.61 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"df := Timestamp Austria Belgium France\n",
"1 1993-11-01 6.18 7.05 7.69\n",
"2 1993-11-02 6.18 7.05 7.61\n",
"3 1993-11-03 6.17 7.20 7.67\n",
"4 1993-11-04 6.17 7.50 7.91\n",
"5 1993-11-15 6.40 7.60 8.61"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# original data\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"df = pd.DataFrame(\n",
" {\"Timestamp\" : ['1993-11-01' ,'1993-11-02', '1993-11-03', '1993-11-04','1993-11-15'],\n",
" \"Austria\" : [6.18 ,6.18, 6.17, 6.17, 6.40],\n",
" \"Belgium\" : [7.05, 7.05, 7.2, 7.5, 7.6],\"France\" : [7.69, 7.61, 7.67, 7.91, 8.61]\n",
" },\n",
" index = [1, 2, 3,4,5]\n",
") ##:\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f89a5ee2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"cols := ['Austria', 'Belgium', 'France']"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"col_pairs := [('Austria', 'Belgium'), ('Austria', 'France'), ('Belgium', 'France')]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import itertools as it\n",
"\n",
"# omit timestamp-col\n",
"cols = list(df.columns)[1:] ##:\n",
"\n",
"col_pairs = list(it.combinations(cols, 2)) ##:\n",
"\n",
"# new data frame (yet empty) \n",
"res = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "df494994",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Aus_Bel_corr Aus_Fra_corr Bel_Fra_corr\n",
"0 NaN NaN NaN\n",
"1 NaN NaN NaN\n",
"2 -1.000000 -0.277350 0.277350\n",
"3 -0.755929 -0.654654 0.989743\n",
"4 0.693375 0.969346 0.849167\n"
]
}
],
"source": [
"# complete solution\n",
"\n",
"for pair in col_pairs:\n",
" # select the first three letters of each name of the pair\n",
" corr_name = f\"{pair[0][:3]}_{pair[1][:3]}_corr\"\n",
" res[corr_name] = df[list(pair)].\\\n",
" rolling(min_periods=1, window=3).\\\n",
" corr().iloc[0::2, -1].reset_index(drop=True)\n",
"\n",
"print(str(res))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "38b32962",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pair := ('Austria', 'Belgium')"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"corr_name := 'Aus_Bel_corr'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# one iteration (i.e. one column):\n",
"\n",
"pair = col_pairs[0] ##:\n",
"corr_name = f\"{pair[0][:3]}_{pair[1][:3]}_corr\" ##:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "857e51c1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Austria | \n",
" Belgium | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 6.18 | \n",
" 7.05 | \n",
"
\n",
" \n",
" | 2 | \n",
" 6.18 | \n",
" 7.05 | \n",
"
\n",
" \n",
" | 3 | \n",
" 6.17 | \n",
" 7.20 | \n",
"
\n",
" \n",
" | 4 | \n",
" 6.17 | \n",
" 7.50 | \n",
"
\n",
" \n",
" | 5 | \n",
" 6.40 | \n",
" 7.60 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"tmp := Austria Belgium\n",
"1 6.18 7.05\n",
"2 6.18 7.05\n",
"3 6.17 7.20\n",
"4 6.17 7.50\n",
"5 6.40 7.60"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# select the corresponding columns\n",
"tmp = df[list(pair)] ##:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1950a772",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tmp2 := Rolling [window=3,min_periods=1,center=False,axis=0,method=single]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# this rolling objects does not yet specify the operation\n",
"tmp2 = tmp.rolling(min_periods=1, window=3) ##:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "71b32979",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" Austria | \n",
" Belgium | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" Austria | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | Belgium | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 2 | \n",
" Austria | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | Belgium | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" Austria | \n",
" 1.000000 | \n",
" -1.000000 | \n",
"
\n",
" \n",
" | Belgium | \n",
" -1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" | 4 | \n",
" Austria | \n",
" 1.000000 | \n",
" -0.755929 | \n",
"
\n",
" \n",
" | Belgium | \n",
" -0.755929 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" | 5 | \n",
" Austria | \n",
" 1.000000 | \n",
" 0.693375 | \n",
"
\n",
" \n",
" | Belgium | \n",
" 0.693375 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"tmp3 := Austria Belgium\n",
"1 Austria NaN NaN\n",
" Belgium NaN NaN\n",
"2 Austria NaN NaN\n",
" Belgium NaN NaN\n",
"3 Austria 1.000000 -1.000000\n",
" Belgium -1.000000 1.000000\n",
"4 Austria 1.000000 -0.755929\n",
" Belgium -0.755929 1.000000\n",
"5 Austria 1.000000 0.693375\n",
" Belgium 0.693375 1.000000"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# now specify the operation to `corr`\n",
"tmp3 = tmp2.corr() ##:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "60e6a811",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MultiIndex([(1, 'Austria'),\n",
" (1, 'Belgium'),\n",
" (2, 'Austria'),\n",
" (2, 'Belgium'),\n",
" (3, 'Austria'),\n",
" (3, 'Belgium'),\n",
" (4, 'Austria'),\n",
" (4, 'Belgium'),\n",
" (5, 'Austria'),\n",
" (5, 'Belgium')],\n",
" )"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The previous result contains to much data. We are interested only in every second row.\n",
"# It also has a \"messed up\" index:\n",
"tmp3.index"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0d758cf3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Purely integer-location based indexing for selection by position.\n",
"\n",
"``.iloc[]`` is primarily integer position based (from ``0`` to\n",
"``length-1`` of the axis), but may also be used with a boolean\n",
"array.\n",
"\n",
"Allowed inputs are:\n",
"\n",
"- An integer, e.g. ``5``.\n",
"- A list or array of integers, e.g. ``[4, 3, 0]``.\n",
"- A slice object with ints, e.g. ``1:7``.\n",
"- A boolean array.\n",
"- A ``callable`` function with one argument (the calling Series or\n",
" DataFrame) and that returns valid output for indexing (one of the above).\n",
" This is useful in method chains, when you don't have a reference to the\n",
" calling object, but would like to base your selection on some value.\n",
"\n",
"``.iloc`` will raise ``IndexError`` if a requested indexer is\n",
"out-of-bounds, except *slice* indexers which allow out-of-bounds\n",
"indexing (this conforms\n"
]
}
],
"source": [
"# To only select every second row, we use the iloc property which is documentend here:\n",
"# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html\n",
"\n",
"# in short: it allows access to the dataframe cells by purely integer indexing (without using column names etc.)\n",
"\n",
"print(tmp3.iloc.__doc__[:800])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e9a21170",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tmp4 :=\n",
"1 Austria NaN\n",
"2 Austria NaN\n",
"3 Austria -1.000000\n",
"4 Austria -0.755929\n",
"5 Austria 0.693375\n",
"Name: Belgium, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(type(tmp4)) := pandas.core.series.Series"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# the slicing-syntax of iloc is like for numpy arrays: `start:stop:step` for rows and columns \n",
"# `0::2` means: beginning with row index 0 until the end (no stop index) with a stepsizt of 2\n",
"# the `-1` means: just select the last column (index -1), no colon -> no slice\n",
"\n",
"# for more information see: https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding\n",
"\n",
"tmp4 = tmp3.iloc[0::2, -1] ##:\\n\n",
"type(tmp4) ##:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5a0c1e94",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tmp5 :=\n",
"0 NaN\n",
"1 NaN\n",
"2 -1.000000\n",
"3 -0.755929\n",
"4 0.693375\n",
"Name: Belgium, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"---"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# finally, we install a new index beginning at 0:\n",
"tmp5 = tmp4.reset_index(drop=True)##:\\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}