{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b45644b4",
   "metadata": {},
   "source": [
    "This notebook serves as explanation for a stackoverflow solution:\n",
    "\n",
    "\n",
    "# See https://stackoverflow.com/a/64832684/333403\n",
    "\n",
    "<br><br><br>\n",
    "\n",
    "More answers can be found at <https://stackoverflow.com/search?q=user:333403+[pandas]>.\n",
    "\n",
    "Please remember: **upvoting** answers keeps people motivated. (And reputation helps to place bounties and thus get answers to hard questions).\n",
    "\n",
    "<br><br><br>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b45eff63",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sun Dec 25 10:54:25 2022\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "print(time.ctime())\n",
    "\n",
    "try:\n",
    "    import ipydex\n",
    "    # this optional and enables the magic comment `##:` to view intermediate results\n",
    "    %load_ext ipydex.displaytools\n",
    "except ImportError:\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "aa1fa3a7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Austria</th>\n",
       "      <th>Belgium</th>\n",
       "      <th>France</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1993-11-01</td>\n",
       "      <td>6.18</td>\n",
       "      <td>7.05</td>\n",
       "      <td>7.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1993-11-02</td>\n",
       "      <td>6.18</td>\n",
       "      <td>7.05</td>\n",
       "      <td>7.61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1993-11-03</td>\n",
       "      <td>6.17</td>\n",
       "      <td>7.20</td>\n",
       "      <td>7.67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993-11-04</td>\n",
       "      <td>6.17</td>\n",
       "      <td>7.50</td>\n",
       "      <td>7.91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1993-11-15</td>\n",
       "      <td>6.40</td>\n",
       "      <td>7.60</td>\n",
       "      <td>8.61</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "df :=     Timestamp  Austria  Belgium  France\n",
       "1  1993-11-01     6.18     7.05    7.69\n",
       "2  1993-11-02     6.18     7.05    7.61\n",
       "3  1993-11-03     6.17     7.20    7.67\n",
       "4  1993-11-04     6.17     7.50    7.91\n",
       "5  1993-11-15     6.40     7.60    8.61"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# original data\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "df = pd.DataFrame(\n",
    "    {\"Timestamp\" : ['1993-11-01' ,'1993-11-02', '1993-11-03', '1993-11-04','1993-11-15'],\n",
    "     \"Austria\" : [6.18 ,6.18, 6.17, 6.17, 6.40],\n",
    "     \"Belgium\" : [7.05, 7.05, 7.2, 7.5, 7.6],\"France\" : [7.69, 7.61, 7.67, 7.91, 8.61]\n",
    "    },\n",
    "    index = [1, 2, 3,4,5]\n",
    ") ##:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f89a5ee2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "cols := ['Austria', 'Belgium', 'France']"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "col_pairs := [('Austria', 'Belgium'), ('Austria', 'France'), ('Belgium', 'France')]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import itertools as it\n",
    "\n",
    "# omit timestamp-col\n",
    "cols = list(df.columns)[1:] ##:\n",
    "\n",
    "col_pairs = list(it.combinations(cols, 2)) ##:\n",
    "\n",
    "# new data frame (yet empty) \n",
    "res = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "df494994",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Aus_Bel_corr  Aus_Fra_corr  Bel_Fra_corr\n",
      "0           NaN           NaN           NaN\n",
      "1           NaN           NaN           NaN\n",
      "2     -1.000000     -0.277350      0.277350\n",
      "3     -0.755929     -0.654654      0.989743\n",
      "4      0.693375      0.969346      0.849167\n"
     ]
    }
   ],
   "source": [
    "# complete solution\n",
    "\n",
    "for pair in col_pairs:\n",
    "    # select the first three letters of each name of the pair\n",
    "    corr_name = f\"{pair[0][:3]}_{pair[1][:3]}_corr\"\n",
    "    res[corr_name] = df[list(pair)].\\\n",
    "                    rolling(min_periods=1, window=3).\\\n",
    "                    corr().iloc[0::2, -1].reset_index(drop=True)\n",
    "\n",
    "print(str(res))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "38b32962",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pair := ('Austria', 'Belgium')"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "corr_name := 'Aus_Bel_corr'"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# one iteration (i.e. one column):\n",
    "\n",
    "pair = col_pairs[0] ##:\n",
    "corr_name = f\"{pair[0][:3]}_{pair[1][:3]}_corr\" ##:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "857e51c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Austria</th>\n",
       "      <th>Belgium</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6.18</td>\n",
       "      <td>7.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6.18</td>\n",
       "      <td>7.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6.17</td>\n",
       "      <td>7.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6.17</td>\n",
       "      <td>7.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6.40</td>\n",
       "      <td>7.60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "tmp :=    Austria  Belgium\n",
       "1     6.18     7.05\n",
       "2     6.18     7.05\n",
       "3     6.17     7.20\n",
       "4     6.17     7.50\n",
       "5     6.40     7.60"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# select the corresponding columns\n",
    "tmp = df[list(pair)] ##:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1950a772",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tmp2 := Rolling [window=3,min_periods=1,center=False,axis=0,method=single]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# this rolling objects does not yet specify the operation\n",
    "tmp2 = tmp.rolling(min_periods=1, window=3) ##:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "71b32979",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Austria</th>\n",
       "      <th>Belgium</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1</th>\n",
       "      <th>Austria</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">2</th>\n",
       "      <th>Austria</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">3</th>\n",
       "      <th>Austria</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>-1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">4</th>\n",
       "      <th>Austria</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.755929</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>-0.755929</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">5</th>\n",
       "      <th>Austria</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.693375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>0.693375</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "tmp3 :=             Austria   Belgium\n",
       "1 Austria       NaN       NaN\n",
       "  Belgium       NaN       NaN\n",
       "2 Austria       NaN       NaN\n",
       "  Belgium       NaN       NaN\n",
       "3 Austria  1.000000 -1.000000\n",
       "  Belgium -1.000000  1.000000\n",
       "4 Austria  1.000000 -0.755929\n",
       "  Belgium -0.755929  1.000000\n",
       "5 Austria  1.000000  0.693375\n",
       "  Belgium  0.693375  1.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# now specify the operation to `corr`\n",
    "tmp3 = tmp2.corr() ##:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "60e6a811",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([(1, 'Austria'),\n",
       "            (1, 'Belgium'),\n",
       "            (2, 'Austria'),\n",
       "            (2, 'Belgium'),\n",
       "            (3, 'Austria'),\n",
       "            (3, 'Belgium'),\n",
       "            (4, 'Austria'),\n",
       "            (4, 'Belgium'),\n",
       "            (5, 'Austria'),\n",
       "            (5, 'Belgium')],\n",
       "           )"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The previous result contains to much data. We are interested only in every second row.\n",
    "# It also has a \"messed up\" index:\n",
    "tmp3.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "0d758cf3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Purely integer-location based indexing for selection by position.\n",
      "\n",
      "``.iloc[]`` is primarily integer position based (from ``0`` to\n",
      "``length-1`` of the axis), but may also be used with a boolean\n",
      "array.\n",
      "\n",
      "Allowed inputs are:\n",
      "\n",
      "- An integer, e.g. ``5``.\n",
      "- A list or array of integers, e.g. ``[4, 3, 0]``.\n",
      "- A slice object with ints, e.g. ``1:7``.\n",
      "- A boolean array.\n",
      "- A ``callable`` function with one argument (the calling Series or\n",
      "  DataFrame) and that returns valid output for indexing (one of the above).\n",
      "  This is useful in method chains, when you don't have a reference to the\n",
      "  calling object, but would like to base your selection on some value.\n",
      "\n",
      "``.iloc`` will raise ``IndexError`` if a requested indexer is\n",
      "out-of-bounds, except *slice* indexers which allow out-of-bounds\n",
      "indexing (this conforms\n"
     ]
    }
   ],
   "source": [
    "# To only select every second row, we use the iloc property which is documentend here:\n",
    "# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html\n",
    "\n",
    "# in short: it allows access to the dataframe cells by purely integer indexing (without using column names etc.)\n",
    "\n",
    "print(tmp3.iloc.__doc__[:800])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e9a21170",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tmp4 :=\n",
       "1  Austria         NaN\n",
       "2  Austria         NaN\n",
       "3  Austria   -1.000000\n",
       "4  Austria   -0.755929\n",
       "5  Austria    0.693375\n",
       "Name: Belgium, dtype: float64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(type(tmp4)) := pandas.core.series.Series"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# the slicing-syntax of iloc is like for numpy arrays: `start:stop:step` for rows and columns \n",
    "# `0::2` means: beginning with row index 0 until the end (no stop index) with a stepsizt of 2\n",
    "# the `-1` means: just select the last column (index -1), no colon -> no slice\n",
    "\n",
    "# for more information see: https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding\n",
    "\n",
    "tmp4 = tmp3.iloc[0::2, -1] ##:\\n\n",
    "type(tmp4) ##:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5a0c1e94",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tmp5 :=\n",
       "0         NaN\n",
       "1         NaN\n",
       "2   -1.000000\n",
       "3   -0.755929\n",
       "4    0.693375\n",
       "Name: Belgium, dtype: float64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "---"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# finally, we install a new index beginning at 0:\n",
    "tmp5 = tmp4.reset_index(drop=True)##:\\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}