{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pandas DataFrame with N/As\n", "\n", "Solving GitHub [issue number 6](https://github.com/ideonate/nb2xls/issues/6)\n", "\n", "[Source](https://pandas.pydata.org/pandas-docs/stable/user_guide/cookbook.html#cookbook-multi-index)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2
01
a1123
2345
b1678
a1345
\n", "
" ], "text/plain": [ " 2\n", "0 1 \n", "a 1 123\n", " 2 345\n", "b 1 678\n", "a 1 345" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "np.random.seed(0)\n", "\n", "df = pd.DataFrame([['a',1,123], ['a',2,345], ['b',1,678], ['a',1,345]]).set_index([0,1])\n", "df" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rowOne_XOne_YTwo_XTwo_Y
001.11.21.111.22
111.11.21.111.22
221.11.21.111.22
\n", "
" ], "text/plain": [ " row One_X One_Y Two_X Two_Y\n", "0 0 1.1 1.2 1.11 1.22\n", "1 1 1.1 1.2 1.11 1.22\n", "2 2 1.1 1.2 1.11 1.22" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({'row': [0, 1, 2],\n", " 'One_X': [1.1, 1.1, 1.1],\n", " 'One_Y': [1.2, 1.2, 1.2],\n", " 'Two_X': [1.11, 1.11, 1.11],\n", " 'Two_Y': [1.22, 1.22, 1.22]})\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
One_XOne_YTwo_XTwo_Y
row
01.11.21.111.22
11.11.21.111.22
21.11.21.111.22
\n", "
" ], "text/plain": [ " One_X One_Y Two_X Two_Y\n", "row \n", "0 1.1 1.2 1.11 1.22\n", "1 1.1 1.2 1.11 1.22\n", "2 1.1 1.2 1.11 1.22" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.set_index('row'); df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OneTwo
XYXY
row
01.11.21.111.22
11.11.21.111.22
21.11.21.111.22
\n", "
" ], "text/plain": [ " One Two \n", " X Y X Y\n", "row \n", "0 1.1 1.2 1.11 1.22\n", "1 1.1 1.2 1.11 1.22\n", "2 1.1 1.2 1.11 1.22" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns = pd.MultiIndex.from_tuples([tuple(c.split('_'))\n", " for c in df.columns])\n", "\n", "df " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
level_1XY
row
0One1.101.20
0Two1.111.22
1One1.101.20
1Two1.111.22
2One1.101.20
2Two1.111.22
\n", "
" ], "text/plain": [ " level_1 X Y\n", "row \n", "0 One 1.10 1.20\n", "0 Two 1.11 1.22\n", "1 One 1.10 1.20\n", "1 Two 1.11 1.22\n", "2 One 1.10 1.20\n", "2 Two 1.11 1.22" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.stack(0).reset_index(1); df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SampleAll_XAll_Y
row
0One1.101.20
0Two1.111.22
1One1.101.20
1Two1.111.22
2One1.101.20
2Two1.111.22
\n", "
" ], "text/plain": [ " Sample All_X All_Y\n", "row \n", "0 One 1.10 1.20\n", "0 Two 1.11 1.22\n", "1 One 1.10 1.20\n", "1 Two 1.11 1.22\n", "2 One 1.10 1.20\n", "2 Two 1.11 1.22" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns = ['Sample', 'All_X', 'All_Y']; df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABC
OIOIOI
n1.7640520.4001570.9787382.2408931.867558-0.977278
m0.950088-0.151357-0.1032190.4105990.1440441.454274
\n", "
" ], "text/plain": [ " A B C \n", " O I O I O I\n", "n 1.764052 0.400157 0.978738 2.240893 1.867558 -0.977278\n", "m 0.950088 -0.151357 -0.103219 0.410599 0.144044 1.454274" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols = pd.MultiIndex.from_tuples([(x, y) for x in ['A', 'B', 'C']\n", " for y in ['O', 'I']])\n", "\n", "df = pd.DataFrame(np.random.randn(2, 6), index=['n', 'm'], columns=cols)\n", "df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABC
OIOIOI
n0.944577-0.4094610.524074-2.2929951.01.0
m6.595840-0.104078-0.7165810.2823391.01.0
\n", "
" ], "text/plain": [ " A B C \n", " O I O I O I\n", "n 0.944577 -0.409461 0.524074 -2.292995 1.0 1.0\n", "m 6.595840 -0.104078 -0.716581 0.282339 1.0 1.0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.div(df['C'], level=1); df" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MyData
AAone11
six22
BBone33
two44
six55
\n", "
" ], "text/plain": [ " MyData\n", "AA one 11\n", " six 22\n", "BB one 33\n", " two 44\n", " six 55" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coords = [('AA', 'one'), ('AA', 'six'), ('BB', 'one'), ('BB', 'two'),\n", " ('BB', 'six')]\n", "\n", "index = pd.MultiIndex.from_tuples(coords)\n", "\n", "df = pd.DataFrame([11, 22, 33, 44, 55], index, ['MyData']); df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExamsLabs
IIIIII
StudentCourse
AdaComp70717273
Math71737574
Sci72757575
QuinnComp73747576
Math74767877
Sci75787878
VioletComp76777879
Math77798180
Sci78818181
\n", "
" ], "text/plain": [ " Exams Labs \n", " I II I II\n", "Student Course \n", "Ada Comp 70 71 72 73\n", " Math 71 73 75 74\n", " Sci 72 75 75 75\n", "Quinn Comp 73 74 75 76\n", " Math 74 76 78 77\n", " Sci 75 78 78 78\n", "Violet Comp 76 77 78 79\n", " Math 77 79 81 80\n", " Sci 78 81 81 81" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import itertools\n", "\n", "index = list(itertools.product(['Ada', 'Quinn', 'Violet'],\n", " ['Comp', 'Math', 'Sci']))\n", "\n", "headr = list(itertools.product(['Exams', 'Labs'], ['I', 'II']))\n", "\n", "indx = pd.MultiIndex.from_tuples(index, names=['Student', 'Course'])\n", "\n", "cols = pd.MultiIndex.from_tuples(headr) # Notice these are un-named\n", "\n", "data = [[70 + x + y + (x * y) % 3 for x in range(4)] for y in range(9)]\n", "\n", "df = pd.DataFrame(data, indx, cols); df\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }