# Pandas DataFrame with N/As

Solving GitHub [issue number 6](https://github.com/ideonate/nb2xls/issues/6)

Examples adapted from the [MultiIndexing section of the pandas cookbook](https://pandas.pydata.org/pandas-docs/stable/user_guide/cookbook.html#cookbook-multi-index).

In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so draws from np.random are repeatable on a fresh run.
np.random.seed(0)

# Each row is [outer key, inner key, value]. Columns 0 and 1 become a
# two-level row index; the key ('a', 1) occurs twice, so the index is
# deliberately non-unique.
df = pd.DataFrame(
    [
        ['a', 1, 123],
        ['a', 2, 345],
        ['b', 1, 678],
        ['a', 1, 345],
    ]
).set_index([0, 1])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2
0,1,Unnamed: 2_level_1
a,1,123
a,2,345
b,1,678
a,1,345


In [2]:
# Flat frame whose measurement columns encode two attributes in one label
# ('<group>_<axis>'); every row carries the same measurement values.
df = pd.DataFrame(
    {
        'row': list(range(3)),
        'One_X': [1.1] * 3,
        'One_Y': [1.2] * 3,
        'Two_X': [1.11] * 3,
        'Two_Y': [1.22] * 3,
    }
)
df

Unnamed: 0,row,One_X,One_Y,Two_X,Two_Y
0,0,1.1,1.2,1.11,1.22
1,1,1.1,1.2,1.11,1.22
2,2,1.1,1.2,1.11,1.22


In [3]:
# Promote the 'row' column to the index so that only measurement columns remain.
# NOTE: this rebinds `df`; re-running the cell on its own fails because 'row'
# is no longer a column.
df = df.set_index(keys='row')
df

Unnamed: 0_level_0,One_X,One_Y,Two_X,Two_Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1.1,1.2,1.11,1.22
1,1.1,1.2,1.11,1.22
2,1.1,1.2,1.11,1.22


In [4]:
# Split every flat label on '_' (e.g. 'One_X' -> ('One', 'X')) and install the
# resulting tuples as a two-level column MultiIndex on the existing frame.
df.columns = pd.MultiIndex.from_tuples(
    [tuple(label.split('_')) for label in df.columns]
)
df

Unnamed: 0_level_0,One,One,Two,Two
Unnamed: 0_level_1,X,Y,X,Y
row,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,1.1,1.2,1.11,1.22
1,1.1,1.2,1.11,1.22
2,1.1,1.2,1.11,1.22


In [5]:
# Pivot the outer column level (One/Two) down into the row index, then move
# that new index level back out as an ordinary column. The level has no name,
# so reset_index labels the column 'level_1'.
df = df.stack(level=0).reset_index(level=1)
df

Unnamed: 0_level_0,level_1,X,Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22


In [6]:
# Replace the auto-generated column labels with descriptive names
# (assigned positionally, so the list length must match the column count).
df.columns = ['Sample', 'All_X', 'All_Y']
df

Unnamed: 0_level_0,Sample,All_X,All_Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22


In [7]:
# Two-level column index: every group A/B/C paired with every sub-column O/I,
# in the order (A, O), (A, I), (B, O), ...
cols = pd.MultiIndex.from_product([['A', 'B', 'C'], ['O', 'I']])

# Draw from a locally seeded legacy generator instead of the global np.random
# state. RandomState(0) yields the same numbers as the first draw after
# np.random.seed(0), so the values are identical no matter how often, or in
# which order, this cell is executed. The global RNG stream is left untouched.
df = pd.DataFrame(
    np.random.RandomState(0).randn(2, 6),
    index=['n', 'm'],
    columns=cols,
)
df

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,O,I,O,I,O,I
n,1.764052,0.400157,0.978738,2.240893,1.867558,-0.977278
m,0.950088,-0.151357,-0.103219,0.410599,0.144044,1.454274


In [8]:
# Divide every top-level group by group 'C', aligning on the inner (O/I)
# column level; the 'C' columns themselves therefore become 1.0.
df = df.div(other=df['C'], level=1)
df

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,O,I,O,I,O,I
n,0.944577,-0.409461,0.524074,-2.292995,1.0,1.0
m,6.59584,-0.104078,-0.716581,0.282339,1.0,1.0


In [9]:
# (outer, inner) labels for each row; the inner labels differ between groups.
coords = [
    ('AA', 'one'),
    ('AA', 'six'),
    ('BB', 'one'),
    ('BB', 'two'),
    ('BB', 'six'),
]

index = pd.MultiIndex.from_tuples(coords)

# Single-column frame keyed by the two-level row index built above.
df = pd.DataFrame(data=[11, 22, 33, 44, 55], index=index, columns=['MyData'])
df

Unnamed: 0,Unnamed: 1,MyData
AA,one,11
AA,six,22
BB,one,33
BB,two,44
BB,six,55


In [10]:
import itertools

# Row keys: every (student, course) pair, student-major order.
index = list(
    itertools.product(
        ['Ada', 'Quinn', 'Violet'],
        ['Comp', 'Math', 'Sci'],
    )
)

# Column keys: every (assessment type, part) pair.
headr = list(itertools.product(['Exams', 'Labs'], ['I', 'II']))

indx = pd.MultiIndex.from_tuples(index, names=['Student', 'Course'])

cols = pd.MultiIndex.from_tuples(headr)   # Notice these are un-named

# Synthetic 9 x 4 score grid: one inner list per row key, one entry per column key.
data = [
    [70 + col + row + (col * row) % 3 for col in range(4)]
    for row in range(9)
]

df = pd.DataFrame(data=data, index=indx, columns=cols)
df


Unnamed: 0_level_0,Unnamed: 1_level_0,Exams,Exams,Labs,Labs
Unnamed: 0_level_1,Unnamed: 1_level_1,I,II,I,II
Student,Course,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Ada,Comp,70,71,72,73
Ada,Math,71,73,75,74
Ada,Sci,72,75,75,75
Quinn,Comp,73,74,75,76
Quinn,Math,74,76,78,77
Quinn,Sci,75,78,78,78
Violet,Comp,76,77,78,79
Violet,Math,77,79,81,80
Violet,Sci,78,81,81,81
