{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%run ../../common_functions/import_all.py\n",
"\n",
"from common_functions.setup_notebook import set_css_style, setup_matplotlib, config_ipython\n",
"config_ipython()\n",
"setup_matplotlib()\n",
"set_css_style()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Some notes on Pandas\n",
"\n",
"Nothing too deep: some basic introduction to the library."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataframes & Series"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialisation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" colA | \n",
" colB | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 2 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 4 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" colA colB\n",
"0 NaN NaN\n",
"1 NaN NaN\n",
"2 NaN NaN\n",
"3 NaN NaN\n",
"4 NaN NaN"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"0 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
"dtype: object"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame from a dict of equal-length lists (keys become the column names)\n",
"d = {\n",
"    'name': ['n1', 'n1', 'n2', 'n2', 'n2'],\n",
"    'id': ['1', '2', '3', '4', '5'],\n",
"    'col_A': [4, 39, 49, 1, 0],\n",
"}\n",
"df = pd.DataFrame(d)\n",
"\n",
"# DataFrame from a list of dicts (one dict per row)\n",
"dicts_list = [\n",
"    {'a': 1, 'b': 'bla'},\n",
"    {'a': 2, 'b': 'blabla'},\n",
"]\n",
"df = pd.DataFrame(dicts_list)\n",
"\n",
"# DataFrame from a plain list\n",
"df = pd.DataFrame([1, 2, 3])\n",
"\n",
"# Empty DataFrame: only the column names and a row index (0..4) are given,\n",
"# so every value is initialised to NaN\n",
"df = pd.DataFrame(index=np.arange(5), columns=['colA', 'colB'])\n",
"df\n",
"\n",
"# Series from a list\n",
"# NOTE(review): the list holds one element (the whole array), so this Series has a\n",
"# single row of dtype object; pd.Series(np.arange(10)) would give one value per row\n",
"s = pd.Series([np.arange(10)])\n",
"s"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 0 | \n",
" 0.3 | \n",
" a | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 0 | \n",
" 0.5 | \n",
" b | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 1 | \n",
" 0.7 | \n",
" c | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d\n",
"0 1 0 0.3 a\n",
"1 2 0 0.5 b\n",
"2 3 1 0.7 c"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Small mixed-type DataFrame (ints, floats, strings) to start off with\n",
"df = pd.DataFrame({\n",
"    'a': [1, 2, 3],\n",
"    'b': [0, 0, 1],\n",
"    'c': [0.3, 0.5, 0.7],\n",
"    'd': ['a', 'b', 'c'],\n",
"})\n",
"\n",
"# A bare expression is rendered by Jupyter as a table\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Columns: Index(['a', 'b', 'c', 'd'], dtype='object')\n",
"* One column: 0 1\n",
"1 2\n",
"2 3\n",
"Name: a, dtype: int64\n",
"* Transpose 0 1 2\n",
"a 1 2 3\n",
"b 0 0 1\n",
"c 0.3 0.5 0.7\n",
"d a b c\n",
"* Types a int64\n",
"b int64\n",
"c float64\n",
"d object\n",
"dtype: object\n",
"* Dense/sparse types? a int64:dense\n",
"b int64:dense\n",
"c float64:dense\n",
"d object:dense\n",
"dtype: object\n",
"* Shape (3, 4)\n",
"* Size (num elements) 12\n",
"* Num of dimensions 2\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/martina/Desktop/Mallzee/repos/plantation/venv/lib/python3.7/site-packages/ipykernel_launcher.py:14: FutureWarning: DataFrame.ftypes is deprecated and will be removed in a future version. Use DataFrame.dtypes instead.\n",
" \n"
]
}
],
"source": [
"# Get dataframe columns\n",
"print('* Columns: ', df.columns)\n",
"\n",
"# Select one column by attribute access (this gives a Series)\n",
"print('* One column: ', df.a)\n",
"\n",
"# \"Transpose\" of dataframe\n",
"print('* Transpose', df.T)\n",
"\n",
"# Types in df\n",
"print('* Types', df.dtypes)\n",
"\n",
"# Are types dense/sparse?\n",
"# DataFrame.ftypes is deprecated and absent from later pandas releases, so the same\n",
"# '<dtype>:<dense|sparse>' labels are built from dtypes instead\n",
"storage_types = df.dtypes.map(\n",
"    lambda t: f'{t.subtype}:sparse' if isinstance(t, pd.SparseDtype) else f'{t}:dense'\n",
")\n",
"print('* Dense/sparse types?', storage_types)\n",
"\n",
"# Shape of df (in a Numpy fashion)\n",
"print('* Shape', df.shape)\n",
"print('* Size (num elements)', df.size)\n",
"print('* Num of dimensions', df.ndim)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SQL-like queries"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alice | \n",
" 25 | \n",
" False | \n",
" 0.3 | \n",
" 1.71 | \n",
"
\n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
" False | \n",
" 0.5 | \n",
" 1.60 | \n",
"
\n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
"
\n",
" \n",
" | 3 | \n",
" Paula | \n",
" 45 | \n",
" False | \n",
" 0.1 | \n",
" 1.52 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
" True | \n",
" 0.6 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height\n",
"0 Alice 25 False 0.3 1.71\n",
"1 Barbara 29 False 0.5 1.60\n",
"2 Martina 29 True 0.7 1.52\n",
"3 Paula 45 False 0.1 1.52\n",
"4 Kimberly 29 True 0.6 1.52"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Small table of people used by the SQL-like examples below\n",
"df = pd.DataFrame(dict(\n",
"    name=['Alice', 'Barbara', 'Martina', 'Paula', 'Kimberly'],\n",
"    age=[25, 29, 29, 45, 29],\n",
"    is_good=[False, False, True, False, True],\n",
"    score=[0.3, 0.5, 0.7, 0.1, 0.6],\n",
"    height=[1.71, 1.60, 1.52, 1.52, 1.52],\n",
"))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Alice\n",
"1 Barbara\n",
"2 Martina\n",
"3 Paula\n",
"4 Kimberly\n",
"Name: name, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alice | \n",
" 25 | \n",
"
\n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
"
\n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
"
\n",
" \n",
" | 3 | \n",
" Paula | \n",
" 45 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age\n",
"0 Alice 25\n",
"1 Barbara 29\n",
"2 Martina 29\n",
"3 Paula 45\n",
"4 Kimberly 29"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# SELECT\n",
"\n",
"# One column -> Series (attribute access, df.name, is equivalent)\n",
"df['name']\n",
"\n",
"# Two columns -> DataFrame (note the list of column names)\n",
"df.loc[:, ['name', 'age']]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
" False | \n",
" 0.5 | \n",
" 1.60 | \n",
"
\n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
" True | \n",
" 0.6 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height\n",
"1 Barbara 29 False 0.5 1.60\n",
"2 Martina 29 True 0.7 1.52\n",
"4 Kimberly 29 True 0.6 1.52"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height\n",
"2 Martina 29 True 0.7 1.52"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# WHERE\n",
"\n",
"# Condition on one column: index the frame with a boolean mask\n",
"df[df['age'] == 29]\n",
"\n",
"# Conditions on two columns: combine the masks with &, each one in parentheses\n",
"df[(df['age'] == 29) & (df['name'] == 'Martina')]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
" False | \n",
" 0.5 | \n",
" 1.60 | \n",
"
\n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
"
\n",
" \n",
" | 3 | \n",
" Paula | \n",
" 45 | \n",
" False | \n",
" 0.1 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height\n",
"1 Barbara 29 False 0.5 1.60\n",
"2 Martina 29 True 0.7 1.52\n",
"3 Paula 45 False 0.1 1.52"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alice | \n",
" 25 | \n",
" False | \n",
" 0.3 | \n",
" 1.71 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
" True | \n",
" 0.6 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height\n",
"0 Alice 25 False 0.3 1.71\n",
"4 Kimberly 29 True 0.6 1.52"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# LIKE and NOT LIKE\n",
"\n",
"# Rows whose name contains 'a' (case-sensitive, so 'Alice' does not match)\n",
"df[df.name.str.contains('a')]\n",
"\n",
"# Negate the mask with ~ to get NOT LIKE\n",
"df[~df.name.str.contains('a')]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" | age | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 25 | \n",
" 0.0 | \n",
" 0.3 | \n",
" 1.71 | \n",
"
\n",
" \n",
" | 29 | \n",
" 2.0 | \n",
" 1.8 | \n",
" 4.64 | \n",
"
\n",
" \n",
" | 45 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" is_good score height\n",
"age \n",
"25 0.0 0.3 1.71\n",
"29 2.0 1.8 4.64\n",
"45 0.0 0.1 1.52"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" | age | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 25 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 29 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" | 45 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name is_good score height\n",
"age \n",
"25 1 1 1 1\n",
"29 3 3 3 3\n",
"45 1 1 1 1"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"age\n",
"25 1\n",
"29 3\n",
"45 1\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"age\n",
"25 1\n",
"29 3\n",
"45 1\n",
"Name: score, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" is_good | \n",
" score | \n",
"
\n",
" \n",
" | age | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 25 | \n",
" False | \n",
" 0.3 | \n",
"
\n",
" \n",
" | 29 | \n",
" True | \n",
" 0.6 | \n",
"
\n",
" \n",
" | 45 | \n",
" False | \n",
" 0.1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" is_good score\n",
"age \n",
"25 False 0.3\n",
"29 True 0.6\n",
"45 False 0.1"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" is_good | \n",
" score | \n",
"
\n",
" \n",
" | age | \n",
" height | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 25 | \n",
" 1.71 | \n",
" 0.0 | \n",
" 0.3 | \n",
"
\n",
" \n",
" | 29 | \n",
" 1.52 | \n",
" 2.0 | \n",
" 1.3 | \n",
"
\n",
" \n",
" | 1.60 | \n",
" 0.0 | \n",
" 0.5 | \n",
"
\n",
" \n",
" | 45 | \n",
" 1.52 | \n",
" 0.0 | \n",
" 0.1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" is_good score\n",
"age height \n",
"25 1.71 0.0 0.3\n",
"29 1.52 2.0 1.3\n",
" 1.60 0.0 0.5\n",
"45 1.52 0.0 0.1"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# GROUPBY\n",
"\n",
"# Grouping on one col\n",
"# numeric_only=True keeps the string column `name` out of the sums on every pandas version\n",
"df.groupby('age').sum(numeric_only=True)  # getting sums for the other (numeric) columns\n",
"df.groupby('age').count()  # getting counts of values for the other columns (excludes null values)\n",
"df.groupby('age').size()  # getting num of items per grouped value\n",
"df.groupby('age')['score'].count()  # count restricted to a single column\n",
"\n",
"# agg allows running a different aggregating function on each col;\n",
"# the functions are named by string ('max', 'mean') rather than passed as numpy callables\n",
"df.groupby('age').agg({'is_good': 'max', 'score': 'mean'})\n",
"\n",
"# Grouping on more cols\n",
"df.groupby(['age', 'height']).sum(numeric_only=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Some operations"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alice | \n",
" 25 | \n",
" False | \n",
" 0.3 | \n",
" 1.71 | \n",
"
\n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
" False | \n",
" 0.5 | \n",
" 1.60 | \n",
"
\n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
"
\n",
" \n",
" | 3 | \n",
" Paula | \n",
" 45 | \n",
" False | \n",
" 0.1 | \n",
" 1.52 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
" True | \n",
" 0.6 | \n",
" 1.52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height\n",
"0 Alice 25 False 0.3 1.71\n",
"1 Barbara 29 False 0.5 1.60\n",
"2 Martina 29 True 0.7 1.52\n",
"3 Paula 45 False 0.1 1.52\n",
"4 Kimberly 29 True 0.6 1.52"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rebuild the table of people so this section starts from a fresh DataFrame\n",
"df = pd.DataFrame({\n",
"    'name': ['Alice', 'Barbara', 'Martina', 'Paula', 'Kimberly'],\n",
"    'age': [25, 29, 29, 45, 29],\n",
"    'is_good': [False, False, True, False, True],\n",
"    'score': [0.3, 0.5, 0.7, 0.1, 0.6],\n",
"    'height': [1.71, 1.60, 1.52, 1.52, 1.52],\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"array([False, True])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"is_good\n",
"False 3\n",
"True 2\n",
"Name: name, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"is_good\n",
"False 3\n",
"Name: name, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"is_good\n",
"False True\n",
"True False\n",
"Name: name, dtype: bool"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"name\n",
"Alice 0.3\n",
"Barbara 0.5\n",
"Kimberly 0.6\n",
"Martina 0.7\n",
"Paula 0.1\n",
"Name: score, dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Alice name age is_good score height new_col\n",
"0 Alice 25 False 0.3 1.71 -0.2\n",
"Barbara name age is_good score height new_col\n",
"1 Barbara 29 False 0.5 1.6 0.0\n",
"Kimberly name age is_good score height new_col\n",
"4 Kimberly 29 True 0.6 1.52 0.1\n",
"Martina name age is_good score height new_col\n",
"2 Martina 29 True 0.7 1.52 0.2\n",
"Paula name age is_good score height new_col\n",
"3 Paula 45 False 0.1 1.52 -0.4\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
" new_col | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alice | \n",
" 25 | \n",
" False | \n",
" 0.3 | \n",
" 1.71 | \n",
" -0.2 | \n",
"
\n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
" False | \n",
" 0.5 | \n",
" 1.60 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | 3 | \n",
" Paula | \n",
" 45 | \n",
" False | \n",
" 0.1 | \n",
" 1.52 | \n",
" -0.4 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
" True | \n",
" 0.6 | \n",
" 1.52 | \n",
" 0.1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height new_col\n",
"0 Alice 25 False 0.3 1.71 -0.2\n",
"1 Barbara 29 False 0.5 1.60 0.0\n",
"2 Martina 29 True 0.7 1.52 0.2\n",
"3 Paula 45 False 0.1 1.52 -0.4\n",
"4 Kimberly 29 True 0.6 1.52 0.1"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" is_good | \n",
" score | \n",
" height | \n",
" new_col | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" Martina | \n",
" 29 | \n",
" True | \n",
" 0.7 | \n",
" 1.52 | \n",
" 0.2 | \n",
"
\n",
" \n",
" | 4 | \n",
" Kimberly | \n",
" 29 | \n",
" True | \n",
" 0.6 | \n",
" 1.52 | \n",
" 0.1 | \n",
"
\n",
" \n",
" | 1 | \n",
" Barbara | \n",
" 29 | \n",
" False | \n",
" 0.5 | \n",
" 1.60 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 0 | \n",
" Alice | \n",
" 25 | \n",
" False | \n",
" 0.3 | \n",
" 1.71 | \n",
" -0.2 | \n",
"
\n",
" \n",
" | 3 | \n",
" Paula | \n",
" 45 | \n",
" False | \n",
" 0.1 | \n",
" 1.52 | \n",
" -0.4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age is_good score height new_col\n",
"2 Martina 29 True 0.7 1.52 0.2\n",
"4 Kimberly 29 True 0.6 1.52 0.1\n",
"1 Barbara 29 False 0.5 1.60 0.0\n",
"0 Alice 25 False 0.3 1.71 -0.2\n",
"3 Paula 45 False 0.1 1.52 -0.4"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"age 31.400\n",
"is_good 0.400\n",
"score 0.440\n",
"height 1.574\n",
"new_col -0.060\n",
"dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Positional index of a column, given its name\n",
"df.columns.get_loc(\"score\")\n",
"\n",
"# List unique values in the df['is_good'] column\n",
"df.is_good.unique()\n",
"\n",
"# Count rows per is_good value and then sort; counts is a Series\n",
"counts = df.groupby('is_good').count()['name']\n",
"counts.sort_values(ascending=False)\n",
"\n",
"# Filter the counts with a boolean mask (like a WHERE)\n",
"counts[counts > 2]\n",
"\n",
"# and this is the mask itself\n",
"counts > 2\n",
"\n",
"# groupby creates a GroupBy object, not a DataFrame.\n",
"# Summing score per name group (other operations are count, mean, ...) gives a Series\n",
"# whose index is the column the grouping was done on, i.e. the names.\n",
"# Use as_index=False when grouping to keep the grouping cols as columns rather than indices.\n",
"grouped = df.groupby('name')\n",
"summed = grouped['score'].sum()\n",
"summed\n",
"indices = summed.index.get_level_values('name')\n",
"\n",
"# Iterating over the GroupBy object yields (group name, sub-DataFrame) pairs\n",
"for group_name, group in grouped:\n",
"    print(group_name, group)\n",
"\n",
"# To create a new column by manipulating existing ones\n",
"# NOTE: this modifies df in place, so when the cell is re-run in the same kernel the\n",
"# groups printed above already contain new_col\n",
"df['new_col'] = df['score'] - 0.5\n",
"df\n",
"\n",
"# to sort dataframe on column\n",
"df.sort_values('new_col', ascending=False)\n",
"\n",
"# Series with the mean of every numeric column; numeric_only=True skips the string\n",
"# column `name`, which otherwise raises a TypeError on pandas >= 2.0\n",
"df.mean(numeric_only=True)\n",
"\n",
"# filling the NaNs with 0\n",
"df = df.fillna(value=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting from dataframes"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMsAAALwCAYAAACJAPL2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAWJQAAFiUBSVIk8AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzdP2yU9/3A8Q8/sdzZXSx6FzHFTseeV+yM4Twa0ykS9pKooUPTSmEJUdJKDZVKhmRJh8RIZTGRYlkqxls5VjDZembFMKEcYYvvbvRv4GM3YP4Y/ODH2K/XBM89d99PJfod3nnue4fW19fXAwAAAACI/yt7AAAAAADYK8QyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAdLjsAXjo3r17ZY/Aa+jIkSMREfHgwYOSJwEOMnsRUDb7ELAX2Iv2lqNHj770ez1ZBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAACkw2UP8DpZXFyMn3/+OXq9XqytrcX09HTU6/WyxwIAAACgIGLZNs3NzcXvfve7GBgYiIiH4ezs2bPxz3/+c/MaAAAAAK83X8Pcpps3b0an09n8+9TUVHS73Wi1WiVOBQAAAECR9u2TZd1uNz7//PP44osvnnnf6upqtFqtqFarERHR6/Vienp6y9Nia2trcf/+/RgZGXnk+s8//1zs4AAAAACUZl/GslarFZcuXXrufe12Oy5cuBDnz5/fjGPtdjvOnj37yLWIiIsXLz7y3tXV1YiI+M1vflPg5AAAAACUaV99DXN2djbOnTsX3W43BgcHn3v/hQsXtjxFNjo6GrVa7bmx7dKlS9FoNGJsbGzHcwMAAACwN+yrJ8tOnz69+efr16/H2traU+9tt9vR6XSi0WhseW18fDxmZ2cf+bxfWlxcjLW1ted+xRMAAACA18u+erLsRSwvL0dEPPGXLGu1WkT876uWj7/v9u3bQhkAAADAPnRgY9nt27ejXq8/8bWN6ysrK49cb7fb0W6348yZM5vXFhcXX92QAAAAAOyqffU1zBdx//79555r1ul0Nv+8uroai4uLMTU1Fe12OyIexrM33njjlc4JAAAAwO45sLFsOz8C8MszzzZ+OODxp83Onz//SuYDAAAAYPcd2FgWEVGtVp94fSOidbvdzWsXL17c0Voff/zxE69vnH125MiRHX0+B9Phww//L+zfD1AmexFQNvsQsBfYi/aPA3tm2bNsPFH2pMP/AQAAANi/DuyTZQMDA9Hr9Z55z/O+pvkinvfrmQ8ePChsLQ6Ojf9i4d8PUCZ7EVA2+xCwF9iL9pajR4++9HsP7JNlRYYwAAAAAPaHAxvLarXaI792+Usb10dGRnZzJAAAAABKdmBj2fj4+FNf2/h6plgGAAAAcLAc2FjWaDQiIqLdbm95rd1uR71eF8sAAAAADph9G8ued3h/vV6PsbGxaLVaj1zvdrtx48aNmJ6efpXjAQAAALAHHVpfX18ve4iiLC4uRrvdjtXV1eh2uxHx8Fcva7VaNBqNmJmZ2fKec+fOxfDwcMzMzESn04nZ2dkYHR2NqampXZ393r17u7oe+4NfWwH2AnsRUDb7ELAX2Iv2lp38Gua+imUva3V1NVZWVmJgYCAajUbU6/Vdn0Es42XYjIG9wF4ElM0+BOwF9qK9ZSex7HCBc7y2RkZGnE8GAAAAwP49swwAAAAAXpRYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASS
wDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAACkw2UPcBD1+/3o9/sRETE0NFTyNAAAAABsEMtKsLS0FAsLCxERMT8/X/I0AAAAAGwQy0owOTkZzWaz7DEAAAAAeIxYVoJKpRKVSqXsMQAAAAB4jAP+AQAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASIfLHuAg6vf70e/3IyJiaGio5GkAAAAA2CCWlWBpaSkWFhYiImJ+fr7kaQAAAADYIJaVYHJyMprNZtljAAAAAPAYsawElUolKpVK2WMAAAAA8BgH/AMAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAANLhsgc4iPr9fvT7/YiIGBoaKnkaAAAAADaIZSVYWlqKhYWFiIiYn58veRoAAAAANohlJZicnIxms1n2GAAAAAA8RiwrQaVSiUqlUvYYAAAAADzGAf8AAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAANLhsgc4iPr9fvT7/YiIGBoaKnkaAAAAADaIZSVYWlqKhYWFiIiYn58veRoAAAAANohlJZicnIxms1n2GAAAAAA8RiwrQaVSiUqlUvYYAAAAADzGAf8AAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAAKTDZQ9wEPX7/ej3+xERMTQ0VPI0AAAAAGwQy0qwtLQUCwsLERExPz9f8jQAAAAAbBDLSjA5ORnNZrPsMQAAAAB4jFhWgkqlEpVKpewxAAAAAHiMA/4BAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASyw
AAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAANLhsgc4iPr9fvT7/YiIGBoaKnkaAAAAADaIZSVYWlqKhYWFiIiYn58veRoAAAAANohlJZicnIxms1n2GAAAAAA8RiwrQaVSiUqlUvYYAAAAADzGAf8AAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACBtO5b1er1XOQcAAAAAlO7wdm987733YmBgIBqNRoyOjkaj0YharfZCi127di3eeuutePPNN190TgAAAAB45bYdyyIiut1uLC8vx/LyckREDAwMxOjoaIyPj0ej0YhqtfrM94+MjMR//vOfuHPnTjSbzTh+/PjLTw4AAAAABTu0vr6+vp0b33333Zieno6IiP/+979x69atLffU6/VHnjx7VjxrtVrR6/XixIkTLzn6/nLv3r2yR+A1dOTIkYiIePDgQcmTAAeZvQgom30I2AvsRXvL0aNHX/q9L/Rk2djYWNRqtc3AdefOnVhZWdmMZ51OJzqdTrRarYh4djxrNptx4cKFlx4cAAAAAIq27Vj20UcfbTmjbHh4OIaHh7cdz0ZGRmJkZCTq9XpERKyurhb1vwMAAAAAdmzbsWxsbOy59zwvnq2urj4SyM6cOfMSIwMAAADAq/FCX8N8UU+LZxH/+0onAAAAAOwVrzSWPW4jngEAAADAXvRKYtndu3fj9u3b0e12Y2BgIMbHx5/5y5gAAAAAsBcUGsvu3r0b33777ZaD+2dnZ2N0dDSmp6fjzTffLHJJAAAAAChMYbGs1+vF3/72t4iIOHbsWEQ8PKPs/v37ERHRbrej3W7H1NRUnDp1qqhlAQAAAKAwhcWyubm5aDabMT09veW1O3fuxNWrV+PGjRuxuLgYvV4vfv/73xe1NAAAAAAU4v+K+qC1tbUnhrKIhwf7nz59Oi5evBiTk5Nx9erVWFpaKmppAAAAAChEYbHs0KFD27pvZmYmPv3005ibm4uffvqpqOUBAAAAYMcKi2W9Xm/b946OjsYHH3wQV69eLWp5AAAAANixwmLZr3/96/jhhx+2fX+z2Yw7d+4UtTwAAAAA7FhhsWxqaiq++eabF/pqZaVSKWp5AAAAANixwmJZvV6Pd955Jz788MNtP2G23XPOAAAAAGA3HC7yw2ZmZuLOnTvx5ZdfxhtvvBHNZjOOHz8e1Wp1y703b96M9fX1IpcHAAAAgB05tP4KitVXX30VN2/e3Px7vV6P4eHhqNfrERFx586daLfb8fXXX0etVit6+dfSvXv3yh6B19CRI0ciIuLBgwclTwIcZPYioGz2IWAvsBftLUePHn3p9xb6ZNmGM2fOxPLycnz77bfR6/Wi0+lEp9PZfL1arcZnn30mlAEAAACwp7ySWBYRMTY2FmNjY7GyshLtdjs6nU4MDg7GyMhIvP3220/8aiYAAAAAlOmVxbINjUYjGo3Gq14GAAAAAHassF/D3K5r167F/fv3t1z/5RlnAAAAAFCGXY1lV65cidnZ2Th79uyW19bX1+PKlSu7OQ4AAAAAPGLXnyz77LPP4tSpU1uuj42NRbVafeJTZwAAAACwG3Y1lq2trUWj0Yhms/nE15
vNZrRard0cCQAAAAA27Wos+/HHH597z+Dg4C5MAgAAAABb7Wosazab8cknn0S/33/qPWtra7s4EQAAAAD8z67GstHR0RgeHo4//vGPsbS0FL1eb8s923n6DAAAAABehcO7veDp06cjImJubi7m5uaiXq/H8PBwDA4Oxo0bN+IPf/jDbo8EAAAAABFRQiyLeBjMxsbGYnFxMVZXV2N5eTmq1WpMT0/HsWPHyhgJAAAAAMqJZREPv5I5Ojpa1vIAAAAAsMWunlkGAAAAAHtZYbHs/fffL+qjAAAAAKAUhcWybrcbP/zwQ1EfBwAAAAC7rtCvYX755Zfx3XffRa/XK/JjAQAAAGBXFHrA//DwcNRqtZibm4tutxujo6MxPj4e1Wq1yGUAAAAA4JUoNJb99a9/fSSMLS8vxzfffBOHDh2Kt99+O44dO1bkcgAAAABQqEPr6+vrRXzQ/fv3o1arPfG1Xq8X169fj+Xl5RgYGIiJiYn47W9/W8Sy+8a9e/fKHoHX0JEjRyIi4sGDByVPAhxk9iKgbPYhYC+wF+0tR48efen3FvZk2dNCWUREtVqNZrMZzWYzWq1WnDt3LgYGBqLZbMapU6eKGgEAAAAAdqSwWNbr9Z56NtmtW7fixo0b0Wq1Nq+tr6/H4OBgUcsDAAAAwI4VFsvee++9+P777zf/fvfu3bh+/Xq0Wq3odrsR8fAJs/Hx8ZiYmIjh4eGilgYAAACAQhR6wP/S0lKsr69Hq9WKTqezef3YsWMxMTERjUajyOUAAAAAoFCFxrK5ubnNP9fr9ZienvYLmAAAAAC8NgqNZbVabfMJsqtXr24+YTY2NvbMHwAAAAAAgL2g0Fj2l7/8ZTOKnT59OiIiVlZW4vLly/HTTz/F2NhYjI+PP/WHAAAAAACgTIXFslqt9sSnxxqNxuZZZcvLy/Htt99Gr9eL8fHxeOedd4paHgAAAAB2rLBY9vXXXz/z9Vu3bsXKykosLy9HRES73Y65ubn417/+VdQIAAAAALAjhcWyTz75JP7xj388cu3u3btx/fr1aLVa0e12N68PDw/H22+/Hc1ms6jlAQAAAGDHCotlq6ur8dNPP8X6+nosLy9vHu6/oVarxdjYWExMTDjsHwAAAIA9qdAD/j/88MNH/l6tVmN8fDwmJiZieHi4yKUAAAAAoHCFxrINx44di4mJic2D/QEAAADgdVBoLJuamopTp04V+ZH7Ur/fj36/HxERQ0NDJU8DAAAAwIZCY9nJkyeL/Lh9a2lpKRYWFiIiYn5+vuRpAAAAANhQWCz74IMPolqtFvVx+9rk5KRfAgUAAADYgwqLZeLP9lUqlahUKmWPAQAAAMBjXskB/1euXImVlZXodDpRr9fj008/3Xzt73//e0xMTMSxY8dexdIAAAAA8NIKjWW3bt2Kr776Krrd7ua1gYGBR+757LPP4tKlS9HpdOLEiRNFLg8AAAAAO/J/RX3Q/fv349y5c1Gr1WJ6ejrOnz8f33//fQwPD2+5d3p6On7++ee4e/duUcsDAAAAwI4VFssuX74cJ06ciPPnz8eJEyc2I9mhQ4eeeP/09HT8+9//Lmp5AAAAANixQp8sm56efqH3rK+vF7U8AAAAAOxYYbHs8bPJtqPf7xe1PAAAAADsWGGxrNfrvfD9a2trRS0PAAAAADtWWCyrVCrxww8/bLn+tK9aXr58+YmH/wMAAABAWQqLZSdPnowvv/wyvvvuu0euP+mA/0uXLkWr1YqZmZmilgcAAACAHTtc1AeNjIzE5ORkLC4uRqvVitHR0RgZGYnV1dW4du1adLvduH37dqysrES3242PPvooqtVqUcsDAAAAwI4dWi/4JykXFxe3PF32uA8++CCazWaRy7727t27V/YIvIaOHDkSEREPHjwoeRLgILMXAWWzDwF7gb1obzl69OhLv7ewJ8s2TE1NxcTERMzNzcWNGzc2D/6v1WrRaDTi5MmTUavVil4WAAAAAHas8CfLeDmeLO
Nl+C8XwF5gLwLKZh8C9gJ70d6ykyfLCjvg/2U87+uaAAAAALCbSo1lrVarzOUBAAAA4BGFn1kWEdHr9WJtbW3zz0/y448/RrfbfRXLAwAAAMBLKTSWXbt2LRYXF6PT6RT5sQAAAACwKwqLZcvLyzE7O7v592q1+sz7n/bEGQAAAACUpbBYdvny5Th+/HjMzMw8N5RFRHS73Xj//feLWh4AAAAAdqywWDY4OBinT5/e9v0DAwPbimoAAAAAsFsK+zXMer3+wu85c+ZMUcsDAAAAwI4VFsvW19df+D2NRqOo5QEAAABgxwqLZePj43HlypUXes+f//znopYHAAAAgB0rLJY1Go2oVqvx3Xffbfs9nU6nqOUBAAAAYMcKO+D/5s2b8atf/SpWV1fj/fffj7feeitqtVoMDAw88X6hDAAAAIC9prBY9s0330Sv19v8e7vdLuqjAQAAAGBXFBbLBgcHI+Lh2WXb0el04tatW0UtDwAAAAA7VlgsGxgYiJMnT8bx48e3/Z533323qOUBAAAAYMcKO+C/VqtFvV5/ofdUq9WilgcAAACAHSvsybIzZ8688HsuXrxY1PIAAAAAsGOFPVkGAAAAAK87sQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkA6XPcBB1O/3o9/vR0TE0NBQydMAAAAAsEEsK8HS0lIsLCxERMT8/HzJ0wAAAACwQSwrweTkZDSbzbLHAAAAAOAxYlkJKpVKVCqVsscAAAAA4DEO+AcAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAA
AAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllAAAAAJDEMgAAAABIYhkAAAAAJLEMAAAAAJJYBgAAAABJLAMAAACAJJYBAAAAQBLLAAAAACCJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiWUAAAAAkMQyAAAAAEhiGQAAAAAksQwAAAAAklgGAAAAAEksAwAAAIAklgEAAABAEssAAAAAIIllL2h1dTX+9Kc/lT0GAAAAAK/A4bIHeF202+1YXl6OiIhOp1PyNAAAAAC8Cp4s26bR0dE4ffp0jI2NlT0KAAAAAK/Ivn2yrNvtxueffx5ffPHFM+9bXV2NVqsV1Wo1IiJ6vV5MT0/HwMDAbowJAAAAwB6yL2NZq9WKS5cuPfe+drsdFy5ciPPnz2/GsXa7HWfPnn3kGgAAAAAHw776Gubs7GycO3cuut1uDA4OPvf+CxcubHmKbHR0NGq12rZiGwAAAAD7y756suz06dObf1fX+okAAA+3SURBVL5+/Xqsra099d52ux2dTicajcaW18bHx2N2dvaRzwMAAABg/9tXT5a9iI1ftnzSVy1rtVpEPDzPDAAAAICD48DGstu3b0e9Xn/iaxvXV1ZWdnMkAAAAAEp2YGPZ/fv3n3tPp9PZhUkAAAAA2Cv21ZllL2I7PwLwyzPPOp1OXL16dfNps6+++ireeuutmJqaeqVzAgAAALB7Dmwsi4ioVqtPvL4R0brd7ua1er0eMzMzL73Wxx9//MTrX3zxRUREHDly5KU/m4Pr8OGH/xf27wcok70IKJt9CNgL7EX7x4H9GuazbDxR9qTD/wEAAADYvw7sk2UDAwPR6/Weec/zvqb5IjaeIHuaBw8eFLYWB8fGf7Hw7wcok70IKJt9CNgL7EV7y9GjR1/6vQf2ybIiQxgAAAAA+8OBjWW1Wu2pv3a5cX1kZGQ3RwIAAACgZAc2lo2Pjz/1tY2vZ4plAAAAAAfLgY1ljUYjIiLa7faW19rtdtTrdbEMAAAA4IDZt7HseYf31+v1GBsbi1ar9cj1brcbN27ciOnp6Vc5HgAAAAB70KH19fX1socoyuLiYrTb7VhdXY1utxsRD3/1slarRaPRiJmZmS3vOXfuXAwPD8fMzEx0Op2YnZ2N0dHRmJqa2tXZ7927t6vrsT/4tRVgL7AXAWWzDwF7gb1ob9nJr2Huq1j2slZXV2NlZSUGBgai0WhEvV7f9RnEMl6GzRjYC+xFQNnsQ8BeYC/aW3YSyw4XOMdra2RkxPlkAAAAAOzfM8sAAAAA4EWJZQAAAACQxDIAAAAASGIZAAAAACSxDAAAAACSWAYAAAAASSwDAAAAgCSWAQAAAEASywAAAAAgiW
UAAAAAkMQyAID/b+/egduo3j4Av8wXmkhpKLy0NnSM1CKndkoQNDRyG6eBAjdJkTAMpgiNKUKDU7iRmv8wkwjKiDZxWqklomVT0KBVSn8FuyKK4/tFFz/PDDNkV3v2rGf0zvHP55wFAICcsAwAAAAAcsIyAAAAAMgJywAAAAAgJywDAAAAgJywDAAAAAByVybdgcvo1atX8erVq4iIeO+99ybcGwAAAAAKwrIJ+O233+KXX36JiIj//e9/E+4NAAAAAAVh2QR88sknsbKyMuluAAAAAPCGd3Z3d3cn3QkAAAAAmAY2+AcAAACAnLDskvn777/jiy++iL///nsu7nmWbZ9FW6dp4yTX3r59O27fvn3se3E0k/i+nJdpe5aL7s+s1KGzau+kbZz0OrXo/Ezbd/c0pvFZ1KLza8uYaL5M4/f3pKbxWdSi82tLLZovF/ldEZYBAAAAQE5YBgAAAAC5//v222+/nXQnuFhXrlyJjz76KN599925uOdZtn0WbZ2mjeNe2+l0IiLixo0bx74XRzOJ78t5mbZnuej+zEodOqv2TtrGSa5Ti87XtH13T2Man0UtOr+2jInmyzR+f09qGp9FLTq/ttSi+XJR3xVvw4QZVqyH/+GHHybcE+AyU4uASVOHgGmgFs0PyzABAAAAIGdmGQAAAADkzCwDAAAAgJywDAAAAAByVybdAWBy2u12/PPPPzEcDmMwGESj0YgkSSbdLeCS6ff78eOPP8aDBw8m3RVgzhVjn2vXrkWaplGv1419gAtn7DP9hGVwSTWbzfj888+jVCpFxL+Dxzt37sRPP/00OgZwnrrdbuzs7ERERJqmE+4NMO86nU6kaRpra2sR8W/d+f777/2yClwYY5/ZYRkmXFLPnz8fK9D1ej2yLItOpzPBXgGXSbVajbW1tajVapPuCnAJtFqtWFlZGf07SZK4evXq6BdXgPNm7DM7zCyDCciyLL777rv44YcfDvxcv9+PTqcTV69ejYiI4XAYjUbjTGZ+DQaDePnyZSwtLY0d/+eff07dNjD9pqEOARzkLOtUmqaRZdme2lUul+Pp06d+cQXeynjp8hKWwQXrdDrRarUO/Vy3242HDx/G/fv3R0W22+3GnTt3xo6d1Pb29ti/+/1+RER8+OGHp2oXmH7TUocA9nPWdWq/5U6lUimyLDu7jgNzw3jpcrMMEy7I1tZWbGxsRJZlUS6XD/38w4cP9/w1olqtxsLCwpGK9nG1Wq2oVCr+sgpzbNrrEMB51anhcBgR8dY2B4PBGfQcmBfGS0SYWQYXpthMNiLi6dOnBw7Mut1upGkalUplz7nl5eXY2toaay/iaBtElsvlt/5lo91ux2AwOHR6MTDbprkOAUScX50qlkYNBoOxGnTUX4aBy+O8x0vMBmEZTKFio9m3/UK5sLAQEf8umyz2G3v9rSoHWVpaGtvYtrjXixcvBGXAmIusQwAncZw6lSRJRMSeJZeDwWB0DuC4jjteYnYIy2AKvXjxYt+BW3G81+uNim61Wo1qtXrs+3S73eh2u7G+vj461m63o16vn6DXwDy5qDoEcFLHqVNJkkSpVNozQ2Q4HMb169fPva/AfDrueInZYc8ymEIvX7489DNHWe50kH6/H+12O2q12ig0azablkcBEXExdQjgNI5bpz777LOxGbDFOfu1AidlvDS/zCyDKXSU/TNOuxltsWllr9cbO37//v1TtQvMh4uoQ2maxpMnT0Z1aHNzMz744AOzW4EjOW6dqtfr0Ww2o9lsxrVr1yJN07h79+55dxOYY8etQ8Y+s0NYBlOq2Ij2TUUxPu1rzre3t091PTD/zrsOJUkSq6urp2oDuNyOW6fUHOCsHacOGfvMDsswYcYUf5mwXBKYFHUImHbqFDBp6tBsE5bBFCqVSjEcDg/8jNecA+dJHQKmnToFTJo6NL+EZTCFFFRg0tQhYNqpU8CkqUPzS1gGU2hhYWHft6YUx71+GDhP6hAw7dQpYNLUof
klLIMptLy8vO+5YpqvogucJ3UImHbqFDBp6tD8EpbBFKpUKhER0e1295zrdruRJImiC5wrdQiYduoUMGnq0PwSlsEEHLYJZJIkUavVotPpjB3PsiyePXsWjUbjPLsHXALqEDDt1Clg0tShy+ud3d3d3Ul3Ai6Ddrsd3W43+v1+ZFkWEf++PWVhYSEqlUqsrq7uuWZjYyMWFxdjdXU10jSNra2tqFarUa/XL7r7wBxQh4Bpp04Bk6YOESEsg6nX7/ej1+tFqVSKSqUSSZJMukvAJaMOAdNOnQImTR2aL8IyAAAAAMjZswwAAAAAcsIyAAAAAMgJywAAAAAgJywDAAAAgJywDAAAAABywjIAAAAAyAnLAAAAACAnLAMAAACAnLAMAAAAAHLCMgAAAADICcsAAAAAICcsAwAAAICcsAwAAAAAcsIyAAAAAMgJywAAAAAgJywDAAAAgJywDAAAAABywjIAAAAAyAnLAAAAACB3ZdIdAADgaLIsi0ePHo0dGw6HUa1WI03TKJVKsbKysu91vV4vyuXy6PiNGzeiVqsder9erxdJkkSWZRERsby8vO99Njc3YzAYxHA4jDRN48GDB5EkSWxtbcVgMIg///wzVlZWol6v77m+3+/H48ePI8uyKJVKkWVZJEkS9Xo9kiQ58s8JAOA03tnd3d2ddCcAADhYmqZx586duHfvXiwtLY2da7fb0Wq1otFo7AmhdnZ2YnNzM2q1Wty6dStKpdLoXLfbjZ2dnVhbW9tzv06nE61WK27dujUWqGVZFj///HOkaRrffPPNWHvF/dI0jVarFRERDx48GPUtIuKrr74aHX89AGs2m/Hrr7/G+vr62P2K43fv3o1qtXqsnxkAwElYhgkAMAO2trZieXl5T1AWEVGv16NSqew53u/3Y3NzMxYXF2N9fX1PsNXpdEb/vW5nZye2trb2BGUREaVSKdbX16NcLsedO3dGs80KtVptrD9PnjyJ69evR5IkUS6Xo1QqRalUGgvK2u12/Prrr/Hpp5/uud/q6mosLi7Gjz/+eISfEgDA6QnLAABmQL/fj8FgsO/55eXlPceKgOnWrVtvvabX60VExF9//TU6ViylTJLkwCWa9Xo90jSNn3/++cB+93q9UTulUim2t7dje3t77H6tVitKpVKsrq6+tY3r169HlmXRbrcPvBcAwFkQlgEAzIByuRw7Ozv7BkaVSmVsdlmn04k0TSNJkrfORouIuHfvXjQajfj8889Hx4o90T7++OMD+1MsiSyWXe7ngw8+OLCdYrnm22bGFYpz3W73wLYAAM6CDf4BAGZAo9GIzc3NaLVa0Wq1olKpRLVajUqlEktLS3s2wH/27FlERCwuLu7b5tLS0p4g7fnz5xERce3atUP7VGzCX7wA4G0O25i/6OdBoVrRx5cvXx7aJwCA0xKWAQDMgFqtFuvr69FqtSJN0+j1eqNllEmSRKPRGFs2WQRLr7/98iiKWWJv7m/2NuVyObIsi36/v+9nDgvLij3Pnj59euAMtcXFxWM/CwDASQjLAABmRK1Wi1qtFv1+P3q9XnS73ej3+5GmaWxubsba2lqsrKxMupsncuPGjZntOwAwX+xZBgAwA3Z2dkb/v7S0FPV6Pe7duxfb29vRaDQi4r/9vyIiFhYWIiIOnK31NsWMsjffcvk2xQsHDps9dpDi2qPcDwDgIgjLAABmwEFvnazX6/Hpp59GlmWjcKzYgP+gJZKF1z9TvFXzKCFbEXAd9NbMwxSb97948eLEbQAAnCVhGQDADMiy7MC3QRbhWLGvV71ejyRJDr0uYnxGWr1ej4j/Nt7fTzHTrVKpnGpmWXG/Yv+1g2xsbJz4PgAARyUsAwCYEa+HWm96+fJlJEkytjH/zZs3IyLi4cOH+17XbDZHgVXEfy8LyLIs2u32vtc9evQoSqVSrK2tHecR9kiSJNbW1iLLsmg2mwf288aNG6e6FwDAUQjLAABmxHA4jK2trT37e/X7/Wi1Wv
H111+PHa9Wq3H37t0YDAZx+/btPde12+14//33R7PSCvV6PdbX1+Px48fR6XTGzqVpGhsbGzEcDuP+/fv7zior3sb5xx9/HPpcKysrsb6+Hr///ns0m809/Ww2m3Ht2rVTLfcEADiqd3Z3d3cn3QkAAA62sbER6+vrkaZpPH78eOxcuVyORqMxNqvsdVmWxaNHj+L58+dx9erVUcC1srKyJyh787pWqxW9Xi8WFhZGG/rv9+bKLMviyy+/3BN2lUqlKJfLcfPmzSPd78WLF1Eul0fP89lnn8XS0tK+1wEAnCVhGQAAAADkLMMEAAAAgJywDAAAAABywjIAAAAAyAnLAAAAACAnLAMAAACAnLAMAAAAAHLCMgAAAADICcsAAAAAICcsAwAAAICcsAwAAAAAcsIyAAAAAMgJywAAAAAgJywDAAAAgJywDAAAAABywjIAAAAAyAnLAAAAACAnLAMAAACAnLAMAAAAAHLCMgAAAADICcsAAAAAIPf/uhdswSdmaoQAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"image/png": {
"height": 376,
"width": 613
}
},
"output_type": "display_data"
}
],
"source": [
"# Plotting directly from pandas\n",
"df.plot('score', 'new_col', kind='scatter', logx=True, logy=True)\n",
"plt.show();\n",
"\n",
"\n",
"# #two dataframes plots on same figure\n",
"# # ax = df_tmp.plot()\n",
"# # df2.plot(ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}