{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'3.6.9'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from platform import python_version\n",
"\n",
"python_version()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('1.0.5', '1.19.0')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"pd.__version__, np.__version__"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25 | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26 | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27 | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age\n",
"0 alice 25\n",
"1 bob 26\n",
"2 charlie 27\n",
"3 david 22"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\n",
" 'name': ['alice','bob','charlie','david'],\n",
" 'age': [25,26,27,22],\n",
"})[['name', 'age']]\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## apply example"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" name_uppercase | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25 | \n",
" ALICE | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26 | \n",
" BOB | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27 | \n",
" CHARLIE | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22 | \n",
" DAVID | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age name_uppercase\n",
"0 alice 25 ALICE\n",
"1 bob 26 BOB\n",
"2 charlie 27 CHARLIE\n",
"3 david 22 DAVID"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name': ['alice','bob','charlie','david'],\n",
" 'age': [25,26,27,22],\n",
"})[['name', 'age']]\n",
"\n",
"# each element of the age column is a string\n",
"# so you can call .upper() on it\n",
"df['name_uppercase'] = df['name'].apply(lambda element: element.upper())\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## custom function"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" first_letter | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25 | \n",
" a | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26 | \n",
" b | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27 | \n",
" c | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22 | \n",
" d | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age first_letter\n",
"0 alice 25 a\n",
"1 bob 26 b\n",
"2 charlie 27 c\n",
"3 david 22 d"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name': ['alice','bob','charlie','david'],\n",
" 'age': [25,26,27,22],\n",
"})[['name', 'age']]\n",
"\n",
"\n",
"def first_letter(input_str):\n",
" return input_str[:1]\n",
"\n",
"# each element of the age column is a string\n",
"# so you can call .upper() on it\n",
"df['first_letter'] = df['name'].apply(first_letter)\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Take multiple columns as parameters"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" concatenated | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25 | \n",
" alice--25 | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26 | \n",
" bob--26 | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27 | \n",
" charlie--27 | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22 | \n",
" david--22 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age concatenated\n",
"0 alice 25 alice--25\n",
"1 bob 26 bob--26\n",
"2 charlie 27 charlie--27\n",
"3 david 22 david--22"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name': ['alice','bob','charlie','david'],\n",
" 'age': [25,26,27,22],\n",
"})[['name', 'age']]\n",
"\n",
"\n",
"def concatenate(value_1, value_2):\n",
" return str(value_1)+ \"--\" + str(value_2) \n",
"\n",
"# note the use of DOUBLE SQUARE BRACKETS!\n",
"df['concatenated'] = df[['name','age']].apply(lambda row: concatenate(row['name'], row['age']) , axis=1)\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Apply function to row"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" value1 | \n",
" value2 | \n",
" value3 | \n",
" value4 | \n",
" sum_all | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 5 | \n",
" 10 | \n",
" 99.0 | \n",
" 115.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 4 | \n",
" 20 | \n",
" 99.0 | \n",
" 125.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 3 | \n",
" 30 | \n",
" 99.0 | \n",
" 135.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 2 | \n",
" 40 | \n",
" 99.0 | \n",
" 145.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 1 | \n",
" 50 | \n",
" NaN | \n",
" 56.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" value1 value2 value3 value4 sum_all\n",
"0 1 5 10 99.0 115.0\n",
"1 2 4 20 99.0 125.0\n",
"2 3 3 30 99.0 135.0\n",
"3 4 2 40 99.0 145.0\n",
"4 5 1 50 NaN 56.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'value1': [1,2,3,4,5],\n",
" 'value2': [5,4,3,2,1],\n",
" 'value3': [10,20,30,40,50],\n",
" 'value4': [99,99,99,99,np.nan],\n",
"})\n",
"\n",
"def sum_all(row):\n",
" return np.sum(row)\n",
"\n",
"# note that apply was called on the dataframe itself, not on columns\n",
"df['sum_all'] = df.apply(lambda row: sum_all(row), axis=1)\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## return multiple values"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25.0 | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26.0 | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27.0 | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22.0 | \n",
"
\n",
" \n",
" 4 | \n",
" edward | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age\n",
"0 alice 25.0\n",
"1 bob 26.0\n",
"2 charlie 27.0\n",
"3 david 22.0\n",
"4 edward NaN"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\n",
" 'name': ['alice','bob','charlie','david','edward'],\n",
" 'age': [25,26,27,22,np.nan],\n",
"})[['name', 'age']]\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" times_2 | \n",
" times_3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25.0 | \n",
" 50.0 | \n",
" 75.0 | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26.0 | \n",
" 52.0 | \n",
" 78.0 | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27.0 | \n",
" 54.0 | \n",
" 81.0 | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22.0 | \n",
" 44.0 | \n",
" 66.0 | \n",
"
\n",
" \n",
" 4 | \n",
" edward | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age times_2 times_3\n",
"0 alice 25.0 50.0 75.0\n",
"1 bob 26.0 52.0 78.0\n",
"2 charlie 27.0 54.0 81.0\n",
"3 david 22.0 44.0 66.0\n",
"4 edward NaN NaN NaN"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'name': ['alice','bob','charlie','david','edward'],\n",
" 'age': [25,26,27,22,np.nan],\n",
"})[['name', 'age']]\n",
"\n",
"def times_two_times_three(value):\n",
" value_times_2 = value*2\n",
" value_times_3 = value*3\n",
"\n",
" return pd.Series([value_times_2,value_times_3])\n",
"\n",
"# note that apply was called on age column\n",
"df[['times_2','times_3']]= df['age'].apply(times_two_times_three)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" times_2 | \n",
" times_3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" alice | \n",
" 25.0 | \n",
" 50.0 | \n",
" 75.0 | \n",
"
\n",
" \n",
" 1 | \n",
" bob | \n",
" 26.0 | \n",
" 52.0 | \n",
" 78.0 | \n",
"
\n",
" \n",
" 2 | \n",
" charlie | \n",
" 27.0 | \n",
" 54.0 | \n",
" 81.0 | \n",
"
\n",
" \n",
" 3 | \n",
" david | \n",
" 22.0 | \n",
" 44.0 | \n",
" 66.0 | \n",
"
\n",
" \n",
" 4 | \n",
" edward | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age times_2 times_3\n",
"0 alice 25.0 50.0 75.0\n",
"1 bob 26.0 52.0 78.0\n",
"2 charlie 27.0 54.0 81.0\n",
"3 david 22.0 44.0 66.0\n",
"4 edward NaN NaN NaN"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}