{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'3.6.9'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from platform import python_version\n", "\n", "python_version()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('1.0.5', '1.19.0')" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "pd.__version__, np.__version__" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameage
0alice25
1bob26
2charlie27
3david22
\n", "
" ], "text/plain": [ " name age\n", "0 alice 25\n", "1 bob 26\n", "2 charlie 27\n", "3 david 22" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({\n", " 'name': ['alice','bob','charlie','david'],\n", " 'age': [25,26,27,22],\n", "})[['name', 'age']]\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## apply example" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameagename_uppercase
0alice25ALICE
1bob26BOB
2charlie27CHARLIE
3david22DAVID
\n", "
" ], "text/plain": [ " name age name_uppercase\n", "0 alice 25 ALICE\n", "1 bob 26 BOB\n", "2 charlie 27 CHARLIE\n", "3 david 22 DAVID" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame({\n", " 'name': ['alice','bob','charlie','david'],\n", " 'age': [25,26,27,22],\n", "})[['name', 'age']]\n", "\n", "# each element of the name column is a string\n", "# so you can call .upper() on it\n", "df['name_uppercase'] = df['name'].apply(lambda element: element.upper())\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## custom function" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameagefirst_letter
0alice25a
1bob26b
2charlie27c
3david22d
\n", "
" ], "text/plain": [ " name age first_letter\n", "0 alice 25 a\n", "1 bob 26 b\n", "2 charlie 27 c\n", "3 david 22 d" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame({\n", " 'name': ['alice','bob','charlie','david'],\n", " 'age': [25,26,27,22],\n", "})[['name', 'age']]\n", "\n", "\n", "def first_letter(input_str):\n", " return input_str[:1]\n", "\n", "# each element of the name column is a string,\n", "# so first_letter can slice out its first character\n", "df['first_letter'] = df['name'].apply(first_letter)\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Take multiple columns as parameters" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameageconcatenated
0alice25alice--25
1bob26bob--26
2charlie27charlie--27
3david22david--22
\n", "
" ], "text/plain": [ " name age concatenated\n", "0 alice 25 alice--25\n", "1 bob 26 bob--26\n", "2 charlie 27 charlie--27\n", "3 david 22 david--22" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame({\n", " 'name': ['alice','bob','charlie','david'],\n", " 'age': [25,26,27,22],\n", "})[['name', 'age']]\n", "\n", "\n", "def concatenate(value_1, value_2):\n", " return str(value_1)+ \"--\" + str(value_2) \n", "\n", "# note the use of DOUBLE SQUARE BRACKETS!\n", "df['concatenated'] = df[['name','age']].apply(lambda row: concatenate(row['name'], row['age']) , axis=1)\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Apply function to row" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
value1value2value3value4sum_all
0151099.0115.0
1242099.0125.0
2333099.0135.0
3424099.0145.0
45150NaN56.0
\n", "
" ], "text/plain": [ " value1 value2 value3 value4 sum_all\n", "0 1 5 10 99.0 115.0\n", "1 2 4 20 99.0 125.0\n", "2 3 3 30 99.0 135.0\n", "3 4 2 40 99.0 145.0\n", "4 5 1 50 NaN 56.0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame({\n", " 'value1': [1,2,3,4,5],\n", " 'value2': [5,4,3,2,1],\n", " 'value3': [10,20,30,40,50],\n", " 'value4': [99,99,99,99,np.nan],\n", "})\n", "\n", "def sum_all(row):\n", " return np.sum(row)\n", "\n", "# note that apply was called on the dataframe itself, not on columns\n", "df['sum_all'] = df.apply(lambda row: sum_all(row), axis=1)\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## return multiple values" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameage
0alice25.0
1bob26.0
2charlie27.0
3david22.0
4edwardNaN
\n", "
" ], "text/plain": [ " name age\n", "0 alice 25.0\n", "1 bob 26.0\n", "2 charlie 27.0\n", "3 david 22.0\n", "4 edward NaN" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({\n", " 'name': ['alice','bob','charlie','david','edward'],\n", " 'age': [25,26,27,22,np.nan],\n", "})[['name', 'age']]\n", "df" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameagetimes_2times_3
0alice25.050.075.0
1bob26.052.078.0
2charlie27.054.081.0
3david22.044.066.0
4edwardNaNNaNNaN
\n", "
" ], "text/plain": [ " name age times_2 times_3\n", "0 alice 25.0 50.0 75.0\n", "1 bob 26.0 52.0 78.0\n", "2 charlie 27.0 54.0 81.0\n", "3 david 22.0 44.0 66.0\n", "4 edward NaN NaN NaN" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame({\n", " 'name': ['alice','bob','charlie','david','edward'],\n", " 'age': [25,26,27,22,np.nan],\n", "})[['name', 'age']]\n", "\n", "def times_two_times_three(value):\n", " value_times_2 = value*2\n", " value_times_3 = value*3\n", "\n", " return pd.Series([value_times_2,value_times_3])\n", "\n", "# note that apply was called on age column\n", "df[['times_2','times_3']]= df['age'].apply(times_two_times_three)\n", "df" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameagetimes_2times_3
0alice25.050.075.0
1bob26.052.078.0
2charlie27.054.081.0
3david22.044.066.0
4edwardNaNNaNNaN
\n", "
" ], "text/plain": [ " name age times_2 times_3\n", "0 alice 25.0 50.0 75.0\n", "1 bob 26.0 52.0 78.0\n", "2 charlie 27.0 54.0 81.0\n", "3 david 22.0 44.0 66.0\n", "4 edward NaN NaN NaN" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }