{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 5 new changes in pandas you need to know about ([video](https://www.youtube.com/watch?v=te5JrSCW-LY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=33))\n", "\n", "- [My pandas video series (30 videos)](http://www.dataschool.io/easier-data-analysis-with-pandas/)\n", "- [GitHub repository](https://github.com/justmarkham/pandas-videos)\n", "- [pandas release notes](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'0.22.0'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "pd.__version__" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. `ix` has been deprecated\n", "\n", "*New in 0.20.0*" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcoholcontinent
country
Afghanistan0000.0Asia
Albania89132544.9Europe
Algeria250140.7Africa
Andorra24513831212.4Europe
Angola21757455.9Africa
\n", "
" ], "text/plain": [ " beer_servings spirit_servings wine_servings \\\n", "country \n", "Afghanistan 0 0 0 \n", "Albania 89 132 54 \n", "Algeria 25 0 14 \n", "Andorra 245 138 312 \n", "Angola 217 57 45 \n", "\n", " total_litres_of_pure_alcohol continent \n", "country \n", "Afghanistan 0.0 Asia \n", "Albania 4.9 Europe \n", "Algeria 0.7 Africa \n", "Andorra 12.4 Europe \n", "Angola 5.9 Africa " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# read the drinks dataset into a DataFrame\n", "drinks = pd.read_csv('http://bit.ly/drinksbycountry', index_col='country')\n", "drinks.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# loc accesses by label\n", "drinks.loc['Angola', 'spirit_servings']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# iloc accesses by position\n", "drinks.iloc[4, 1]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/kevin/miniconda3/envs/pd22.0/lib/python3.5/site-packages/ipykernel_launcher.py:2: DeprecationWarning: \n", ".ix is deprecated. Please use\n", ".loc for label based indexing or\n", ".iloc for positional indexing\n", "\n", "See the documentation here:\n", "http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated\n", " \n" ] }, { "data": { "text/plain": [ "57" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ix accesses by label OR position (newly deprecated)\n", "drinks.ix['Angola', 1]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# alternative: use loc\n", "drinks.loc['Angola', drinks.columns[1]]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# alternative: use iloc\n", "drinks.iloc[drinks.index.get_loc('Angola'), 1]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ix accesses by label OR position (newly deprecated)\n", "drinks.ix[4, 'spirit_servings']" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# alternative: use loc\n", "drinks.loc[drinks.index[4], 'spirit_servings']" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "57" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# alternative: use iloc\n", "drinks.iloc[4, drinks.columns.get_loc('spirit_servings')]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- [More information](http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated)\n", "- [Video: How do I select multiple rows and columns from a pandas DataFrame?](https://www.youtube.com/watch?v=xvpNA7bC8cs&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=19)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Aliases have been added for `isnull` and `notnull`\n", "\n", "*New in 0.21.0*" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# read the UFO dataset into a DataFrame\n", "ufo = pd.read_csv('http://bit.ly/uforeports')\n", "ufo.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0FalseTrueFalseFalseFalse
1FalseTrueFalseFalseFalse
2FalseTrueFalseFalseFalse
3FalseTrueFalseFalseFalse
4FalseTrueFalseFalseFalse
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 False True False False False\n", "1 False True False False False\n", "2 False True False False False\n", "3 False True False False False\n", "4 False True False False False" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check which values are missing\n", "ufo.isnull().head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0TrueFalseTrueTrueTrue
1TrueFalseTrueTrueTrue
2TrueFalseTrueTrueTrue
3TrueFalseTrueTrueTrue
4TrueFalseTrueTrueTrue
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 True False True True True\n", "1 True False True True True\n", "2 True False True True True\n", "3 True False True True True\n", "4 True False True True True" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check which values are not missing\n", "ufo.notnull().head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
12BeltonREDSPHERESC6/30/1939 20:00
19Bering SeaREDOTHERAK4/30/1943 23:00
36PortsmouthREDFORMATIONVA7/10/1945 1:30
44BlairsdenGREENSPHERECA6/30/1946 19:00
82San JoseBLUECHEVRONCA7/15/1947 21:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "12 Belton RED SPHERE SC 6/30/1939 20:00\n", "19 Bering Sea RED OTHER AK 4/30/1943 23:00\n", "36 Portsmouth RED FORMATION VA 7/10/1945 1:30\n", "44 Blairsden GREEN SPHERE CA 6/30/1946 19:00\n", "82 San Jose BLUE CHEVRON CA 7/15/1947 21:00" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# drop rows with missing values\n", "ufo.dropna().head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0IthacaUNKNOWNTRIANGLENY6/1/1930 22:00
1WillingboroUNKNOWNOTHERNJ6/30/1930 20:00
2HolyokeUNKNOWNOVALCO2/15/1931 14:00
3AbileneUNKNOWNDISKKS6/1/1931 13:00
4New York Worlds FairUNKNOWNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 Ithaca UNKNOWN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro UNKNOWN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke UNKNOWN OVAL CO 2/15/1931 14:00\n", "3 Abilene UNKNOWN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair UNKNOWN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# fill in missing values\n", "ufo.fillna(value='UNKNOWN').head()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0FalseTrueFalseFalseFalse
1FalseTrueFalseFalseFalse
2FalseTrueFalseFalseFalse
3FalseTrueFalseFalseFalse
4FalseTrueFalseFalseFalse
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 False True False False False\n", "1 False True False False False\n", "2 False True False False False\n", "3 False True False False False\n", "4 False True False False False" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# new alias for isnull\n", "ufo.isna().head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0TrueFalseTrueTrueTrue
1TrueFalseTrueTrueTrue
2TrueFalseTrueTrueTrue
3TrueFalseTrueTrueTrue
4TrueFalseTrueTrueTrue
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 True False True True True\n", "1 True False True True True\n", "2 True False True True True\n", "3 True False True True True\n", "4 True False True True True" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# new alias for notnull\n", "ufo.notna().head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#na-naming-changes)\n", "- [Video: How do I handle missing values in pandas?](https://www.youtube.com/watch?v=fCMrO_VzeL8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=16)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. `drop` now accepts \"index\" and \"columns\" keywords\n", "\n", "*New in 0.21.0*" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# read the UFO dataset into a DataFrame\n", "ufo = pd.read_csv('http://bit.ly/uforeports')\n", "ufo.head()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
5Valley CityNaNDISKND9/15/1934 15:30
6Crater LakeNaNCIRCLECA6/15/1935 0:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00\n", "5 Valley City NaN DISK ND 9/15/1934 15:30\n", "6 Crater Lake NaN CIRCLE CA 6/15/1935 0:00" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# old way to drop rows: specify labels and axis\n", "ufo.drop([0, 1], axis=0).head()\n", "ufo.drop([0, 1], axis='index').head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
5Valley CityNaNDISKND9/15/1934 15:30
6Crater LakeNaNCIRCLECA6/15/1935 0:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00\n", "5 Valley City NaN DISK ND 9/15/1934 15:30\n", "6 Crater Lake NaN CIRCLE CA 6/15/1935 0:00" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# new way to drop rows: specify index\n", "ufo.drop(index=[0, 1]).head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Colors ReportedShape ReportedTime
0NaNTRIANGLE6/1/1930 22:00
1NaNOTHER6/30/1930 20:00
2NaNOVAL2/15/1931 14:00
3NaNDISK6/1/1931 13:00
4NaNLIGHT4/18/1933 19:00
\n", "
" ], "text/plain": [ " Colors Reported Shape Reported Time\n", "0 NaN TRIANGLE 6/1/1930 22:00\n", "1 NaN OTHER 6/30/1930 20:00\n", "2 NaN OVAL 2/15/1931 14:00\n", "3 NaN DISK 6/1/1931 13:00\n", "4 NaN LIGHT 4/18/1933 19:00" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# old way to drop columns: specify labels and axis\n", "ufo.drop(['City', 'State'], axis=1).head()\n", "ufo.drop(['City', 'State'], axis='columns').head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Colors ReportedShape ReportedTime
0NaNTRIANGLE6/1/1930 22:00
1NaNOTHER6/30/1930 20:00
2NaNOVAL2/15/1931 14:00
3NaNDISK6/1/1931 13:00
4NaNLIGHT4/18/1933 19:00
\n", "
" ], "text/plain": [ " Colors Reported Shape Reported Time\n", "0 NaN TRIANGLE 6/1/1930 22:00\n", "1 NaN OTHER 6/30/1930 20:00\n", "2 NaN OVAL 2/15/1931 14:00\n", "3 NaN DISK 6/1/1931 13:00\n", "4 NaN LIGHT 4/18/1933 19:00" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# new way to drop columns: specify columns\n", "ufo.drop(columns=['City', 'State']).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#drop-now-also-accepts-index-columns-keywords)\n", "- [Video: How do I remove columns from a pandas DataFrame?](https://www.youtube.com/watch?v=gnUKkS964WQ&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. `rename` and `reindex` now accept \"axis\" keyword\n", "\n", "*New in 0.21.0*" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CITYColors ReportedShape ReportedSTATETime
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " CITY Colors Reported Shape Reported STATE Time\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# old way to rename columns: specify columns\n", "ufo.rename(columns={'City':'CITY', 'State':'STATE'}).head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CITYColors ReportedShape ReportedSTATETime
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " CITY Colors Reported Shape Reported STATE Time\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# new way to rename columns: specify mapper and axis\n", "ufo.rename({'City':'CITY', 'State':'STATE'}, axis='columns').head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CITYCOLORS REPORTEDSHAPE REPORTEDSTATETIME
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " CITY COLORS REPORTED SHAPE REPORTED STATE TIME\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# note: mapper can be a function\n", "ufo.rename(str.upper, axis='columns').head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#rename-reindex-now-also-accept-axis-keyword)\n", "- [Video: How do I rename columns in a pandas DataFrame?](https://www.youtube.com/watch?v=0uBirYFhizE&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Ordered categories must be specified independent of the data\n", "\n", "*New in 0.21.0*" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDquality
0100good
1101very good
2102good
3103excellent
\n", "
" ], "text/plain": [ " ID quality\n", "0 100 good\n", "1 101 very good\n", "2 102 good\n", "3 103 excellent" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create a small DataFrame\n", "df = pd.DataFrame({'ID':[100, 101, 102, 103],\n", " 'quality':['good', 'very good', 'good', 'excellent']})\n", "df" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/kevin/miniconda3/envs/pd22.0/lib/python3.5/site-packages/ipykernel_launcher.py:2: FutureWarning: specifying 'categories' or 'ordered' in .astype() is deprecated; pass a CategoricalDtype instead\n", " \n" ] }, { "data": { "text/plain": [ "0 good\n", "1 very good\n", "2 good\n", "3 excellent\n", "Name: quality, dtype: category\n", "Categories (3, object): [good < very good < excellent]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# old way to create an ordered category (deprecated)\n", "df.quality.astype('category', categories=['good', 'very good', 'excellent'], ordered=True)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 good\n", "1 very good\n", "2 good\n", "3 excellent\n", "Name: quality, dtype: category\n", "Categories (3, object): [good < very good < excellent]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# new way to create an ordered category\n", "from pandas.api.types import CategoricalDtype\n", "quality_cat = CategoricalDtype(['good', 'very good', 'excellent'], ordered=True)\n", "df['quality'] = df.quality.astype(quality_cat)\n", "df.quality" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#whatsnew-0210-enhancements-categorical-dtype)\n", "- [Video: How do I make my pandas DataFrame smaller and faster?](https://www.youtube.com/watch?v=wDYDYGyN_cw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=21)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 2 }