{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from pandas import Series, DataFrame" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Notebook-wide configuration: fixed RNG seed plus plotting/display options.\n", "np.random.seed(12345)\n", "plt.rc(\"figure\", figsize=(10, 6))\n", "# Remember the original row limit so it can be restored at the end of the notebook.\n", "PREVIOUS_MAX_ROWS = pd.options.display.max_rows\n", "pd.options.display.max_rows = 20\n", "pd.options.display.max_columns = 20\n", "pd.options.display.max_colwidth = 80\n", "np.set_printoptions(precision=4, suppress=True)" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([4, 7, -5, 3])\n", "obj" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "obj.array\n", "obj.index" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "obj2 = pd.Series([4, 7, -5, 3], index=[\"d\", \"b\", \"a\", \"c\"])\n", "obj2\n", "obj2.index" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "obj2[\"a\"]\n", "obj2[\"d\"] = 6\n", "obj2[[\"c\", \"a\", \"d\"]]" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "obj2[obj2 > 0]\n", "obj2 * 2\n", "np.exp(obj2)" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "\"b\" in obj2\n", "\"e\" in obj2" ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "sdata = {\"Ohio\": 35000, \"Texas\": 71000, \"Oregon\": 16000, \"Utah\": 5000}\n", "obj3 = pd.Series(sdata)\n", "obj3" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "obj3.to_dict()" ] },
 { "cell_type": "code", "execution_count": 12,
"metadata": {}, "outputs": [], "source": [ "states = [\"California\", \"Ohio\", \"Oregon\", \"Texas\"]\n", "obj4 = pd.Series(sdata, index=states)\n", "obj4" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "pd.isna(obj4)\n", "pd.notna(obj4)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "obj4.isna()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "obj3\n", "obj4\n", "obj3 + obj4" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "obj4.name = \"population\"\n", "obj4.index.name = \"state\"\n", "obj4" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "obj\n", "obj.index = [\"Bob\", \"Steve\", \"Jeff\", \"Ryan\"]\n", "obj" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "data = {\"state\": [\"Ohio\", \"Ohio\", \"Ohio\", \"Nevada\", \"Nevada\", \"Nevada\"],\n", " \"year\": [2000, 2001, 2002, 2001, 2002, 2003],\n", " \"pop\": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}\n", "frame = pd.DataFrame(data)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "frame" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "frame.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "frame.tail()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "pd.DataFrame(data, columns=[\"year\", \"state\", \"pop\"])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "frame2 = pd.DataFrame(data, columns=[\"year\", \"state\", \"pop\", \"debt\"])\n", "frame2\n", "frame2.columns" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "frame2[\"state\"]\n", "frame2.year" ] }, { 
"cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "frame2.loc[1]\n", "frame2.iloc[2]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "frame2[\"debt\"] = 16.5\n", "frame2\n", "frame2[\"debt\"] = np.arange(6.)\n", "frame2" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "val = pd.Series([-1.2, -1.5, -1.7], index=[\"two\", \"four\", \"five\"])\n", "frame2[\"debt\"] = val\n", "frame2" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "frame2[\"eastern\"] = frame2[\"state\"] == \"Ohio\"\n", "frame2" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "del frame2[\"eastern\"]\n", "frame2.columns" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "populations = {\"Ohio\": {2000: 1.5, 2001: 1.7, 2002: 3.6},\n", " \"Nevada\": {2001: 2.4, 2002: 2.9}}" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "frame3 = pd.DataFrame(populations)\n", "frame3" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "frame3.T" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "pd.DataFrame(populations, index=[2001, 2002, 2003])" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "pdata = {\"Ohio\": frame3[\"Ohio\"][:-1],\n", " \"Nevada\": frame3[\"Nevada\"][:2]}\n", "pd.DataFrame(pdata)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "frame3.index.name = \"year\"\n", "frame3.columns.name = \"state\"\n", "frame3" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "frame3.to_numpy()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ 
"frame2.to_numpy()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series(np.arange(3), index=[\"a\", \"b\", \"c\"])\n", "index = obj.index\n", "index\n", "index[1:]" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "labels = pd.Index(np.arange(3))\n", "labels\n", "obj2 = pd.Series([1.5, -2.5, 0], index=labels)\n", "obj2\n", "obj2.index is labels" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "frame3\n", "frame3.columns\n", "\"Ohio\" in frame3.columns\n", "2003 in frame3.index" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "pd.Index([\"foo\", \"foo\", \"bar\", \"bar\"])" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=[\"d\", \"b\", \"a\", \"c\"])\n", "obj" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "obj2 = obj.reindex([\"a\", \"b\", \"c\", \"d\", \"e\"])\n", "obj2" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "obj3 = pd.Series([\"blue\", \"purple\", \"yellow\"], index=[0, 2, 4])\n", "obj3\n", "obj3.reindex(np.arange(6), method=\"ffill\")" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame(np.arange(9).reshape((3, 3)),\n", " index=[\"a\", \"c\", \"d\"],\n", " columns=[\"Ohio\", \"Texas\", \"California\"])\n", "frame\n", "frame2 = frame.reindex(index=[\"a\", \"b\", \"c\", \"d\"])\n", "frame2" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "states = [\"Texas\", \"Utah\", \"California\"]\n", "frame.reindex(columns=states)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "frame.reindex(states, axis=\"columns\")" ] }, { "cell_type": 
"code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "frame.loc[[\"a\", \"d\", \"c\"], [\"California\", \"Texas\"]]" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series(np.arange(5.), index=[\"a\", \"b\", \"c\", \"d\", \"e\"])\n", "obj\n", "new_obj = obj.drop(\"c\")\n", "new_obj\n", "obj.drop([\"d\", \"c\"])" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "data = pd.DataFrame(np.arange(16).reshape((4, 4)),\n", " index=[\"Ohio\", \"Colorado\", \"Utah\", \"New York\"],\n", " columns=[\"one\", \"two\", \"three\", \"four\"])\n", "data" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "data.drop(index=[\"Colorado\", \"Ohio\"])" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "data.drop(columns=[\"two\"])" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "data.drop(\"two\", axis=1)\n", "data.drop([\"two\", \"four\"], axis=\"columns\")" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series(np.arange(4.), index=[\"a\", \"b\", \"c\", \"d\"])\n", "obj\n", "obj[\"b\"]\n", "obj[1]\n", "obj[2:4]\n", "obj[[\"b\", \"a\", \"d\"]]\n", "obj[[1, 3]]\n", "obj[obj < 2]" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "obj.loc[[\"b\", \"a\", \"d\"]]" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "obj1 = pd.Series([1, 2, 3], index=[2, 0, 1])\n", "obj2 = pd.Series([1, 2, 3], index=[\"a\", \"b\", \"c\"])\n", "obj1\n", "obj2\n", "obj1[[0, 1, 2]]\n", "obj2[[0, 1, 2]]" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "obj1.iloc[[0, 1, 2]]\n", "obj2.iloc[[0, 1, 2]]" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": 
[], "source": [ "obj2.loc[\"b\":\"c\"]" ] },
 { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "obj2.loc[\"b\":\"c\"] = 5\n", "obj2" ] },
 { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "data = pd.DataFrame(np.arange(16).reshape((4, 4)),\n", " index=[\"Ohio\", \"Colorado\", \"Utah\", \"New York\"],\n", " columns=[\"one\", \"two\", \"three\", \"four\"])\n", "data\n", "data[\"two\"]\n", "data[[\"three\", \"one\"]]" ] },
 { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "data[:2]\n", "data[data[\"three\"] > 5]" ] },
 { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "data < 5" ] },
 { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "data[data < 5] = 0\n", "data" ] },
 { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "data\n", "data.loc[\"Colorado\"]" ] },
 { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "data.loc[[\"Colorado\", \"New York\"]]" ] },
 { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "data.loc[\"Colorado\", [\"two\", \"three\"]]" ] },
 { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "data.iloc[2]\n", "data.iloc[[2, 1]]\n", "data.iloc[2, [3, 0, 1]]\n", "data.iloc[[1, 2], [3, 0, 1]]" ] },
 { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "data.loc[:\"Utah\", \"two\"]\n", "data.iloc[:, :3][data.three > 5]" ] },
 { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "data.loc[data.three >= 2]" ] },
 { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "ser = pd.Series(np.arange(3.))\n", "ser\n", "# On a default integer index, -1 is looked up as a label (not a position),\n", "# so this raises KeyError. Catch and show it so that Restart & Run All can\n", "# continue past this cell instead of stopping here.\n", "try:\n", "    ser[-1]\n", "except KeyError as err:\n", "    print(f\"KeyError: {err}\")" ] },
 { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "ser" ]
}, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "ser2 = pd.Series(np.arange(3.), index=[\"a\", \"b\", \"c\"])\n", "ser2[-1]" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "ser.iloc[-1]" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "ser[:2]" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "data.loc[:, \"one\"] = 1\n", "data\n", "data.iloc[2] = 5\n", "data\n", "data.loc[data[\"four\"] > 5] = 3\n", "data" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [], "source": [ "data.loc[data.three == 5][\"three\"] = 6" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "data.loc[data.three == 5, \"three\"] = 6\n", "data" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=[\"a\", \"c\", \"d\", \"e\"])\n", "s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1],\n", " index=[\"a\", \"c\", \"e\", \"f\", \"g\"])\n", "s1\n", "s2" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [], "source": [ "s1 + s2" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list(\"bcd\"),\n", " index=[\"Ohio\", \"Texas\", \"Colorado\"])\n", "df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list(\"bde\"),\n", " index=[\"Utah\", \"Ohio\", \"Texas\", \"Oregon\"])\n", "df1\n", "df2" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "df1 + df2" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame({\"A\": [1, 2]})\n", "df2 = 
pd.DataFrame({\"B\": [3, 4]})\n", "df1\n", "df2\n", "df1 + df2" ] },
 { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)),\n", " columns=list(\"abcd\"))\n", "df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)),\n", " columns=list(\"abcde\"))\n", "df2.loc[1, \"b\"] = np.nan\n", "df1\n", "df2" ] },
 { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [], "source": [ "df1 + df2" ] },
 { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "df1.add(df2, fill_value=0)" ] },
 { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "1 / df1\n", "df1.rdiv(1)" ] },
 { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "df1.reindex(columns=df2.columns, fill_value=0)" ] },
 { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "arr = np.arange(12.).reshape((3, 4))\n", "arr\n", "arr[0]\n", "arr - arr[0]" ] },
 { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame(np.arange(12.).reshape((4, 3)),\n", " columns=list(\"bde\"),\n", " index=[\"Utah\", \"Ohio\", \"Texas\", \"Oregon\"])\n", "series = frame.iloc[0]\n", "frame\n", "series" ] },
 { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [], "source": [ "frame - series" ] },
 { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [ "series2 = pd.Series(np.arange(3), index=[\"b\", \"e\", \"f\"])\n", "series2\n", "frame + series2" ] },
 { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "series3 = frame[\"d\"]\n", "frame\n", "series3\n", "frame.sub(series3, axis=\"index\")" ] },
 { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame(np.random.standard_normal((4, 3)),\n", " columns=list(\"bde\"),\n", " index=[\"Utah\", \"Ohio\", \"Texas\", \"Oregon\"])\n", "frame\n", "np.abs(frame)" ] },
 { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [], "source": [ "def f1(x):\n", "    \"\"\"Return the spread (maximum minus minimum) of the values in `x`.\"\"\"\n", "    return x.max() - x.min()\n", "\n", "frame.apply(f1)" ] },
 { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "frame.apply(f1, axis=\"columns\")" ] },
 { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "def f2(x):\n", "    \"\"\"Return the minimum and maximum of `x` as a Series labeled 'min' and 'max'.\"\"\"\n", "    return pd.Series([x.min(), x.max()], index=[\"min\", \"max\"])\n", "\n", "frame.apply(f2)" ] },
 { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "def my_format(x):\n", "    \"\"\"Format the number `x` as a string with two decimal places.\"\"\"\n", "    return f\"{x:.2f}\"\n", "\n", "# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old\n", "# name is deprecated; call whichever one the installed pandas provides.\n", "elementwise = frame.map if hasattr(pd.DataFrame, \"map\") else frame.applymap\n", "elementwise(my_format)" ] },
 { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "frame[\"e\"].map(my_format)" ] },
 { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series(np.arange(4), index=[\"d\", \"a\", \"b\", \"c\"])\n", "obj\n", "obj.sort_index()" ] },
 { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame(np.arange(8).reshape((2, 4)),\n", " index=[\"three\", \"one\"],\n", " columns=[\"d\", \"a\", \"b\", \"c\"])\n", "frame\n", "frame.sort_index()\n", "frame.sort_index(axis=\"columns\")" ] },
 { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "frame.sort_index(axis=\"columns\", ascending=False)" ] },
 { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([4, 7, -3, 2])\n", "obj.sort_values()" ] },
 { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])\n", "obj.sort_values()" ] },
 { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "obj.sort_values(na_position=\"first\")" ] },
 { "cell_type":
"code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame({\"b\": [4, 7, -3, 2], \"a\": [0, 1, 0, 1]})\n", "frame\n", "frame.sort_values(\"b\")" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "frame.sort_values([\"a\", \"b\"])" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([7, -5, 7, 4, 2, 0, 4])\n", "obj.rank()" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [], "source": [ "obj.rank(method=\"first\")" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [], "source": [ "obj.rank(ascending=False)" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame({\"b\": [4.3, 7, -3, 2], \"a\": [0, 1, 0, 1],\n", " \"c\": [-2, 5, 8, -2.5]})\n", "frame\n", "frame.rank(axis=\"columns\")" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series(np.arange(5), index=[\"a\", \"a\", \"b\", \"b\", \"c\"])\n", "obj" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [], "source": [ "obj.index.is_unique" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "obj[\"a\"]\n", "obj[\"c\"]" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(np.random.standard_normal((5, 3)),\n", " index=[\"a\", \"a\", \"b\", \"b\", \"c\"])\n", "df\n", "df.loc[\"b\"]\n", "df.loc[\"c\"]" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],\n", " [np.nan, np.nan], [0.75, -1.3]],\n", " index=[\"a\", \"b\", \"c\", \"d\"],\n", " columns=[\"one\", \"two\"])\n", "df" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [], "source": [ "df.sum()" ] }, 
{ "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [], "source": [ "df.sum(axis=\"columns\")" ] },
 { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [], "source": [ "df.sum(axis=\"index\", skipna=False)\n", "df.sum(axis=\"columns\", skipna=False)" ] },
 { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [], "source": [ "df.mean(axis=\"columns\")" ] },
 { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "df.idxmax()" ] },
 { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [], "source": [ "df.cumsum()" ] },
 { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [], "source": [ "df.describe()" ] },
 { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([\"a\", \"a\", \"b\", \"c\"] * 4)\n", "obj.describe()" ] },
 { "cell_type": "code", "execution_count": 125, "metadata": {}, "outputs": [], "source": [ "# NOTE: unpickling can execute arbitrary code, so only load pickle files\n", "# that come from a source you trust.\n", "price = pd.read_pickle(\"examples/yahoo_price.pkl\")\n", "volume = pd.read_pickle(\"examples/yahoo_volume.pkl\")" ] },
 { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [], "source": [ "returns = price.pct_change()\n", "returns.tail()" ] },
 { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [], "source": [ "returns[\"MSFT\"].corr(returns[\"IBM\"])\n", "returns[\"MSFT\"].cov(returns[\"IBM\"])" ] },
 { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "returns.corr()\n", "returns.cov()" ] },
 { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [], "source": [ "returns.corrwith(returns[\"IBM\"])" ] },
 { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "returns.corrwith(volume)" ] },
 { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [], "source": [ "obj = pd.Series([\"c\", \"a\", \"d\", \"a\", \"a\", \"b\", \"b\", \"c\", \"c\"])" ] },
 { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [], "source": [ "uniques = obj.unique()\n", "uniques" ] },
 { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [], "source": [ "obj.value_counts()" ] },
 { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [], "source": [ "# The top-level pd.value_counts is deprecated since pandas 2.1; wrapping the\n", "# array in a Series gives the same counts on every pandas version.\n", "pd.Series(obj.to_numpy()).value_counts(sort=False)" ] },
 { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [], "source": [ "obj\n", "mask = obj.isin([\"b\", \"c\"])\n", "mask\n", "obj[mask]" ] },
 { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [], "source": [ "to_match = pd.Series([\"c\", \"a\", \"b\", \"b\", \"c\", \"a\"])\n", "unique_vals = pd.Series([\"c\", \"b\", \"a\"])\n", "indices = pd.Index(unique_vals).get_indexer(to_match)\n", "indices" ] },
 { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [], "source": [ "data = pd.DataFrame({\"Qu1\": [1, 3, 4, 3, 4],\n", " \"Qu2\": [2, 3, 1, 2, 3],\n", " \"Qu3\": [1, 5, 2, 4, 4]})\n", "data" ] },
 { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [], "source": [ "data[\"Qu1\"].value_counts().sort_index()" ] },
 { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [], "source": [ "# Series.value_counts is applied to each column in turn; values missing from\n", "# a column come back as NaN and are filled with 0.\n", "result = data.apply(pd.Series.value_counts).fillna(0)\n", "result" ] },
 { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [ "data = pd.DataFrame({\"a\": [1, 1, 1, 2, 2], \"b\": [0, 0, 1, 0, 0]})\n", "data\n", "data.value_counts()" ] },
 { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "pd.options.display.max_rows = PREVIOUS_MAX_ROWS" ] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 },
"file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }