{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8537d349-6446-473e-bd9e-fd6ec2042f28",
   "metadata": {},
   "source": [
    "# Lesson 10 demo: Pandas\n",
    "\n",
    "## 1. Set up\n",
    "\n",
    "Install with:\n",
    "\n",
    "```text\n",
    "pip install pandas\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "30dcf970-f9f9-4230-afc5-a31426b16e49",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pandas version 1.5.3\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "print(f'Pandas version {pd.__version__}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d5a78cf-a186-4310-a80d-ed7e3ca14e73",
   "metadata": {
    "toc-hr-collapsed": true
   },
   "source": [
    "## 1. Series: one-dimensional data\n",
    "\n",
    "### 1.1. Create a series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "73015824-e26c-45b0-8659-8a63bb14f637",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    20\n",
      "1    30\n",
      "2    40\n",
      "3    50\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "my_series = pd.Series([20, 30, 40, 50])\n",
    "print(my_series)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e33fe749-feb2-48ff-9930-01b060c6ee76",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(my_series)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "58930871-dea4-4276-908e-8191c2b80906",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Mon', 'Tues', 'Wed', 'Thurs']\n"
     ]
    }
   ],
   "source": [
    "day = ['Mon', 'Tues', 'Wed', 'Thurs']\n",
    "print(day)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8e420bc9-e8a4-423f-a3a1-1a3ba4be0c21",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mon      20\n",
      "Tues     30\n",
      "Wed      40\n",
      "Thurs    50\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "my_indexed_series = pd.Series([20, 30, 40, 50], index=day)\n",
    "print(my_indexed_series)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fa28881d-1e50-4b75-a351-a26b383f56a2",
   "metadata": {},
   "source": [
    "### 1.2. Indexing series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4560f31c-16bf-4e36-8e60-412b4f8bb258",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40\n"
     ]
    }
   ],
   "source": [
    "print(my_indexed_series['Wed'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c2f253e5-a9ca-4200-aa71-d26de5e2efaf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "30\n"
     ]
    }
   ],
   "source": [
    "print(my_series.iloc[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5e7bd797-89bd-4bc0-96d3-56a43a529442",
   "metadata": {},
   "source": [
    "### 2.3. Series operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e3c0ec65-b978-4d7f-9c08-29d77d40150b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "50\n"
     ]
    }
   ],
   "source": [
    "print(my_series.max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f6510c0a-55a2-4a35-8bc6-88af0c94c080",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35.0\n"
     ]
    }
   ],
   "source": [
    "print(my_series.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c111179b-fb73-46a0-a3e7-cc45280e990d",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 2. DataFrame: two-deimensional data\n",
    "\n",
    "### 2.1. Creating dataframes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ad5862cc-42d6-4b32-8189-ac8d0da02ccc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {\n",
    "    'name': ['Alice', 'Bob', 'Laura', 'George'],\n",
    "    'age': [25, 30, 35, 40],\n",
    "    'city': ['New York', 'Paris', 'London', 'Chicago'],\n",
    "    'salary': [50000, 60000, 70000, 80000]\n",
    "}\n",
    "\n",
    "type(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bf087fe4-1720-4574-90e9-6449cf43809d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'name': ['Alice', 'Bob', 'Laura', 'George'], 'age': [25, 30, 35, 40], 'city': ['New York', 'Paris', 'London', 'Chicago'], 'salary': [50000, 60000, 70000, 80000]}\n"
     ]
    }
   ],
   "source": [
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6def2b20-846f-4775-80bd-0e1e47ccb2c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age      city  salary\n",
       "0   Alice   25  New York   50000\n",
       "1     Bob   30     Paris   60000\n",
       "2   Laura   35    London   70000\n",
       "3  George   40   Chicago   80000"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(data)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e4a0d969-79c8-4560-9ce4-c69a33e55af6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Day</th>\n",
       "      <th>Temperature</th>\n",
       "      <th>Precipitation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>23.232203</td>\n",
       "      <td>2.645556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>25.727840</td>\n",
       "      <td>7.742337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>24.041451</td>\n",
       "      <td>4.561503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>23.173248</td>\n",
       "      <td>5.684339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>21.354822</td>\n",
       "      <td>0.187898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>24.688412</td>\n",
       "      <td>6.176355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>21.563808</td>\n",
       "      <td>6.120957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>28.376595</td>\n",
       "      <td>6.169340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>29.454941</td>\n",
       "      <td>9.437481</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>20.751623</td>\n",
       "      <td>6.818203</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Day  Temperature  Precipitation\n",
       "0    1    23.232203       2.645556\n",
       "1    2    25.727840       7.742337\n",
       "2    3    24.041451       4.561503\n",
       "3    4    23.173248       5.684339\n",
       "4    5    21.354822       0.187898\n",
       "5    6    24.688412       6.176355\n",
       "6    7    21.563808       6.120957\n",
       "7    8    28.376595       6.169340\n",
       "8    9    29.454941       9.437481\n",
       "9   10    20.751623       6.818203"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "weather_df = pd.read_csv('./Weather_data.csv')\n",
    "weather_df.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "578b6b08-c431-4896-b8cb-ab77b31a102e",
   "metadata": {},
   "source": [
    "### 2.2. Setting the index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "5c491fbf-1f96-40d1-adb8-bf60542f80df",
   "metadata": {},
   "outputs": [],
   "source": [
    "new_index = ['a', 'b', 'c', 'd']\n",
    "df.set_index(pd.Index(new_index), inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "4d9b8158-14ec-4cde-87c9-febda56272ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['index_col'] = new_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "edbb2450-1553-4eda-bbd8-3b74216107a5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "      <th>index_col</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age      city  salary index_col\n",
       "a   Alice   25  New York   50000         a\n",
       "b     Bob   30     Paris   60000         b\n",
       "c   Laura   35    London   70000         c\n",
       "d  George   40   Chicago   80000         d"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "efd96440-2361-4e8b-aa18-36d2ac736044",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age      city  salary\n",
       "a   Alice   25  New York   50000\n",
       "b     Bob   30     Paris   60000\n",
       "c   Laura   35    London   70000\n",
       "d  George   40   Chicago   80000"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop('index_col', axis=1, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "cef670a3-a85a-4da8-b1f0-fc01f27222f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age      city  salary\n",
       "0   Alice   25  New York   50000\n",
       "1     Bob   30     Paris   60000\n",
       "2   Laura   35    London   70000\n",
       "3  George   40   Chicago   80000"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.reset_index(inplace=True, drop=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac555d48-8979-410c-a68e-f9a62cb120f7",
   "metadata": {},
   "source": [
    "### 2.3. Getting and setting column names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "911eda97-ee4c-4f32-80c5-037838f02a75",
   "metadata": {},
   "outputs": [],
   "source": [
    "col_names = df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "7497e7bc-2e06-4e94-9826-a3b3fc9c33c4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['name', 'age', 'city', 'salary'], dtype='object')\n"
     ]
    }
   ],
   "source": [
    "print(col_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "4b1dc8a4-256a-410b-b68f-1c2f2bdc29de",
   "metadata": {},
   "outputs": [],
   "source": [
    "rename_dict = {'city': 'location'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "3d812b3b-a43d-43e3-a5e5-19ad2e8ab6cf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>location</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age  location  salary\n",
       "0   Alice   25  New York   50000\n",
       "1     Bob   30     Paris   60000\n",
       "2   Laura   35    London   70000\n",
       "3  George   40   Chicago   80000"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.rename(columns=rename_dict, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "371a9b4e-a227-43c3-9d0f-53eb87049c99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>location</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age  location  salary\n",
       "0   Alice   25  New York   50000\n",
       "1     Bob   30     Paris   60000\n",
       "2   Laura   35    London   70000\n",
       "3  George   40   Chicago   80000"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.column = pd.Index(['Name', 'Age', 'City', 'Salary'])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ea6f95c-4875-43fc-b03b-28cbcbd183c9",
   "metadata": {},
   "source": [
    "### 2.4. Getting information about dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "f04579d9-64c2-43ca-aade-dea4b538718a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>location</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    name  age  location  salary\n",
       "0  Alice   25  New York   50000\n",
       "1    Bob   30     Paris   60000"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "e169cfde-a0eb-49e5-a17c-0c9e9cd8bacb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>location</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age location  salary\n",
       "1     Bob   30    Paris   60000\n",
       "2   Laura   35   London   70000\n",
       "3  George   40  Chicago   80000"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "b2828047-da85-4e4d-a631-aa70aaebabbe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4 entries, 0 to 3\n",
      "Data columns (total 4 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   name      4 non-null      object\n",
      " 1   age       4 non-null      int64 \n",
      " 2   location  4 non-null      object\n",
      " 3   salary    4 non-null      int64 \n",
      "dtypes: int64(2), object(2)\n",
      "memory usage: 256.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "1169a714-4f97-4a2f-9eee-9b0236e13909",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>32.500000</td>\n",
       "      <td>65000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>6.454972</td>\n",
       "      <td>12909.944487</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>25.000000</td>\n",
       "      <td>50000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>28.750000</td>\n",
       "      <td>57500.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>32.500000</td>\n",
       "      <td>65000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>36.250000</td>\n",
       "      <td>72500.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>40.000000</td>\n",
       "      <td>80000.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             age        salary\n",
       "count   4.000000      4.000000\n",
       "mean   32.500000  65000.000000\n",
       "std     6.454972  12909.944487\n",
       "min    25.000000  50000.000000\n",
       "25%    28.750000  57500.000000\n",
       "50%    32.500000  65000.000000\n",
       "75%    36.250000  72500.000000\n",
       "max    40.000000  80000.000000"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10de24a4-16d1-4f3e-9889-b5afc0697240",
   "metadata": {},
   "source": [
    "### 2.5. Selecting data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "facf9f2e-b27a-40c2-a887-1ea1bb70b66d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     Alice\n",
       "1       Bob\n",
       "2     Laura\n",
       "3    George\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name_col = df['name']\n",
    "name_col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "1d582720-2dfa-491b-aa1e-93ef6780ac14",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  age\n",
       "0   Alice   25\n",
       "1     Bob   30\n",
       "2   Laura   35\n",
       "3  George   40"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cols = ['name', 'age']\n",
    "\n",
    "multiple_col = df[cols]\n",
    "multiple_col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "8e7ce30d-405f-42da-a589-fec8cd968964",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age      city  salary\n",
       "first    Alice   25  New York   50000\n",
       "second     Bob   30     Paris   60000\n",
       "third    Laura   35    London   70000\n",
       "fourth  George   40   Chicago   80000"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index = pd.Index(['first', 'second', 'third', 'fourth'])\n",
    "\n",
    "df.set_index(index, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "52323183-35fe-4378-afef-e3b57d1329d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name         Alice\n",
       "age             25\n",
       "city      New York\n",
       "salary       50000\n",
       "Name: first, dtype: object"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['first']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "055d7849-278b-4fad-97de-816efaba36b6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name         Alice\n",
       "age             25\n",
       "city      New York\n",
       "salary       50000\n",
       "Name: first, dtype: object"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "79bd0c20-dea5-4fbc-b162-92cabd52d858",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         name  age      city  salary\n",
       "first   Alice   25  New York   50000\n",
       "second    Bob   30     Paris   60000"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[['first', 'second']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "d0794d2b-11a7-4e1a-a04a-5f938afa72e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>city</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>New York</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>Paris</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         name      city\n",
       "first   Alice  New York\n",
       "second    Bob     Paris"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[['first', 'second'], ['name', 'city']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "ba463bd6-71f9-4ec4-b336-7790828dde42",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        age      city\n",
       "first    25  New York\n",
       "second   30     Paris\n",
       "third    35    London\n",
       "fourth   40   Chicago"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[:, 1:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d4c5c16d-9e07-46d5-99f7-0438e67ac1e5",
   "metadata": {},
   "source": [
    "### 2.6. Filtering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "8d082df3-3bcd-4452-8b52-85fdcc1e61f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "first     False\n",
       "second    False\n",
       "third      True\n",
       "fourth     True\n",
       "Name: salary, dtype: bool"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['salary'] > 60000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "2d5b51d1-9cee-4b6a-919e-95b4a0af95cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age     city  salary\n",
       "third    Laura   35   London   70000\n",
       "fourth  George   40  Chicago   80000"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['salary'] > 60000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "13062732-66ad-43fb-b1db-1493a2e771a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        name  age    city  salary\n",
       "third  Laura   35  London   70000"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['city'] == 'London']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "90124921-8a17-44ee-8351-ff8242613651",
   "metadata": {},
   "source": [
    "## 3. Working with dates\n",
    "\n",
    "### 3.1. Creating datetime data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "08162f21-ab61-4596-ad82-bb40e37e3335",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2026-01-01', '2026-02-01', '2026-03-01', '2026-02-01'], dtype='datetime64[ns]', freq=None)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = pd.to_datetime(['2026-01-01', '2026-02-01', '2026-03-01', '2026-02-01'])\n",
    "dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "c4c3d667-ad54-4cfa-a176-74fad848ec11",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>New Year</td>\n",
       "      <td>2026-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Valentine</td>\n",
       "      <td>2026-02-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spring</td>\n",
       "      <td>2026-03-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>July 4th</td>\n",
       "      <td>2026-07-04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       event       date\n",
       "0   New Year 2026-01-01\n",
       "1  Valentine 2026-02-15\n",
       "2     Spring 2026-03-20\n",
       "3   July 4th 2026-07-04"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dates = pd.DataFrame({\n",
    "    'event': ['New Year', 'Valentine', 'Spring', 'July 4th'],\n",
    "    'date': pd.to_datetime(['2026-01-01', '2026-02-15', '2026-03-20', '2026-07-04'])\n",
    "})\n",
    "df_dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "4ed067c2-04f1-421a-9ae9-9805afb6aaab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4 entries, 0 to 3\n",
      "Data columns (total 2 columns):\n",
      " #   Column  Non-Null Count  Dtype         \n",
      "---  ------  --------------  -----         \n",
      " 0   event   4 non-null      object        \n",
      " 1   date    4 non-null      datetime64[ns]\n",
      "dtypes: datetime64[ns](1), object(1)\n",
      "memory usage: 192.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "df_dates.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "db7bd28c-20c1-4297-af36-b9e20e5eb9c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event</th>\n",
       "      <th>date</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>New Year</td>\n",
       "      <td>2026-01-01</td>\n",
       "      <td>2026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Valentine</td>\n",
       "      <td>2026-02-15</td>\n",
       "      <td>2026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spring</td>\n",
       "      <td>2026-03-20</td>\n",
       "      <td>2026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>July 4th</td>\n",
       "      <td>2026-07-04</td>\n",
       "      <td>2026</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       event       date  year\n",
       "0   New Year 2026-01-01  2026\n",
       "1  Valentine 2026-02-15  2026\n",
       "2     Spring 2026-03-20  2026\n",
       "3   July 4th 2026-07-04  2026"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dates['year'] = df_dates['date'].dt.year\n",
    "df_dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "617899cf-0f4a-47c1-a12e-07ec780a5b2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event</th>\n",
       "      <th>date</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>New Year</td>\n",
       "      <td>2026-01-01</td>\n",
       "      <td>2026</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Valentine</td>\n",
       "      <td>2026-02-15</td>\n",
       "      <td>2026</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spring</td>\n",
       "      <td>2026-03-20</td>\n",
       "      <td>2026</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>July 4th</td>\n",
       "      <td>2026-07-04</td>\n",
       "      <td>2026</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       event       date  year  month\n",
       "0   New Year 2026-01-01  2026      1\n",
       "1  Valentine 2026-02-15  2026      2\n",
       "2     Spring 2026-03-20  2026      3\n",
       "3   July 4th 2026-07-04  2026      7"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dates['month'] = df_dates['date'].dt.month\n",
    "df_dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "c6e69b73-2c60-4f58-908a-12f78ec791d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event</th>\n",
       "      <th>date</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>New Year</td>\n",
       "      <td>2026-01-01</td>\n",
       "      <td>2026</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Valentine</td>\n",
       "      <td>2026-02-15</td>\n",
       "      <td>2026</td>\n",
       "      <td>2</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spring</td>\n",
       "      <td>2026-03-20</td>\n",
       "      <td>2026</td>\n",
       "      <td>3</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>July 4th</td>\n",
       "      <td>2026-07-04</td>\n",
       "      <td>2026</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       event       date  year  month  day\n",
       "0   New Year 2026-01-01  2026      1    1\n",
       "1  Valentine 2026-02-15  2026      2   15\n",
       "2     Spring 2026-03-20  2026      3   20\n",
       "3   July 4th 2026-07-04  2026      7    4"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dates['day'] = df_dates['date'].dt.day\n",
    "df_dates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25d5a116-9f27-4357-9ce9-037800685213",
   "metadata": {},
   "source": [
    "### 3.3. Date arithmetic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "f2521fd5-0043-4e5d-b350-d40736f83313",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event</th>\n",
       "      <th>date</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>days_since_newyear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>New Year</td>\n",
       "      <td>2026-01-01</td>\n",
       "      <td>2026</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Valentine</td>\n",
       "      <td>2026-02-15</td>\n",
       "      <td>2026</td>\n",
       "      <td>2</td>\n",
       "      <td>15</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spring</td>\n",
       "      <td>2026-03-20</td>\n",
       "      <td>2026</td>\n",
       "      <td>3</td>\n",
       "      <td>20</td>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>July 4th</td>\n",
       "      <td>2026-07-04</td>\n",
       "      <td>2026</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       event       date  year  month  day  days_since_newyear\n",
       "0   New Year 2026-01-01  2026      1    1                   0\n",
       "1  Valentine 2026-02-15  2026      2   15                  45\n",
       "2     Spring 2026-03-20  2026      3   20                  78\n",
       "3   July 4th 2026-07-04  2026      7    4                 184"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "days_since_newyear = (df_dates['date'] - df_dates['date'].min()).dt.days\n",
    "df_dates['days_since_newyear'] = days_since_newyear\n",
    "df_dates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7a8ba21-9d74-4a4d-8174-936a520d261e",
   "metadata": {},
   "source": [
    "## 4. Working with data types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "9003c281-e3ae-4808-94a8-c8b586e12409",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers_as_text</th>\n",
       "      <th>prices</th>\n",
       "      <th>categories</th>\n",
       "      <th>is_active</th>\n",
       "      <th>dates_as_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>10.50</td>\n",
       "      <td>A</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20.75</td>\n",
       "      <td>B</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-01-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>15.25</td>\n",
       "      <td>A</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-01-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>30.00</td>\n",
       "      <td>C</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>25.50</td>\n",
       "      <td>B</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-01-05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  numbers_as_text prices categories is_active dates_as_text\n",
       "0               1  10.50          A      True    2024-01-01\n",
       "1               2  20.75          B     False    2024-01-02\n",
       "2               3  15.25          A      True    2024-01-03\n",
       "3               4  30.00          C      True    2024-01-04\n",
       "4               5  25.50          B     False    2024-01-05"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mixed_data = pd.DataFrame({\n",
    "    'numbers_as_text': ['1', '2', '3', '4', '5'],\n",
    "    'prices': ['10.50', '20.75', '15.25', '30.00', '25.50'],\n",
    "    'categories': ['A', 'B', 'A', 'C', 'B'],\n",
    "    'is_active': ['True', 'False', 'True', 'True', 'False'],\n",
    "    'dates_as_text': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05']\n",
    "})\n",
    "mixed_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "0dc3da58-a059-4500-bb40-6796e4a0fad8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5 entries, 0 to 4\n",
      "Data columns (total 5 columns):\n",
      " #   Column           Non-Null Count  Dtype \n",
      "---  ------           --------------  ----- \n",
      " 0   numbers_as_text  5 non-null      object\n",
      " 1   prices           5 non-null      object\n",
      " 2   categories       5 non-null      object\n",
      " 3   is_active        5 non-null      object\n",
      " 4   dates_as_text    5 non-null      object\n",
      "dtypes: object(5)\n",
      "memory usage: 328.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "mixed_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "1060fb70-a1e5-4dd8-83de-3d3dce550742",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5 entries, 0 to 4\n",
      "Data columns (total 5 columns):\n",
      " #   Column           Non-Null Count  Dtype  \n",
      "---  ------           --------------  -----  \n",
      " 0   numbers_as_text  5 non-null      object \n",
      " 1   prices           5 non-null      float64\n",
      " 2   categories       5 non-null      object \n",
      " 3   is_active        5 non-null      object \n",
      " 4   dates_as_text    5 non-null      object \n",
      "dtypes: float64(1), object(4)\n",
      "memory usage: 328.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "mixed_data['prices'] = mixed_data['prices'].astype(float)\n",
    "mixed_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "34acdb4e-d705-486d-81e2-7b8aa63be884",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers_as_text</th>\n",
       "      <th>prices</th>\n",
       "      <th>categories</th>\n",
       "      <th>is_active</th>\n",
       "      <th>dates_as_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>10.50</td>\n",
       "      <td>A</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20.75</td>\n",
       "      <td>B</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-01-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>15.25</td>\n",
       "      <td>A</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-01-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>30.00</td>\n",
       "      <td>C</td>\n",
       "      <td>True</td>\n",
       "      <td>2024-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>25.50</td>\n",
       "      <td>B</td>\n",
       "      <td>False</td>\n",
       "      <td>2024-01-05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  numbers_as_text  prices categories is_active dates_as_text\n",
       "0               1   10.50          A      True    2024-01-01\n",
       "1               2   20.75          B     False    2024-01-02\n",
       "2               3   15.25          A      True    2024-01-03\n",
       "3               4   30.00          C      True    2024-01-04\n",
       "4               5   25.50          B     False    2024-01-05"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mixed_data['categories'] = mixed_data['categories'].astype('category')\n",
    "mixed_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "682b76ab-9a8f-42f6-9628-8a8a247ec0cc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5 entries, 0 to 4\n",
      "Data columns (total 5 columns):\n",
      " #   Column           Non-Null Count  Dtype   \n",
      "---  ------           --------------  -----   \n",
      " 0   numbers_as_text  5 non-null      object  \n",
      " 1   prices           5 non-null      float64 \n",
      " 2   categories       5 non-null      category\n",
      " 3   is_active        5 non-null      object  \n",
      " 4   dates_as_text    5 non-null      object  \n",
      "dtypes: category(1), float64(1), object(3)\n",
      "memory usage: 425.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "mixed_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "3e9951f8-93d2-4ece-8d62-75d6aa27ea57",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['A', 'B', 'C'], dtype='object')"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mixed_data['categories'].cat.categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "3a0f8788-72b4-411a-802c-0367789d14de",
   "metadata": {},
   "outputs": [],
   "source": [
    "mixed_data['dates_as_text'] = mixed_data['dates_as_text'].astype('string')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "d85ea388-79f6-4843-93be-534f1de2109a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5 entries, 0 to 4\n",
      "Data columns (total 5 columns):\n",
      " #   Column           Non-Null Count  Dtype   \n",
      "---  ------           --------------  -----   \n",
      " 0   numbers_as_text  5 non-null      object  \n",
      " 1   prices           5 non-null      float64 \n",
      " 2   categories       5 non-null      category\n",
      " 3   is_active        5 non-null      object  \n",
      " 4   dates_as_text    5 non-null      string  \n",
      "dtypes: category(1), float64(1), object(2), string(1)\n",
      "memory usage: 425.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "mixed_data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d94ef96e-e6d8-4cb2-b879-54275b91c975",
   "metadata": {},
   "source": [
    "## 5. Iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "6c5bce80-48f8-4aba-a49d-202269dfbb32",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "first row, name: Alice\n",
      "second row, name: Bob\n",
      "third row, name: Laura\n",
      "fourth row, name: George\n"
     ]
    }
   ],
   "source": [
    "for index, row in df.iterrows():\n",
    "    print(f'{index} row, name: {row[\"name\"]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "7a3160c6-2948-4df4-bced-cf30a56e5894",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name         Alice\n",
      "age             25\n",
      "city      New York\n",
      "salary       50000\n",
      "Name: first, dtype: object\n",
      "name        Bob\n",
      "age          30\n",
      "city      Paris\n",
      "salary    60000\n",
      "Name: second, dtype: object\n",
      "name       Laura\n",
      "age           35\n",
      "city      London\n",
      "salary     70000\n",
      "Name: third, dtype: object\n",
      "name       George\n",
      "age            40\n",
      "city      Chicago\n",
      "salary      80000\n",
      "Name: fourth, dtype: object\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "\n",
    "while count < len(df):\n",
    "    print(f'{df.iloc[count]}')\n",
    "    count+=1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4b6c5b7a-39d3-4c94-8f75-139cbab32374",
   "metadata": {},
   "source": [
    "## 6. Sorting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "71ad75a5-5e83-482c-b235-ed0c1f950bb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age      city  salary\n",
       "first    Alice   25  New York   50000\n",
       "second     Bob   30     Paris   60000\n",
       "third    Laura   35    London   70000\n",
       "fourth  George   40   Chicago   80000"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values('age')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "563bac5f-7264-4e8a-9963-b38725036248",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age      city  salary\n",
       "fourth  George   40   Chicago   80000\n",
       "third    Laura   35    London   70000\n",
       "second     Bob   30     Paris   60000\n",
       "first    Alice   25  New York   50000"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values('age', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "110448ea-df28-4ed5-983f-e2c8b27feb08",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>60000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>70000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>80000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age      city  salary\n",
       "first    Alice   25  New York   50000\n",
       "second     Bob   30     Paris   60000\n",
       "third    Laura   35    London   70000\n",
       "fourth  George   40   Chicago   80000"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(['age', 'city'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e81e6d72-2b95-4555-81c0-2badc5ce8cdc",
   "metadata": {},
   "source": [
    "## 7. `apply()` and `groupby()`\n",
    "\n",
    "### 7.1 `apply()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "40ec2d2c-ffec-40ff-99c6-114c99f21d65",
   "metadata": {},
   "outputs": [],
   "source": [
    "def double(x):\n",
    "    print(f'Doubling {x}')\n",
    "    return x * 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "a2481c29-20bb-4770-8e5a-d470a5d80ab8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Doubling 5\n",
      "10\n"
     ]
    }
   ],
   "source": [
    "print(double(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "47900c1c-462d-4354-97dc-eddc85c4a082",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    }
   ],
   "source": [
    "double_lambda = lambda x: x * 2\n",
    "print(double_lambda(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "a2a9eb53-3cf4-42d4-9f90-cf0dd50bd74d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>60500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>72600.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>84700.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>96800.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age      city   salary\n",
       "first    Alice   25  New York  60500.0\n",
       "second     Bob   30     Paris  72600.0\n",
       "third    Laura   35    London  84700.0\n",
       "fourth  George   40   Chicago  96800.0"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['salary'] = df['salary'].apply(lambda x: x * 1.1)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "16af2dce-e9f6-4c7b-a8ed-5614217e2617",
   "metadata": {},
   "outputs": [],
   "source": [
    "def salary_bracket(salary):\n",
    "    \n",
    "    if salary >= 80000:\n",
    "        return 'high'\n",
    "    \n",
    "    elif salary >= 50000:\n",
    "        return 'mid'\n",
    "    \n",
    "    else:\n",
    "        return 'standard'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "c5ec69dc-34d5-419f-87cd-c1d72143b22c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>city</th>\n",
       "      <th>salary</th>\n",
       "      <th>bracket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>Alice</td>\n",
       "      <td>25</td>\n",
       "      <td>New York</td>\n",
       "      <td>60500.0</td>\n",
       "      <td>mid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>second</th>\n",
       "      <td>Bob</td>\n",
       "      <td>30</td>\n",
       "      <td>Paris</td>\n",
       "      <td>72600.0</td>\n",
       "      <td>mid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>third</th>\n",
       "      <td>Laura</td>\n",
       "      <td>35</td>\n",
       "      <td>London</td>\n",
       "      <td>84700.0</td>\n",
       "      <td>high</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fourth</th>\n",
       "      <td>George</td>\n",
       "      <td>40</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>96800.0</td>\n",
       "      <td>high</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          name  age      city   salary bracket\n",
       "first    Alice   25  New York  60500.0     mid\n",
       "second     Bob   30     Paris  72600.0     mid\n",
       "third    Laura   35    London  84700.0    high\n",
       "fourth  George   40   Chicago  96800.0    high"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['bracket'] = df['salary'].apply(salary_bracket)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c120c229-6e03-4814-b386-aeb71aa87b6a",
   "metadata": {},
   "source": [
    "### 7.2. `groupby()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "e49ddcfc-cb4f-459e-b9f2-32afffe0dbcf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>region</th>\n",
       "      <th>product</th>\n",
       "      <th>revenue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>North</td>\n",
       "      <td>A</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>South</td>\n",
       "      <td>A</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>North</td>\n",
       "      <td>B</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>South</td>\n",
       "      <td>B</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>East</td>\n",
       "      <td>A</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  region product  revenue\n",
       "0  North       A      100\n",
       "1  South       A      150\n",
       "2  North       B      200\n",
       "3  South       B      130\n",
       "4   East       A      110"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sales = pd.DataFrame({\n",
    "    'region': ['North', 'South', 'North', 'South', 'East'],\n",
    "    'product': ['A', 'A', 'B', 'B', 'A'],\n",
    "    'revenue': [100, 150, 200, 130, 110]\n",
    "})\n",
    "\n",
    "sales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "85e7dd27-f25c-4e53-89ad-7ebe93775f48",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "region\n",
       "East     110\n",
       "North    300\n",
       "South    280\n",
       "Name: revenue, dtype: int64"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sales.groupby('region')['revenue'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "5135c548-0480-4977-bcb0-8b0b4d50fe29",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "region  product\n",
       "East    A          110\n",
       "North   A          100\n",
       "        B          200\n",
       "South   A          150\n",
       "        B          130\n",
       "Name: revenue, dtype: int64"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sales.groupby(['region', 'product'])['revenue'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "95047e7b-93e9-4e7d-bb3e-ec3391c94ad9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "      <th>sum</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>region</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>East</th>\n",
       "      <td>110</td>\n",
       "      <td>110</td>\n",
       "      <td>110</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North</th>\n",
       "      <td>100</td>\n",
       "      <td>200</td>\n",
       "      <td>300</td>\n",
       "      <td>150.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>South</th>\n",
       "      <td>130</td>\n",
       "      <td>150</td>\n",
       "      <td>280</td>\n",
       "      <td>140.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        min  max  sum   mean\n",
       "region                      \n",
       "East    110  110  110  110.0\n",
       "North   100  200  300  150.0\n",
       "South   130  150  280  140.0"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sales.groupby('region')['revenue'].agg(['min', 'max', 'sum', 'mean'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a75f0cc-8445-4b00-bd05-b527b8ccbe93",
   "metadata": {},
   "source": [
    "## 8. Writing to a file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "ae62b086-8185-42eb-9a86-2a020c7192eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "sales.to_json('./sales_data.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "15e4fdd5-f720-42ed-82ff-bd0b50dd7b4d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>region</th>\n",
       "      <th>product</th>\n",
       "      <th>revenue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>North</td>\n",
       "      <td>A</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>South</td>\n",
       "      <td>A</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>North</td>\n",
       "      <td>B</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>South</td>\n",
       "      <td>B</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>East</td>\n",
       "      <td>A</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  region product  revenue\n",
       "0  North       A      100\n",
       "1  South       A      150\n",
       "2  North       B      200\n",
       "3  South       B      130\n",
       "4   East       A      110"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sales_json = pd.read_json('./sales_data.json')\n",
    "sales_json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "f3b34e14-5d28-4039-b64f-98b777bcd19a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5 entries, 0 to 4\n",
      "Data columns (total 3 columns):\n",
      " #   Column   Non-Null Count  Dtype \n",
      "---  ------   --------------  ----- \n",
      " 0   region   5 non-null      object\n",
      " 1   product  5 non-null      object\n",
      " 2   revenue  5 non-null      int64 \n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 248.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "sales.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "9f950f05-7c6e-46c1-803a-f34df64eb23d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 5 entries, 0 to 4\n",
      "Data columns (total 3 columns):\n",
      " #   Column   Non-Null Count  Dtype \n",
      "---  ------   --------------  ----- \n",
      " 0   region   5 non-null      object\n",
      " 1   product  5 non-null      object\n",
      " 2   revenue  5 non-null      int64 \n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 160.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "sales_json.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bed11687-7d21-4093-9622-1d17641b88d5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 [3.10]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}