{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Advanced pandas" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Categorical data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 apple\n", "1 orange\n", "2 banana\n", "3 apple\n", "4 orange\n", "5 banana\n", "6 apple\n", "7 orange\n", "8 banana\n", "dtype: object" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "values = pd.Series([\"apple\", \"orange\", \"banana\"] * 3)\n", "values" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['apple', 'orange', 'banana'], dtype=object)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "values.unique()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "apple 3\n", "orange 3\n", "banana 3\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "values.value_counts()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | fruits | \n", "
---|---|
0 | \n", "apple | \n", "
1 | \n", "orange | \n", "
2 | \n", "banana | \n", "
3 | \n", "apple | \n", "
4 | \n", "orange | \n", "
5 | \n", "banana | \n", "
6 | \n", "apple | \n", "
7 | \n", "orange | \n", "
8 | \n", "banana | \n", "