{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Iris Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> 📢: **This document was used during early development of siuba. See the [select docs](https://siuba.readthedocs.io/en/latest/api_table_core/03_select.html).**\n",
    "\n",
    "This notebook shows many different ways of selecting columns from the iris dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from siuba import *\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option('max_rows', 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Rather than import the iris data from sklearn, I am just including the\n",
    "## first 5 rows.\n",
    "\n",
    "# from sklearn import datasets\n",
    "# iris = datasets.load_iris()\n",
    "# df_iris = pd.DataFrame(iris.data, columns = iris.feature_names)\n",
    "# df_iris['species'] = iris.target_names[iris.target]\n",
    "\n",
    "df_iris = pd.DataFrame({\n",
    "    'sepal length (cm)': [5.1, 4.9, 4.7, 4.6, 5.0],\n",
    "    'sepal width (cm)': [3.5, 3.0, 3.2, 3.1, 3.6],\n",
    "    'petal length (cm)': [1.4, 1.4, 1.3, 1.5, 1.4],\n",
    "    'petal width (cm)': [0.2, 0.2, 0.2, 0.2, 0.2],\n",
    "    'species': ['setosa', 'setosa', 'setosa', 'setosa', 'setosa']\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal length (cm)</th>\n",
       "      <th>petal width (cm)</th>\n",
       "      <th>species</th>\n",
       "      <th>sepal length (cm)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>5.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal width (cm)  petal length (cm)  petal width (cm) species  \\\n",
       "0               3.5                1.4               0.2  setosa   \n",
       "1               3.0                1.4               0.2  setosa   \n",
       "2               3.2                1.3               0.2  setosa   \n",
       "3               3.1                1.5               0.2  setosa   \n",
       "4               3.6                1.4               0.2  setosa   \n",
       "\n",
       "   sepal length (cm)  \n",
       "0                5.1  \n",
       "1                4.9  \n",
       "2                4.7  \n",
       "3                4.6  \n",
       "4                5.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: a notebook cell only displays its last expression, so of the four\n",
    "# select() calls below only the final one appears in the output.\n",
    "\n",
    "# get sepal columns\n",
    "select(df_iris, _.startswith(\"sepal\"))\n",
    "\n",
    "# get width measure columns\n",
    "# note: method calls on _ are sent to df_iris.columns.str,\n",
    "# so _.endswith(\"...\") is equivalent to df_iris.columns.str.endswith(\"...\")\n",
    "select(df_iris, _.endswith(\"width (cm)\"))\n",
    "\n",
    "# move species to the front\n",
    "# _.endswith(\"\") is a hack to get everything\n",
    "select(df_iris, _.species, _.endswith(\"\"))\n",
    "\n",
    "# move sepal length to the back\n",
    "# first select all columns except \"sepal length (cm)\", then re-select it\n",
    "select(df_iris, -_[\"sepal length (cm)\"], _[\"sepal length (cm)\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wide table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>V2</th>\n",
       "      <th>V3</th>\n",
       "      <th>V6</th>\n",
       "      <th>V0</th>\n",
       "      <th>V8</th>\n",
       "      <th>V7</th>\n",
       "      <th>V4</th>\n",
       "      <th>V1</th>\n",
       "      <th>V5</th>\n",
       "      <th>V9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.226851</td>\n",
       "      <td>0.551315</td>\n",
       "      <td>0.980764</td>\n",
       "      <td>0.696469</td>\n",
       "      <td>0.480932</td>\n",
       "      <td>0.684830</td>\n",
       "      <td>0.719469</td>\n",
       "      <td>0.286139</td>\n",
       "      <td>0.423106</td>\n",
       "      <td>0.392118</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.438572</td>\n",
       "      <td>0.059678</td>\n",
       "      <td>0.182492</td>\n",
       "      <td>0.343178</td>\n",
       "      <td>0.531551</td>\n",
       "      <td>0.175452</td>\n",
       "      <td>0.398044</td>\n",
       "      <td>0.729050</td>\n",
       "      <td>0.737995</td>\n",
       "      <td>0.531828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.554383</td>\n",
       "      <td>0.388951</td>\n",
       "      <td>0.357398</td>\n",
       "      <td>0.318766</td>\n",
       "      <td>0.304768</td>\n",
       "      <td>0.043591</td>\n",
       "      <td>0.925132</td>\n",
       "      <td>0.691970</td>\n",
       "      <td>0.841670</td>\n",
       "      <td>0.398186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.355915</td>\n",
       "      <td>0.762548</td>\n",
       "      <td>0.151127</td>\n",
       "      <td>0.704959</td>\n",
       "      <td>0.240856</td>\n",
       "      <td>0.398876</td>\n",
       "      <td>0.593177</td>\n",
       "      <td>0.995358</td>\n",
       "      <td>0.691702</td>\n",
       "      <td>0.343456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          V2        V3        V6        V0        V8        V7        V4  \\\n",
       "0   0.226851  0.551315  0.980764  0.696469  0.480932  0.684830  0.719469   \n",
       "1   0.438572  0.059678  0.182492  0.343178  0.531551  0.175452  0.398044   \n",
       "..       ...       ...       ...       ...       ...       ...       ...   \n",
       "8   0.554383  0.388951  0.357398  0.318766  0.304768  0.043591  0.925132   \n",
       "9   0.355915  0.762548  0.151127  0.704959  0.240856  0.398876  0.593177   \n",
       "\n",
       "          V1        V5        V9  \n",
       "0   0.286139  0.423106  0.392118  \n",
       "1   0.729050  0.737995  0.531828  \n",
       "..       ...       ...       ...  \n",
       "8   0.691970  0.841670  0.398186  \n",
       "9   0.995358  0.691702  0.343456  \n",
       "\n",
       "[10 rows x 10 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from numpy.random import uniform, seed\n",
    "\n",
    "seed(123)\n",
    "\n",
    "df = pd.DataFrame(uniform(size = [10, 10]))\n",
    "df = df[np.array([3, 4, 7, 1, 9, 8, 5, 2, 6, 10]) - 1]\n",
    "df.columns = \"V\" + df.columns.astype(str)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>V3</th>\n",
       "      <th>V6</th>\n",
       "      <th>V0</th>\n",
       "      <th>V8</th>\n",
       "      <th>V7</th>\n",
       "      <th>V4</th>\n",
       "      <th>V1</th>\n",
       "      <th>V5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.551315</td>\n",
       "      <td>0.980764</td>\n",
       "      <td>0.696469</td>\n",
       "      <td>0.480932</td>\n",
       "      <td>0.684830</td>\n",
       "      <td>0.719469</td>\n",
       "      <td>0.286139</td>\n",
       "      <td>0.423106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.059678</td>\n",
       "      <td>0.182492</td>\n",
       "      <td>0.343178</td>\n",
       "      <td>0.531551</td>\n",
       "      <td>0.175452</td>\n",
       "      <td>0.398044</td>\n",
       "      <td>0.729050</td>\n",
       "      <td>0.737995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.388951</td>\n",
       "      <td>0.357398</td>\n",
       "      <td>0.318766</td>\n",
       "      <td>0.304768</td>\n",
       "      <td>0.043591</td>\n",
       "      <td>0.925132</td>\n",
       "      <td>0.691970</td>\n",
       "      <td>0.841670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.762548</td>\n",
       "      <td>0.151127</td>\n",
       "      <td>0.704959</td>\n",
       "      <td>0.240856</td>\n",
       "      <td>0.398876</td>\n",
       "      <td>0.593177</td>\n",
       "      <td>0.995358</td>\n",
       "      <td>0.691702</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          V3        V6        V0        V8        V7        V4        V1  \\\n",
       "0   0.551315  0.980764  0.696469  0.480932  0.684830  0.719469  0.286139   \n",
       "1   0.059678  0.182492  0.343178  0.531551  0.175452  0.398044  0.729050   \n",
       "..       ...       ...       ...       ...       ...       ...       ...   \n",
       "8   0.388951  0.357398  0.318766  0.304768  0.043591  0.925132  0.691970   \n",
       "9   0.762548  0.151127  0.704959  0.240856  0.398876  0.593177  0.995358   \n",
       "\n",
       "          V5  \n",
       "0   0.423106  \n",
       "1   0.737995  \n",
       "..       ...  \n",
       "8   0.841670  \n",
       "9   0.691702  \n",
       "\n",
       "[10 rows x 8 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# slice of columns from V3 through V5, following their order in df\n",
    "# (both endpoints are included)\n",
    "select(df, _[\"V3\":\"V5\"])\n",
    "#select(df, _[\"V5\":\"V3\"])\n",
    "\n",
    "# no num_range capability"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal length (cm)  sepal width (cm) species\n",
       "0                5.1               3.5  setosa\n",
       "1                4.9               3.0  setosa\n",
       "2                4.7               3.2  setosa\n",
       "3                4.6               3.1  setosa\n",
       "4                5.0               3.6  setosa"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a leading minus excludes the matching columns\n",
    "# (here: every column whose name starts with \"petal\")\n",
    "select(df_iris, -_.startswith(\"petal\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length (cm)</th>\n",
       "      <th>sepal width (cm)</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal width (cm)</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal length (cm)  sepal width (cm)  petal_length  petal width (cm) species\n",
       "0                5.1               3.5           1.4               0.2  setosa\n",
       "1                4.9               3.0           1.4               0.2  setosa\n",
       "2                4.7               3.2           1.3               0.2  setosa\n",
       "3                4.6               3.1           1.5               0.2  setosa\n",
       "4                5.0               3.6           1.4               0.2  setosa"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select can grab specific columns and rename them\n",
    "# (its result is not displayed, since only a cell's last expression is shown)\n",
    "select(df_iris, _.petal_length == _[\"petal length (cm)\"])\n",
    "\n",
    "# rename keeps all columns and changes only the given name\n",
    "rename(df_iris, petal_length = \"petal length (cm)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: should this raise an error? In the tidyverse this does a group rename,\n",
    "# which is pretty rare to see!\n",
    "#select(df_iris, _.obs == _.startswith('s'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}