{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "dc72eb3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch \n",
    "from torch.utils.data import DataLoader, Dataset\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "800db13a",
   "metadata": {},
   "source": [
    "# Loading structured dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "db81ed75",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal.length</th>\n",
       "      <th>sepal.width</th>\n",
       "      <th>petal.length</th>\n",
       "      <th>petal.width</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>6.2</td>\n",
       "      <td>2.2</td>\n",
       "      <td>4.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>Versicolor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>5.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>6.3</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.4</td>\n",
       "      <td>Virginica</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>6.6</td>\n",
       "      <td>2.9</td>\n",
       "      <td>4.6</td>\n",
       "      <td>1.3</td>\n",
       "      <td>Versicolor</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     sepal.length  sepal.width  petal.length  petal.width     species\n",
       "24            4.8          3.4           1.9          0.2      Setosa\n",
       "68            6.2          2.2           4.5          1.5  Versicolor\n",
       "14            5.8          4.0           1.2          0.2      Setosa\n",
       "136           6.3          3.4           5.6          2.4   Virginica\n",
       "58            6.6          2.9           4.6          1.3  Versicolor"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"Data/iris.csv\")\n",
    "df.sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "de3be177",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"species\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "48d87531",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "{val: ind for ind, val in enumerate(df[\"species\"].unique())}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "59f61df1",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Iris(Dataset):\n",
    "    def __init__(self, target_col_name=\"species\"):\n",
    "        self.df = pd.read_csv(\"Data/iris.csv\")\n",
    "        x = self.df.drop(target_col_name, axis=1).to_numpy()\n",
    "        self.x = torch.from_numpy(x)\n",
    "        \n",
    "        replacement_dict = {'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}\n",
    "        y = self.df[target_col_name].replace(replacement_dict).to_numpy()\n",
    "        self.y = torch.from_numpy(y)\n",
    "\n",
    "    def __getitem__(self, index):\n",
    "        return self.x[index], self.y[index]\n",
    "\n",
    "    def __len__(self):\n",
    "        return self.df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "7d8f32ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(150, 5)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c477c964",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "150"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_data = Iris()\n",
    "len(iris_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "1bd4c953",
   "metadata": {},
   "outputs": [],
   "source": [
    "iris_data_loader = DataLoader(iris_data, batch_size=8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "bb4b7e85",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "independent col data: tensor([[5.1000, 3.5000, 1.4000, 0.2000],\n",
      "        [4.9000, 3.0000, 1.4000, 0.2000],\n",
      "        [4.7000, 3.2000, 1.3000, 0.2000],\n",
      "        [4.6000, 3.1000, 1.5000, 0.2000],\n",
      "        [5.0000, 3.6000, 1.4000, 0.2000],\n",
      "        [5.4000, 3.9000, 1.7000, 0.4000],\n",
      "        [4.6000, 3.4000, 1.4000, 0.3000],\n",
      "        [5.0000, 3.4000, 1.5000, 0.2000]], dtype=torch.float64), \n",
      "taget_col: tensor([0, 0, 0, 0, 0, 0, 0, 0])\n"
     ]
    }
   ],
   "source": [
    "for data in iris_data_loader:\n",
    "    x, y = data\n",
    "    print(f\"independent col data: {x}, \\ntaget_col: {y}\")\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ac6e22af",
   "metadata": {},
   "outputs": [],
   "source": [
    "iris_data_loader = DataLoader(iris_data, batch_size=8, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "37f39384",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "independent col data: tensor([[4.8000, 3.4000, 1.6000, 0.2000],\n",
      "        [5.7000, 2.8000, 4.5000, 1.3000],\n",
      "        [5.8000, 2.7000, 5.1000, 1.9000],\n",
      "        [6.3000, 2.8000, 5.1000, 1.5000],\n",
      "        [7.3000, 2.9000, 6.3000, 1.8000],\n",
      "        [6.9000, 3.1000, 4.9000, 1.5000],\n",
      "        [4.7000, 3.2000, 1.6000, 0.2000],\n",
      "        [6.5000, 2.8000, 4.6000, 1.5000]], dtype=torch.float64), \n",
      "taget_col: tensor([0, 1, 2, 2, 2, 1, 0, 1])\n"
     ]
    }
   ],
   "source": [
    "for data in iris_data_loader:\n",
    "    x, y = data\n",
    "    print(f\"independent col data: {x}, \\ntaget_col: {y}\")\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08f1211a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}