{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "import matplotlib.pyplot as plt\n", "import torch\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from torch.utils.data import Dataset, DataLoader" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class DiabetesDataset(Dataset):\n", " \"\"\" Diabetes dataset.\"\"\"\n", "\n", " # Initialize your data, download, etc.\n", " def __init__(self):\n", " data = pd.read_csv(\"diabetes.csv\", header = None)\n", " self.len = 500\n", " self.x_data = torch.from_numpy(data.iloc[:500, 0:-1].values)\n", " self.y_data = torch.from_numpy(data.iloc[:500, -1].values)\n", "\n", " def __getitem__(self, index):\n", " return self.x_data[index], self.y_data[index]\n", "\n", " def __len__(self):\n", " return self.len" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dataset = DiabetesDataset()\n", "train_loader = DataLoader(dataset=dataset,\n", " batch_size=128,\n", " shuffle=True,\n", " num_workers=2)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012345678
0-0.2941180.4874370.180328-0.2929290.0000000.001490-0.531170-0.0333330
1-0.882353-0.1457290.081967-0.4141410.000000-0.207153-0.766866-0.6666671
2-0.0588240.8391960.0491800.0000000.000000-0.305514-0.492741-0.6333330
3-0.882353-0.1055280.081967-0.535354-0.777778-0.162444-0.9239970.0000001
40.0000000.376884-0.344262-0.292929-0.6028370.2846500.887276-0.6000000
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 -0.294118 0.487437 0.180328 -0.292929 0.000000 0.001490 -0.531170 \n", "1 -0.882353 -0.145729 0.081967 -0.414141 0.000000 -0.207153 -0.766866 \n", "2 -0.058824 0.839196 0.049180 0.000000 0.000000 -0.305514 -0.492741 \n", "3 -0.882353 -0.105528 0.081967 -0.535354 -0.777778 -0.162444 -0.923997 \n", "4 0.000000 0.376884 -0.344262 -0.292929 -0.602837 0.284650 0.887276 \n", "\n", " 7 8 \n", "0 -0.033333 0 \n", "1 -0.666667 1 \n", "2 -0.633333 0 \n", "3 0.000000 1 \n", "4 -0.600000 0 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test = pd.read_csv(\"diabetes.csv\", header = None)\n", "test.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "test_x = torch.from_numpy(test.iloc[500:, 0:-1].values)\n", "test_y = torch.from_numpy(test.iloc[500:, -1].values)\n", "test_x = torch.tensor(test_x, dtype = torch.float)\n", "test_y = torch.tensor(test_y, dtype = torch.float).reshape(-1,1)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class LogisticRegression(torch.nn.Module):\n", " def __init__(self):\n", " super(LogisticRegression,self).__init__()\n", " self.l1 = torch.nn.Linear(8,6)\n", " self.l2 = torch.nn.Linear(6,4)\n", " self.l3 = torch.nn.Linear(4,1)\n", " self.sigmoid = torch.nn.Sigmoid()\n", " self.relu = torch.nn.ReLU()\n", " self.tanh = torch.nn.Tanh()\n", " def forward(self,x):\n", " o1 = self.tanh(self.l1(x))\n", " o2 = self.tanh(self.l2(o1))\n", " o3 = self.tanh(self.l3(o2))\n", " return self.sigmoid(o3)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "model = LogisticRegression()\n", "criterion = torch.nn.BCELoss()\n", "optimizer = torch.optim.RMSprop(model.parameters(), lr = 0.01)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "epoch_list = []\n", "train_loss = []\n", "test_loss = []" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for epoch in range(200):\n", " lo = []\n", " y_pred = model(test_x)\n", " test_l = criterion(y_pred, test_y)\n", " test_loss.append(test_l.item())\n", " \n", " for i, data in enumerate(train_loader, 0):\n", " # get the inputs\n", " inputs, labels = data\n", "\n", " # wrap them in Variable\n", " x, y = torch.tensor(inputs, dtype = torch.float), torch.tensor(labels.reshape(-1,1), dtype = torch.float)\n", " y_pred = model(x)\n", " loss = criterion(y_pred, y)\n", " loss.backward()\n", " optimizer.step()\n", " optimizer.zero_grad()\n", " lo.append(loss.item())\n", " train_loss.append(np.mean(lo))\n", " epoch_list.append(epoch)\n", " " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.plot(epoch_list, train_loss, label = \"Train loss\") \n", "plt.plot(epoch_list, test_loss, label = \"Test loss\")\n", "plt.legend()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test Accuracy" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "y_pred = model(test_x)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "y_pred = y_pred >0.5" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7953667953667953" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accuracy_score(y_pred, test_y)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }