{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 로지스틱 회귀와 그래디언트 하강법" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.datasets import load_breast_cancer\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read in data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#데이터 가져옴\n", "data = load_breast_cancer() \n", "#데이터 키로 설명변수\n", "X = data['data'] \n", "# Relabel such that 0 = 'benign' and 1 = malignant.\n", "Y = 1 - data['target'] #타겟키로 반응변수 0과 1을 역전시킴" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#데이터셋을 train, test로 쪼갬\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=1234)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1) 'sigmoid'및 'gradient'함수를 정의하여 아래 표시된 출력을 생성하라" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#공식 사용\n", "def sigmoid(x):\n", " s = 1.0/(1.0 + np.exp(-x)) \n", " return s\n", "\n", "def gradient(X, Y, beta):\n", " z = np.dot(X,beta.T)*Y\n", " ds = -Y*(1-sigmoid(z))*X\n", " return ds.sum(axis=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2) 'LogisticRegression'클래스를 정의하여 아래 표시된 출력을 생성하라" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "class LogisticRegression:\n", " def __init__(self, learn_rate):\n", " self.rate = learn_rate\n", " self.n_nodes = None\n", " self.beta = None\n", " \n", " def train(self, input_X, input_Y, n_epochs):\n", " self.n_nodes = input_X.shape[1] + 1\n", " self.beta = np.random.normal(0.0,1.0,(1,self.n_nodes))\n", " ones_column = np.ones((input_X.shape[0],1))\n", " X = np.concatenate((ones_column,input_X),axis=1)\n", " Y = (2*input_Y - 1).reshape(-1,1)\n", " for n in range(n_epochs):\n", " self.beta = self.beta - self.rate*gradient(X,Y,self.beta)\n", " return self.beta\n", " \n", " def query(self, input_X, prob=True, cutoff=0.5):\n", " ones_column = np.ones((input_X.shape[0],1))\n", " X = np.concatenate((ones_column,input_X),axis=1)\n", " z = np.dot(X,(self.beta).T)\n", " p = sigmoid(z)\n", " if prob :\n", " return p\n", " else:\n", " return (p > cutoff).astype('int')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3) Sample run" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Hyperparameter for the learner.\n", "learning_rate = 0.001" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Train and predict.\n", "LR = LogisticRegression(learning_rate)\n", "LR.train(X_train, Y_train, 2000)\n", "Y_pred = LR.query(X_test,prob=False,cutoff=0.5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 0.912\n" ] } ], "source": [ "# Display the accuracy.\n", "acc = (Y_pred == Y_test.reshape(-1,1)).mean() #예측된 y와 실제 y가 같느냐?하면 T/F로 나옴(논리배열 만들어짐)\n", " #논리배열.mean을 하면 T는 1로, F는 0으로 계산\n", "print('Accuracy : {}'.format(np.round(acc,3))) \n", "\n", "#정답의 비율이 바로 정확도 91.2%" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { 
"codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }