{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Simple Naive Bayes Example\n", "\n", "Example based on Lesson 2, from Intro to Machine Learning, Sebastian Thrun and Katie Malone:\n", "\n", "https://www.udacity.com/course/intro-to-machine-learning--ud120" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "import numpy as np\n", "import pylab as pl\n", "import matplotlib.pyplot as plt\n", "%matplotlib notebook" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare our data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def makeTerrainData(n_points=1000):\n", "###############################################################################\n", "### make the toy dataset\n", " random.seed(42)\n", " grade = [random.random() for ii in range(0,n_points)]\n", " bumpy = [random.random() for ii in range(0,n_points)]\n", " error = [random.random() for ii in range(0,n_points)]\n", " y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)]\n", " for ii in range(0, len(y)):\n", " if grade[ii]>0.8 or bumpy[ii]>0.8:\n", " y[ii] = 1.0\n", "\n", "### split into train/test sets\n", " X = [[gg, ss] for gg, ss in zip(grade, bumpy)]\n", " split = int(0.75*n_points)\n", " X_train = X[0:split]\n", " X_test = X[split:]\n", " y_train = y[0:split]\n", " y_test = y[split:]\n", " return X_train, y_train, X_test, y_test " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have defined a function that generates the data ('makeTerrainData'), lets call it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "features_train, labels_train, features_test, labels_test = makeTerrainData()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define a function for training the classifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.naive_bayes import GaussianNB\n", "\n", "def classify(features_train, labels_train): \n", " ### import the sklearn module for GaussianNB\n", " ### create classifier\n", " ### fit the classifier on the training features and labels\n", " ### return the fit classifier\n", " classifier = GaussianNB()\n", " classifier.fit(features_train, labels_train)\n", " return classifier" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have defined a function that trains the classifier, lets call it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "classifier = classify(features_train, labels_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def prettyPicture(classifier, X_test, y_test):\n", " x_min = 0.0; x_max = 1.0\n", " y_min = 0.0; y_max = 1.0\n", "\n", " # Plot the decision boundary. 
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Show results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def prettyPicture(classifier, X_test, y_test):\n", "    x_min = 0.0; x_max = 1.0\n", "    y_min = 0.0; y_max = 1.0\n", "\n", "    # Plot the decision boundary. For that, we will assign a color to each\n", "    # point in the mesh [x_min, x_max] x [y_min, y_max].\n", "    h = 0.01  # step size in the mesh\n", "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n", "    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])\n", "\n", "    # Put the result into a color plot\n", "    Z = Z.reshape(xx.shape)\n", "    plt.xlim(xx.min(), xx.max())\n", "    plt.ylim(yy.min(), yy.max())\n", "\n", "    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.seismic)\n", "\n", "    # Plot also the test points\n", "    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii] == 0]\n", "    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii] == 0]\n", "    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii] == 1]\n", "    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii] == 1]\n", "\n", "    plt.scatter(grade_sig, bumpy_sig, color=\"b\", label=\"fast\")\n", "    plt.scatter(grade_bkg, bumpy_bkg, color=\"r\", label=\"slow\")\n", "    plt.legend()\n", "    plt.xlabel(\"grade\")\n", "    plt.ylabel(\"bumpiness\")\n", "    plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "prettyPicture(classifier, features_test, labels_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Calculate Accuracy" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score\n", "\n", "labels_predicted = classifier.predict(features_test)\n", "accuracy_score(labels_test, labels_predicted)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# with normalize=False, accuracy_score returns the number of correctly classified test points\n", "accuracy_score(labels_test, labels_predicted, normalize=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(labels_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# manual cross-check of the accuracy, using the counts from the two cells above\n", "print(221/250)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }