{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#import correct libraries \n",
      "import cv2\n",
      "import numpy as np\n",
      "import matplotlib.pyplot as plt\n",
      "import os"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 48
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#Resize all images to 500x500 and convert from BGR to RGB color model\n",
      "size = (500,500)\n",
      "def img_to_numpy(filename):\n",
      "    img = cv2.imread(filename)\n",
      "    #convert to RGB\n",
      "    img = img[:,:,[2,1,0]]\n",
      "    #change size\n",
      "    img = cv2.resize(img,size)\n",
      "    return img"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 49
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#Create list of all images in directory\n",
      "img_dir = \"/Users/danielforsyth/Desktop/Dev/Python/Image Classification/paintings/\"\n",
      "images = [img_dir+ f for f in os.listdir(img_dir)]\n",
      "images.remove('/Users/danielforsyth/Desktop/Dev/Python/Image Classification/paintings/.DS_Store')\n",
      "labels = [1 if \"magritte\" in f.split('/')[-1] else 0 for f in images]\n",
      "\n",
      "data = []\n",
      "for image in images:\n",
      "    img = img_to_numpy(image)\n",
      "    data.append(img)\n",
      "    \n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 195
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#flatten array\n",
      "def flatten(image):\n",
      "    img = image.flatten()\n",
      "    return img"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 169
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#create list of flattened array of all images \n",
      "flat_list = []\n",
      "for i in data:\n",
      "    img = flatten(i)\n",
      "    flat_list.append(img)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 171
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n",
      "from sklearn.cross_validation import train_test_split\n",
      "\n",
      "X_train, X_test, y_train, y_test = train_test_split(flat_list, labels, test_size=0.3)\n",
      "print X_train.shape, X_test.shape"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "(84, 750000) (36, 750000)\n"
       ]
      }
     ],
     "prompt_number": 175
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn import linear_model\n",
      "from sklearn import metrics\n",
      "\n",
      "clf = linear_model.SGDClassifier()\n",
      "clf.fit(X_train, y_train)\n",
      "\n",
      "y_pred = clf.predict(X_test)\n",
      "\n",
      "y_pred\n",
      "\n",
      "y_test\n",
      "\n",
      "# print metrics.accuracy_score(y_test,y_pred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 205,
       "text": [
        "array([1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,\n",
        "       0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1])"
       ]
      }
     ],
     "prompt_number": 205
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn import svm\n",
      "\n",
      "svc = svm.SVC(kernel='linear')\n",
      "svc.fit(X_train, y_train)\n",
      "y_pred = svc.predict(X_test)\n",
      "\n",
      "print metrics.accuracy_score(y_test,y_pred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "0.75\n"
       ]
      }
     ],
     "prompt_number": 179
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn.svm import LinearSVC\n",
      "\n",
      "\n",
      "clf = LinearSVC()\n",
      "clf.fit(X_train, y_train)\n",
      "y_pred = clf.predict(X_test)\n",
      "\n",
      "\n",
      "print \"classification accuracy:\", metrics.accuracy_score(y_test, y_pred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "classification accuracy: 0.75\n"
       ]
      }
     ],
     "prompt_number": 180
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn.naive_bayes import MultinomialNB\n",
      "clf = MultinomialNB()\n",
      "clf.fit(X_train, y_train)\n",
      "y_pred = clf.predict(X_test)\n",
      "\n",
      "print \"classification accuracy:\", metrics.accuracy_score(y_test, y_pred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "classification accuracy: 0.805555555556\n"
       ]
      }
     ],
     "prompt_number": 181
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn.naive_bayes import GaussianNB\n",
      "clf = GaussianNB() \n",
      "clf.fit(X_train, y_train)\n",
      "y_pred = clf.predict(X_test)\n",
      "\n",
      "print \"classification accuracy:\", metrics.accuracy_score(y_test, y_pred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "classification accuracy: 0.722222222222\n"
       ]
      }
     ],
     "prompt_number": 182
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn.ensemble import RandomForestClassifier\n",
      "rf = RandomForestClassifier(n_estimators=100, min_samples_split=2)\n",
      "rf.fit(X_train, y_train)\n",
      "y_pred =  rf.predict_proba(X_test)\n",
      "\n",
      "\n",
      "\n",
      "predicted_probs = rf.predict_proba(flat_list)\n",
      "predicted_probs = [\"%f\" % x[1] for x in predicted_probs]\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 189
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}