{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Magritte painting classifier\n",
      "\n",
      "Loads every painting in `img_dir`, resizes it to 500x500 RGB, flattens it to a raw pixel vector and compares several scikit-learn classifiers at telling paintings whose file name contains `magritte` (label 1) from the rest (label 0)."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# All imports live here so a fresh kernel can run the notebook top to bottom.\n",
      "import os\n",
      "\n",
      "import cv2\n",
      "import numpy as np\n",
      "import matplotlib.pyplot as plt\n",
      "\n",
      "from sklearn import linear_model, metrics, svm\n",
      "from sklearn.ensemble import RandomForestClassifier\n",
      "from sklearn.naive_bayes import GaussianNB, MultinomialNB\n",
      "from sklearn.svm import LinearSVC\n",
      "\n",
      "try:\n",
      "    from sklearn.model_selection import train_test_split\n",
      "except ImportError:  # scikit-learn releases that predate model_selection\n",
      "    from sklearn.cross_validation import train_test_split"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Target (width, height) passed to cv2.resize for every image.\n",
      "size = (500, 500)\n",
      "\n",
      "# Directory holding the paintings; set PAINTINGS_DIR to override the default.\n",
      "img_dir = os.environ.get(\n",
      "    \"PAINTINGS_DIR\",\n",
      "    \"/Users/danielforsyth/Desktop/Dev/Python/Image Classification/paintings/\",\n",
      ")\n",
      "\n",
      "# Seed shared by the train/test split and the randomized estimators.\n",
      "SEED = 42"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Load data"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def img_to_numpy(filename):\n",
      "    \"\"\"Load an image file as an RGB array resized to `size`.\n",
      "\n",
      "    Raises IOError when OpenCV cannot decode `filename`.\n",
      "    \"\"\"\n",
      "    img = cv2.imread(filename)\n",
      "    if img is None:\n",
      "        # cv2.imread signals failure by returning None instead of raising.\n",
      "        raise IOError(\"could not read image: %s\" % filename)\n",
      "    # OpenCV decodes to BGR channel order; convert to RGB.\n",
      "    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
      "    return cv2.resize(img, size)\n",
      "\n",
      "\n",
      "def flatten(image):\n",
      "    \"\"\"Return a 1-D copy of `image`.\"\"\"\n",
      "    return image.flatten()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Collect every regular, non-hidden file in img_dir. Skipping dot-files covers\n",
      "# .DS_Store without failing when that file is absent; sorting makes the order\n",
      "# (and therefore the seeded split) independent of os.listdir's ordering.\n",
      "images = sorted(\n",
      "    os.path.join(img_dir, f)\n",
      "    for f in os.listdir(img_dir)\n",
      "    if not f.startswith(\".\") and os.path.isfile(os.path.join(img_dir, f))\n",
      ")\n",
      "assert images, \"no image files found in %s\" % img_dir\n",
      "\n",
      "# Label 1 when the file name contains \"magritte\", otherwise 0.\n",
      "labels = [1 if \"magritte\" in os.path.basename(f) else 0 for f in images]\n",
      "\n",
      "data = [img_to_numpy(f) for f in images]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# One row of raw pixel values per image.\n",
      "flat_list = [flatten(img) for img in data]\n",
      "\n",
      "# Convert to arrays so the splits below expose .shape.\n",
      "X = np.asarray(flat_list)\n",
      "y = np.asarray(labels)\n",
      "assert len(X) == len(y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Train/test split"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "X_train, X_test, y_train, y_test = train_test_split(\n",
      "    X, y, test_size=0.3, random_state=SEED\n",
      ")\n",
      "print(\"%s %s\" % (X_train.shape, X_test.shape))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Classifiers\n",
      "\n",
      "Each model is fitted on the training split and scored on the held-out test split."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def report_accuracy(model):\n",
      "    \"\"\"Fit `model` on the training split, print its test accuracy and\n",
      "    return the predicted test labels.\"\"\"\n",
      "    model.fit(X_train, y_train)\n",
      "    predictions = model.predict(X_test)\n",
      "    print(\"classification accuracy: %s\" % metrics.accuracy_score(y_test, predictions))\n",
      "    return predictions"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf = linear_model.SGDClassifier(random_state=SEED)\n",
      "y_pred = report_accuracy(clf)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "svc = svm.SVC(kernel='linear')\n",
      "y_pred = report_accuracy(svc)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf = LinearSVC(random_state=SEED)\n",
      "y_pred = report_accuracy(clf)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf = MultinomialNB()\n",
      "y_pred = report_accuracy(clf)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf = GaussianNB()\n",
      "y_pred = report_accuracy(clf)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rf = RandomForestClassifier(n_estimators=100, min_samples_split=2, random_state=SEED)\n",
      "y_pred = report_accuracy(rf)\n",
      "\n",
      "# Probability of class 1 (\"magritte\") for every image, formatted as text.\n",
      "# X also contains the training images, so these are not held-out estimates.\n",
      "predicted_probs = [\"%f\" % p[1] for p in rf.predict_proba(X)]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Previously recorded results\n",
      "\n",
      "An earlier, unseeded run of this notebook (84 training / 36 test images) printed these accuracies:\n",
      "\n",
      "- `SVC(kernel='linear')`: 0.75\n",
      "- `LinearSVC`: 0.75\n",
      "- `MultinomialNB`: 0.805555555556\n",
      "- `GaussianNB`: 0.722222222222\n",
      "\n",
      "The split is now seeded, so a fresh run may report different numbers."
     ]
    }
   ],
   "metadata": {}
  }
 ]
}