{ "metadata": { "name": "script3" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "import scipy as sp\n", "import pylab as pl\n", "from sklearn.feature_extraction import text\n", "from sklearn import feature_selection\n", "from sklearn import svm\n", "from sklearn import linear_model\n", "from sklearn import neighbors\n", "from sklearn import naive_bayes\n", "from sklearn import metrics\n", "from sklearn import cross_validation\n", "from sklearn import grid_search\n", "from sklearn import ensemble\n", "from sklearn import decomposition\n", "import csv, codecs, re, unicodedata\n", "import simplenlp\n", "from nltk import corpus\n", "from nltk import tokenize\n", "import nltk\n", "from nltk import collocations\n", "from nltk import stem\n", "from time import time" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "CHAT_WORDS = set([w.lower() for w in corpus.nps_chat.words()])\n", "POS_KEY_WORDS = set(['maggot', 'pussy','dumb','bitch', 'momscreen','suffer','nazi',\n", " 'shit','troll','rapist','bastard','hoe','buddy','idiot','dick','cuz','tard','breath','stupidity',\n", " 'black','moron','racist', 'asshole','ass','shut','shitshut','complete','screen',\n", " 'motherfucker','mom','crawl','fag','cunt','sound','knock','retard','plain',\n", " 'coward','loser','lil','stupid','dirty','turd','cock','suck', 'fuck'])\n", "STOP_WORDS = set([w.lower() for w in corpus.stopwords.words('english')])\n", "CHAT_WO_STOP_WORDS = CHAT_WORDS - STOP_WORDS | set(['u','you','ur', 'are', 're'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "## CONSTANTS and CONTROLS\n", "TEST_RATIO = 0\n", "SELECT_CHI2 = 1 # 0 or negative values to turn it off\n", "FEATURE_EXTRACTION = 'raw' # possbile values 'raw' (non), 'pca', 'ica', 'l1-svc' 
(linear svc with l1 penalty)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## function to load unicode CSV\n",
 "def read_csv_columns(csv_file, columns, header = True):\n",
 "    \"\"\"Read selected columns of a latin-1 CSV file into a 2-D numpy array.\n",
 "\n",
 "    csv_file -- path of the CSV file to read\n",
 "    columns  -- indices of the columns to keep, in output order\n",
 "    header   -- when True the first row is skipped\n",
 "\n",
 "    Each kept field has one leading/trailing double quote stripped, its\n",
 "    backslash escapes decoded, is folded to plain ASCII and is then passed\n",
 "    through simplenlp.preprocess_text. Returns np.array(rows).\n",
 "    \"\"\"\n",
 "    data = []\n",
 "    ## 'with' closes the file even when a row fails to parse\n",
 "    with codecs.open(csv_file, 'r', 'latin-1') as csv_handle:\n",
 "        reader = csv.reader(csv_handle, delimiter=',')\n",
 "        if header:\n",
 "            ## the default keeps an empty file from raising StopIteration\n",
 "            next(reader, None)\n",
 "        for row in reader:\n",
 "            fields = []\n",
 "            for c in columns:\n",
 "                txt = re.sub(r'^\"|\"$','',row[c]).decode('unicode-escape')\n",
 "                ## second decode is best effort (presumably for fields that were\n",
 "                ## escaped twice - TODO confirm); the text is kept as is when it fails\n",
 "                try:\n",
 "                    txt = txt.decode('unicode-escape')\n",
 "                except UnicodeError:\n",
 "                    pass\n",
 "                ## assign the ASCII-folded text so that it is what gets preprocessed\n",
 "                txt = unicodedata.normalize('NFKD', txt).encode('ascii', 'ignore')\n",
 "                fields.append(simplenlp.preprocess_text(txt))\n",
 "            data.append(fields)\n",
 "    return np.array(data)\n",
 "\n",
 "def write_output(classifier, X, output_path = '../data/output.csv'):\n",
 "    \"\"\"Write the second predict_proba column for every row of X to output_path.\n",
 "\n",
 "    The file starts with the header line 'Insult,' followed by one\n",
 "    '<probability>,' line per sample.\n",
 "    \"\"\"\n",
 "    probas = classifier.predict_proba(X)\n",
 "    with open(output_path, 'w') as f:\n",
 "        f.write('Insult,\\n')\n",
 "        for i in xrange(probas.shape[0]):\n",
 "            f.write(str(probas[i,1]))\n",
 "            f.write(',\\n')"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## load train and test data\n",
 "train_data = read_csv_columns('../data/train.csv', columns = (0, 2))\n",
 "test_data = read_csv_columns('../data/test_with_solutions.csv', columns= (0, 2))\n",
 "verification_data = read_csv_columns('../data/verification.csv', columns=(2,))"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 },
{ "cell_type": "code", "collapsed": false, "input": [
 "X = train_data[:, 1]\n",
 "y = np.array(train_data[:, 0], dtype='uint8')\n",
 "## 1. split the data into training and test\n",
 "## train_X, test_X, train_y, test_y = cross_validation.train_test_split(X, y, test_size = TEST_RATIO, random_state = 0)\n",
 "## 2. 
directly use the test.csv as test data\n",
 "train_X, train_y = X, y\n",
 "test_X, test_y = test_data[:, 1], np.array(test_data[:, 0], dtype='uint8')\n",
 "## overfitting\n",
 "train_X, train_y = np.hstack((train_X, test_X)), np.hstack((train_y, test_y))\n",
 "verification_X = verification_data[:, 0]\n",
 "print verification_X.shape"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(2235,)\n" ] } ], "prompt_number": 6 },
{ "cell_type": "code", "collapsed": false, "input": [
 "######### nltk analysis ###########\n",
 "train_pos_tweets, train_neg_tweets = train_X[train_y == 1], train_X[train_y == 0]\n",
 "test_pos_tweets, test_neg_tweets = test_X[test_y == 1], test_X[test_y == 0]\n",
 "## Bigram for positive tweets\n",
 "train_pos_words = [w.lower() for w in tokenize.wordpunct_tokenize(' '.join(train_pos_tweets))]\n",
 "word_filter = lambda w: len(w) < 3 or w in STOP_WORDS\n",
 "pos_bicollocator = collocations.BigramCollocationFinder.from_words(train_pos_words)\n",
 "pos_bicollocator.apply_word_filter(word_filter)\n",
 "POS_BI_COLLACATIONS = pos_bicollocator.nbest(nltk.metrics.BigramAssocMeasures.likelihood_ratio, 100)\n",
 "stemmer = stem.PorterStemmer()"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## find synonyms of a word\n",
 "def synonyms(word):\n",
 "    \"\"\"Return the lemma names of every WordNet synset of word.\"\"\"\n",
 "    return [lemma.name for syn in corpus.wordnet.synsets(word) for lemma in syn.lemmas]\n",
 "\n",
 "def my_tokenizer(txt):\n",
 "    \"\"\"Turn one comment into the list of features handed to TfidfVectorizer.\n",
 "\n",
 "    The returned list mixes marker strings ('@NameTags', '@HtmlTags',\n",
 "    '@YouAre', '@HasPositive'), stemmed tokens, token bigrams, 2- and\n",
 "    3-grams of the CHAT_WO_STOP_WORDS tokens starting at 'you'/'u'/'ur',\n",
 "    bigrams of those tokens, and (token, pos) pairs for tokens whose first\n",
 "    WordNet synset has pos 'n', 'a' or 'v'.\n",
 "    \"\"\"\n",
 "    ## assert s.islower()\n",
 "    ########### preprocessing ############\n",
 "    ## remove @name tag name - dont remove it, use it!\n",
 "    #txt = re.compile(r'@\\w+').sub('', txt)\n",
 "    ## remove topics such as #1\n",
 "    txt = re.sub(r'(\\A|\\s)#[\\w\\d]+', \"\", txt)\n",
 "    ## condense 3 or more than 3 letters into 1, e.g. hhhheeeello to hello\n",
 "    txt = re.sub(r'(\\w)\\1{2,}', r'\\1', txt)\n",
 "    feats = []\n",
 "    ############ features based on raw text #############\n",
 "\n",
 "    ############ features based on lower text ###########\n",
 "    txt = txt.lower()\n",
 "    if re.search(r'@\\w+', txt):\n",
 "        feats += ['@NameTags']\n",
 "    if 'html' in txt:\n",
 "        feats += ['@HtmlTags']\n",
 "    ## NOTE(review): these are plain substring tests - 'u' alone matches any text\n",
 "    ## containing the letter u, so '@YouAre' fires far more often than its name\n",
 "    ## suggests; left unchanged so that the trained features stay the same\n",
 "    if any(w in txt for w in ['u', 'you', 'you are', 'ur']):\n",
 "        feats += ['@YouAre']\n",
 "    ## has positive words\n",
 "    if any(w in txt for w in POS_KEY_WORDS):\n",
 "        feats += ['@HasPositive']\n",
 "    ## positive bi_collocations\n",
 "    #for pair in POS_BI_COLLACATIONS:\n",
 "    #    if all([w in txt for w in pair]):\n",
 "    #        feats.append(pair)\n",
 "    ############ features based on bag of words #########\n",
 "    ## stem words\n",
 "    words = [stemmer.stem(w) for w in tokenize.wordpunct_tokenize(txt)]\n",
 "    ## bag of words\n",
 "    #feats += filter(lambda t: t.isalpha() and t in CHAT_WORDS, words)\n",
 "    feats += words\n",
 "    ## has positive words\n",
 "    ##if any([w in POS_KEY_WORDS for w in words]):\n",
 "    ##    feats += ['@HasPositive']\n",
 "    feats += [tuple(words[i:i+2]) for i in xrange(len(words)-1)]\n",
 "    ## words after you, u, ur you're\n",
 "    strict_words = [w for w in words if w.isalpha() and w in CHAT_WO_STOP_WORDS]\n",
 "    feats += [tuple(strict_words[i:i+2])\n",
 "              for (i,w) in enumerate(strict_words) if w in ('you', 'u', 'ur')]\n",
 "    feats += [tuple(strict_words[i:i+3])\n",
 "              for (i,w) in enumerate(strict_words) if w in ('you', 'u', 'ur')]\n",
 "    ## biwords of strict words\n",
 "    feats += [(strict_words[i], strict_words[i+1]) for i in xrange(len(strict_words)-1)]\n",
 "    ## synonyms of strict words\n",
 "    #for sw in strict_words:\n",
 "    #    if sw in POS_KEY_WORDS:\n",
 "    #        feats += synonyms(w)\n",
 "    for w in words:\n",
 "        ## WordNet lookup is best effort: a token whose lookup fails (for\n",
 "        ## instance because it has no synsets) adds no (token, pos) feature.\n",
 "        ## 'except Exception' lets KeyboardInterrupt/SystemExit propagate.\n",
 "        try:\n",
 "            pos = corpus.wordnet.synsets(w)[0].pos\n",
 "        except Exception:\n",
 "            continue\n",
 "        if pos in ('n', 'a', 'v'):\n",
 "            feats += [(w, pos)]\n",
 "    return feats\n",
 "\n",
 "## building features on training data\n",
 "tfidf_vectorizer = text.TfidfVectorizer(charset = 'latin-1', lowercase=False, \n",
 "                    sublinear_tf=True, tokenizer = my_tokenizer,#vocabulary = CHAT_WORDS,\n",
 "                    max_df=1.0)#, norm = 'l1')\n",
 "print 'extracting tfidf from training set...'\n",
 "t0 = time()\n",
 "train_X = tfidf_vectorizer.fit_transform(train_X)\n",
 "print 'done in %0.2fs' % (time() - t0)\n",
 "print 'shape of training data', train_X.shape"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "extracting tfidf from training set...\n", "done in 32.35s" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "shape of training data (6594, 43355)\n" ] } ], "prompt_number": 8 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## add extra features to tfidf\n",
 "'@HasPositive' in tfidf_vectorizer.get_feature_names()\n",
 "#print filter(lambda f: f[0]=='you', tfidf_vectorizer.get_feature_names())"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 9, "text": [ "True" ] } ], "prompt_number": 9 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## extract features from test data using same feature set\n",
 "print 'extracting tfidf from testing set...'\n",
 "t0 = time()\n",
 "test_X = tfidf_vectorizer.transform(test_X)\n",
 "verification_X = tfidf_vectorizer.transform(verification_X)\n",
 "print 'done in %0.2fs' % (time() - t0)\n",
 "print 'shape of testing data', test_X.shape"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "extracting tfidf from testing set...\n", "done in 10.88s" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "shape of testing data (2647, 43355)\n" ] } ], "prompt_number": 10 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## optionally do feature selection based on chi2\n",
 "if SELECT_CHI2 > 
0:\n",
 "    n_selected_features = int(train_X.shape[1] * SELECT_CHI2)\n",
 "    print 'Extracting %d best features by a chi-squred test' % n_selected_features\n",
 "    t0 = time()\n",
 "    ch2 = feature_selection.SelectKBest(feature_selection.chi2, k = n_selected_features)\n",
 "    train_X = ch2.fit_transform(train_X, train_y)\n",
 "    test_X = ch2.transform(test_X)\n",
 "    ## keep the verification matrix in the same feature space as train/test,\n",
 "    ## otherwise write_output(clf, verification_X) sees mismatched columns\n",
 "    verification_X = ch2.transform(verification_X)\n",
 "    print 'done in %0.2fs' % (time() - t0)"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Extracting 43355 best features by a chi-squred test\n", "done in 0.11s" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 11 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## optionally do feature extraction based on PCA or ICA\n",
 "## every branch applies the fitted transform to train, test AND verification data\n",
 "if FEATURE_EXTRACTION == 'raw':\n",
 "    pass\n",
 "elif FEATURE_EXTRACTION == 'pca':\n",
 "    t0 = time()\n",
 "    pca = decomposition.PCA(n_components=100)\n",
 "    train_X = sp.sparse.coo_matrix(pca.fit_transform(train_X.todense()))\n",
 "    test_X = sp.sparse.coo_matrix(pca.transform(test_X.todense()))\n",
 "    verification_X = sp.sparse.coo_matrix(pca.transform(verification_X.todense()))\n",
 "    print 'pca done in %0.3f' % (time() - t0)\n",
 "elif FEATURE_EXTRACTION == 'ica':\n",
 "    t0 = time()\n",
 "    ica = decomposition.FastICA(n_components = 100)\n",
 "    train_X = sp.sparse.coo_matrix(ica.fit_transform(train_X.todense()))\n",
 "    test_X = sp.sparse.coo_matrix(ica.transform(test_X.todense()))\n",
 "    verification_X = sp.sparse.coo_matrix(ica.transform(verification_X.todense()))\n",
 "    print 'ica done in %0.3f' % (time() - t0)\n",
 "elif FEATURE_EXTRACTION == 'l1-svc':\n",
 "    t0 = time()\n",
 "    l1svc = svm.LinearSVC(C = 1, penalty = 'l1', dual = False)\n",
 "    l1svc.fit(train_X, train_y)\n",
 "    train_X = l1svc.transform(train_X)\n",
 "    test_X = l1svc.transform(test_X)\n",
 "    verification_X = l1svc.transform(verification_X)\n",
 "    print 'l1-svc feature selection done in %0.3f' % (time() - t0)\n",
 "else:\n",
 "    raise RuntimeError('unknown feature extraction method')"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## define feature names from tfidf 
vectorizer\n",
 "tfidf_feature_names = tfidf_vectorizer.get_feature_names()\n",
 "print train_X.shape, test_X.shape"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(6594, 43355) (2647, 43355)\n" ] } ], "prompt_number": 13 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## benchmark classifiers\n",
 "def benchmark(clf):\n",
 "    \"\"\"Fit clf on the global train_X/train_y and score it on test_X/test_y.\n",
 "\n",
 "    Prints timings, the confusion matrix and the classification rate; when\n",
 "    clf.predict_proba can be called it also prints the ROC AUC and plots the\n",
 "    ROC curve. Returns the misclassified samples as (test_data row, true\n",
 "    label) pairs.\n",
 "    \"\"\"\n",
 "    ## NOTE(review): an earlier cell ('## overfitting') stacks the test rows onto\n",
 "    ## the training set, so the scores printed here are not held-out estimates\n",
 "    print 76 * '_'\n",
 "    print 'training: '\n",
 "    print clf\n",
 "    t0 = time()\n",
 "    clf.fit(train_X, train_y)\n",
 "    print 'training time: %0.2f' % (time() - t0)\n",
 "\n",
 "    t0 = time()\n",
 "    pred_probas = None\n",
 "    if hasattr(clf, 'predict_proba'):\n",
 "        ## predict_proba may still fail when called; then only the labels are\n",
 "        ## scored. 'except Exception' lets KeyboardInterrupt propagate.\n",
 "        try:\n",
 "            pred_probas = clf.predict_proba(test_X)[:,1]\n",
 "        except Exception:\n",
 "            pred_probas = None\n",
 "    pred = clf.predict(test_X)\n",
 "    print 'test time: %0.2f' % (time() - t0)\n",
 "\n",
 "    print 'confusion matrix:'\n",
 "    print metrics.confusion_matrix(test_y, pred)\n",
 "\n",
 "    print 'classification rate:'\n",
 "    print np.mean(test_y == pred) * 100., '%'\n",
 "\n",
 "    if pred_probas is not None:\n",
 "        fpr, tpr, thresholds = metrics.roc_curve(test_y, pred_probas)\n",
 "        auc_area = metrics.auc(fpr, tpr)\n",
 "        print 'AUC for test: ', auc_area\n",
 "        print 'ROC plot'\n",
 "        pl.figure()\n",
 "        pl.plot(fpr, tpr, 'b-')\n",
 "        ## label the figure so it can be read on its own\n",
 "        pl.xlabel('false positive rate')\n",
 "        pl.ylabel('true positive rate')\n",
 "        pl.title('ROC curve')\n",
 "\n",
 "    misclassified_indices = (test_y != pred)\n",
 "    misclassified = zip(test_data[misclassified_indices], test_y[misclassified_indices])\n",
 "    return misclassified"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 },
{ "cell_type": "code", "collapsed": false, "input": [
 "## test SVC\n",
 "#svc_classifier = svm.SVC(kernel = 'linear', C = 10, gamma = 0.01, tol=0.001, probability = True) # for tfidf l1 norm\n",
 "svc_classifier = svm.SVC(kernel = 'linear', C = 1, gamma = 0.01, tol=0.001, probability = True) # for tfidf l2 norm\n",
 "#svc_classifier = svm.SVC(kernel = 'rbf', C = 10, gamma = 0.01, tol=0.0001, probability = True)\n",
 "misclassified = benchmark(svc_classifier)"
 ], 
"language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "SVC(C=1, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.01,\n", " kernel=linear, probability=True, shrinking=True, tol=0.001,\n", " verbose=False)\n", "training time: 68.33" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "test time: 8.85" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "confusion matrix:\n", "[[1941 13]\n", " [ 80 613]]\n", "classification rate:\n", "96.4865885909 %\n", "AUC for test: 0.996905744091\n", "ROC plot\n" ] }, { "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD9CAYAAABHnDf0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEylJREFUeJzt3W9M1PcBx/HP2bu21jnqvxm9u4zKXQSrHN1oLDN2Z7sG\na7dbtrYJfbBUxhgxYV0XH2xzDwZ9YKTJHizlCV1al9qVsG5LaFa4bjqvzaSA8w9sysxVZTtYasqq\n1eky8PzuASmUAseBd7+TL+9XQvSX+3rfr9+Yd3/98eN3LmOMEQDAKotyvQAAQOYRdwCwEHEHAAsR\ndwCwEHEHAAsRdwCwUMq4f/vb39bq1au1adOmacc888wzCgaDCoVCOnHiRMYXCACYvZRxr6ysVDQa\nnfb1trY2vffee4rH43rxxRe1a9eujC8QADB7KeO+detWLVu2bNrX33jjDT399NOSpM2bN+vSpUu6\ncOFCZlcIAJg198384cHBQfn9/rFjn8+ngYEBrV69esI4l8t1M9MAwII114cI3FTcp5p4upCnu8CP\nPpKOHJGMkfbulTo6pHB4/PWrV6UtW6SCgtHjGzekRx+VliyZy+qd97Of1Wn37rpcL+OWwF6MYy/G\nsRfjvN65nxjfVNy9Xq8SicTY8cDAgLxe76zfJ5mU2tqkv/1N2rNnNNRbt44G/tAh6aGHbmaVt5al\nS6W1a3O9ilsDezGOvRjHXmTGTd0KGYlE9Morr0iSOjs7dffdd0+6JJOOY8ekSETq7JR275auXJHa\n20fP2m0KOwA4JeWZ+1NPPaW3335bQ0ND8vv9qq+v18jIiCSppqZGO3bsUFtbmwKBgJYsWaL9+/fP\negFnz45eZvn856XW1rn9JeaT8CevMS1w7MU49mIce5EZLice+etyuaa95r5tm3TixOjZ+8fX0QEA\nqds545/NZdw//FBasUL64x+lr3wl26sAgPnlZuKe08cP/P73o78SdgDIrJzG/cwZ6RvfyOUKAMBO\nOT9zX7MmlysAADvlLO4dHVJvr/Tkk7laAQDYKyffUB0elu64Qyoulnp6sj07AMxP8+5umfffH70c\nc+OGxGNnAGBq8+5umX/9a/RXwg4A2ZGTuL/1ljSHR9AAANKUk7j/4Q+jz5IBAGRHTuL+wQdSSUku\nZgaAhcHxuPf2SqdOSV/8otMzA8DC4XjcEwkpFJK+8AWnZwaAhcPxuH/0kbRqFXfKAEA2OR73X/xC\nWpTT
hx4AgP1u+jNUZysWk15/3elZAWBhcfwnVF2u0Q+5vuuubM8KAPPbvPoJVbdb8nicnhUAFhZH\n4z4yIl2/7uSMALAwORr39vbRX92OX+kHgIXF0bh3dEhlZdwGCQDZ5mjc77qLz0sFACc4GvejR0fv\nlAEAZJejcfd4eOwAADjB0bhzGyQAOIMHAQCAhRyN+1//KiWTTs4IAAuTo3H/z3+kQMDJGQFgYXI0\n7kND0pIlTs4IAAuTYw8OSyaNbrtt9OydwAPAzObNg8NcLsIOAE7gbhkAsJBjcb9xQ8r+BSAAgORg\n3P/+d6dmAgA4eua+aZNTswHAwjZj3KPRqAoLCxUMBtXQ0DDp9aGhIW3fvl0lJSXauHGjfvnLX2Zj\nnQCAWUgZ92QyqdraWkWjUZ0+fVrNzc3q6+ubMKaxsVH33XefTp48qVgspt27d+s6H7cEADmVMu7d\n3d0KBALKz8+Xx+NRRUWFWltbJ4xZs2aNLl++LEm6fPmyVqxYIfcUH7V07px08WIGVw4AmFbKD7wb\nHByU3+8fO/b5fOrq6powprq6Wg899JDWrl2rK1eu6Ne//vWU73XgQJ1uv12qq5PC4bDC4fBNLx4A\nbBKLxRSLxTLyXinj7krj8/D27t2rkpISxWIxnT17Vo888oh6enq0dOnSCeMCgTolk6NxBwBM9ukT\n3/r6+jm/V8rLMl6vV4lEYuw4kUjI5/NNGNPR0aEnn3xSklRQUKB77rlHZ86cmTzRImn9+jmvEwAw\nCynjXlpaqng8rv7+fg0PD6ulpUWRSGTCmMLCQh08eFCSdOHCBZ05c0br1q2b9F4ul5SXl8GVAwCm\nlfKyjNvtVmNjo8rLy5VMJlVVVaWioiI1NTVJkmpqarRnzx5VVlYqFArpxo0bev7557V8+XJHFg8A\nmJpjT4X88Y+NPvMZac+ebM8GAHaYN0+FBAA4g7gDgIWIOwBYiLgDgIWIOwBYiLgDgIWIOwBYiLgD\ngIWIOwBYiLgDgIUci/vgoFMzAQAci/uFC9Lttzs1GwAsbI7Fva9PmuJJwACALHAs7teuSYWFTs0G\nAAubY3H/7GelO+5wajYAWNi4WwYALETcAcBCxB0ALETcAcBCxB0ALETcAcBCxB0ALETcAcBCxB0A\nLORY3M+dkzwep2YDgIXNZYwxWZ/E5ZJklP2ZAMAeLpdLc020Y2fud97p1EwAAK65A4CFiDsAWIi4\nA4CFiDsAWIi4A4CFiDsAWIi4A4CFiDsAWIi4A4CFZox7NBpVYWGhgsGgGhoaphwTi8V03333aePG\njQqHw5leIwBgllI+WyaZTGr9+vU6ePCgvF6v7r//fjU3N6uoqGhszKVLl7Rlyxa99dZb8vl8Ghoa\n0sqVKydO4nLpzjuN/vvf7P1FAMA2WXu2THd3twKBgPLz8+XxeFRRUaHW1tYJY1577TU9/vjj8vl8\nkjQp7AAA57lTvTg4OCi/3z927PP51NXVNWFMPB7XyMiItm3bpitXruj73/++vvWtb016r+vX61RX\nN/r7cDjM5RsA+JRYLKZYLJaR90oZ99FH9aY2MjKi48eP69ChQ7p27ZrKysr0wAMPKBgMTpzIPR53\nAMBknz7xra+vn/N7pYy71+tVIpEYO04kEmOXXz7m9/u1cuVKLV68WIsXL9aDDz6onp6eSXEHADgn\n5TX30tJSxeNx9ff3a3h4WC0tLYpEIhPGfP3rX9ef//xnJZNJXbt2TV1dXdqwYUNWFw0ASC3lmbvb\n7VZjY6PKy8uVTCZVVVWloqIiNTU1SZJqampUWFio7du3q7i4WIsWLVJ1dTVxB4Acc+xj9rgVEgBm\nZ158zB4AwDnEHQAsRNwBwELEHQAsRNwBwELEHQAsRNwBwEKO3ecuGWV/JgCwx7y4zz0vz6mZAACO\nxZ3niAGAcxyL+9WrTs0EAHAs7qtXOzUTAMCxuOfnOzUTAIBbIQHAQs
QdACxE3AHAQsQdACxE3AHA\nQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQd\nACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQjPGPRqNqrCwUMFgUA0NDdOOO3r0qNxut373u99ldIEA\ngNlLGfdkMqna2lpFo1GdPn1azc3N6uvrm3LcD3/4Q23fvl3GmKwtFgCQnpRx7+7uViAQUH5+vjwe\njyoqKtTa2jpp3AsvvKAnnnhCq1atytpCAQDpc6d6cXBwUH6/f+zY5/Opq6tr0pjW1lb96U9/0tGj\nR+VyuaZ8r5Mn61RXN/r7cDiscDh8UwsHANvEYjHFYrGMvFfKuE8X6k969tlntW/fPrlcLhljpr0s\nU1IyHncAwGSfPvGtr6+f83uljLvX61UikRg7TiQS8vl8E8YcO3ZMFRUVkqShoSG1t7fL4/EoEonM\neVEAgJuTMu6lpaWKx+Pq7+/X2rVr1dLSoubm5gljzp07N/b7yspKfe1rXyPsAJBjKePudrvV2Nio\n8vJyJZNJVVVVqaioSE1NTZKkmpoaRxYJAJgdl3Hg3kWXy6WdO43278/2TABgj4+/lzkX/IQqAFiI\nuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOA\nhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7\nAFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFiIuAOAhYg7AFhoxrhHo1EVFhYqGAyqoaFh0uu/\n+tWvFAqFVFxcrC1btqi3tzcrCwUApM+d6sVkMqna2lodPHhQXq9X999/vyKRiIqKisbGrFu3Tu+8\n847y8vIUjUb13e9+V52dnVlfOABgeinP3Lu7uxUIBJSfny+Px6OKigq1trZOGFNWVqa8vDxJ0ubN\nmzUwMJC91QIA0pLyzH1wcFB+v3/s2Ofzqaura9rxL730knbs2DHlaydP1qmubvT34XBY4XB41osF\nAJvFYjHFYrGMvFfKuLtcrrTf6PDhw3r55Zd15MiRKV8vKRmPOwBgsk+f+NbX18/5vVLG3ev1KpFI\njB0nEgn5fL5J43p7e1VdXa1oNKply5bNeTEAgMxIec29tLRU8Xhc/f39Gh4eVktLiyKRyIQx//zn\nP/XNb35Tr776qgKBQFYXCwBIT8ozd7fbrcbGRpWXlyuZTKqqqkpFRUVqamqSJNXU1Oi5557TxYsX\ntWvXLkmSx+NRd3d39lcOAJiWyxhjsj6Jy6WdO43278/2TABgD5fLpbkmmp9QBQALEXcAsBBxBwAL\nEXcAsBBxBwALEXcAsBBxBwALEXcAsBBxBwALEXcAsBBxBwALEXcAsBBxBwALEXcAsBBxBwALEXcA\nsJBjcU8mnZoJAOBY3PPynJoJAOBY3PPznZoJAMA1dwCwEHEHAAsRdwCwEHEHAAsRdwCwEHEHAAsR\ndwCwEHEHAAsRdwCwEHEHAAsRdwCwEHEHAAsRdwCwEHEHAAsRdwCwEHEHAAsRd4fFYrFcL+GWwV6M\nYy/GsReZMWPco9GoCgsLFQwG1dDQMOWYZ555RsFgUKFQSCdOnMj4Im3CP9xx7MU49mIce5EZKeOe\nTCZVW1uraDSq06dPq7m5WX19fRPGtLW16b333lM8HteLL76oXbt2ZXXBAICZpYx7d3e3AoGA8vPz\n5fF4VFFRodbW1glj3njjDT399NOSpM2bN+vSpUu6cOFC9lYMAJiZSeH111833/nOd8aODxw4YGpr\nayeM+epXv2qOHDkydvzwww+bv/zlLxPGSOKLL7744msOX3PlVgoulyvVy2NG+z39n/v06wCA7Ep5\nWcbr9SqRSIwdJxIJ+Xy+lGMGBg
bk9XozvEwAwGykjHtpaani8bj6+/s1PDyslpYWRSKRCWMikYhe\neeUVSVJnZ6fuvvturV69OnsrBgDMKOVlGbfbrcbGRpWXlyuZTKqqqkpFRUVqamqSJNXU1GjHjh1q\na2tTIBDQkiVLtH//fkcWDgBIYc5X66fQ3t5u1q9fbwKBgNm3b9+UY773ve+ZQCBgiouLzfHjxzM5\n/S1lpr149dVXTXFxsdm0aZP50pe+ZHp6enKwSmek8+/CGGO6u7vNbbfdZn772986uDpnpbMXhw8f\nNiUlJebee+81X/7yl51doINm2osPPvjAlJeXm1AoZO69916zf/9+5xfpgMrKSvO5z33ObNy4cdox\nc+lmxuJ+/fp1U1BQYM6fP2+Gh4dNKBQyp0+fnjDmzTffNI8++qgxxpjOzk6zefPmTE1/S0lnLzo6\nOsylS5eMMaP/yBfyXnw8btu2beaxxx4zv/nNb3Kw0uxLZy8uXrxoNmzYYBKJhDFmNHA2SmcvfvrT\nn5of/ehHxpjRfVi+fLkZGRnJxXKz6p133jHHjx+fNu5z7WbGHj/APfHj0tmLsrIy5eXlSRrdi4GB\ngVwsNevS2QtJeuGFF/TEE09o1apVOVilM9LZi9dee02PP/742I0LK1euzMVSsy6dvVizZo0uX74s\nSbp8+bJWrFghtzvlleR5aevWrVq2bNm0r8+1mxmL++DgoPx+/9ixz+fT4ODgjGNsjFo6e/FJL730\nknbs2OHE0hyX7r+L1tbWsZ9uTvcW3Pkmnb2Ix+P68MMPtW3bNpWWlurAgQNOL9MR6exFdXW1Tp06\npbVr1yoUCunnP/+508u8Jcy1mxn7z2Cm7om3wWz+TocPH9bLL7+sI0eOZHFFuZPOXjz77LPat2+f\nXC6XzOilQgdW5rx09mJkZETHjx/XoUOHdO3aNZWVlemBBx5QMBh0YIXOSWcv9u7dq5KSEsViMZ09\ne1aPPPKIenp6tHTpUgdWeGuZSzczFnfuiR+Xzl5IUm9vr6qrqxWNRlP+b9l8ls5eHDt2TBUVFZKk\noaEhtbe3y+PxTLrtdr5LZy/8fr9WrlypxYsXa/HixXrwwQfV09NjXdzT2YuOjg795Cc/kSQVFBTo\nnnvu0ZkzZ1RaWuroWnNtzt3MyHcEjDEjIyNm3bp15vz58+Z///vfjN9Qfffdd639JmI6e/GPf/zD\nFBQUmHfffTdHq3RGOnvxSTt37rT2bpl09qKvr888/PDD5vr16+bq1atm48aN5tSpUzlacfaksxc/\n+MEPTF1dnTHGmPfff994vV7z73//OxfLzbrz58+n9Q3V2XQzY2fu3BM/Lp29eO6553Tx4sWx68we\nj0fd3d25XHZWpLMXC0U6e1FYWKjt27eruLhYixYtUnV1tTZs2JDjlWdeOnuxZ88eVVZWKhQK6caN\nG3r++ee1fPnyHK8885566im9/fbbGhoakt/vV319vUZGRiTdXDddxlh6gRMAFjA+iQkALETcAcBC\nxB0ALETcAcBCxB0ALETcAcBC/wcelbcYT+bpDAAAAABJRU5ErkJggg==\n" } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "#write_output(bivoting, test_X)\n", "write_output(svc_classifier, verification_X)\n", "print 'writing verification on svc done...'\n", "#filter(lambda case: case[1] == 1, misclassified)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "writing verification on 
svc done...\n" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "## test naive bayesian bernoulli\n", "nb_bernoulli_classifier = naive_bayes.BernoulliNB(alpha = 0.1, fit_prior=True)# for tfidf l2 norm\n", "#nb_bernoulli_classifier = naive_bayes.BernoulliNB(alpha = 0.3, fit_prior=True) # for tfidf l1 norm\n", "misclassified = benchmark(nb_bernoulli_classifier)\n", "#negative_features_indices = np.argsort(nb_bernoulli_classifier.feature_log_prob_[0,:])[-30:]\n", "#negative_features = [tfidf_feature_names[i] for i in negative_features_indices]\n", "#print 'negative features...'\n", "#print negative_features\n", "#positive_features_indices = np.argsort(nb_bernoulli_classifier.feature_log_prob_[1,:])[-50:]\n", "#positive_features = [tfidf_feature_names[i] for i in positive_features_indices]\n", "#print 'positive features...'\n", "#print positive_features" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "BernoulliNB(alpha=0.1, binarize=0.0, fit_prior=True)\n", "training time: 0.02\n", "test time: 0.02\n", "confusion matrix:\n", "[[1772 182]\n", " [ 6 687]]\n", "classification rate:\n", "92.8976199471 %\n", "AUC for test: " ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0.995570561589\n", "ROC plot\n" ] }, { "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD9CAYAAABHnDf0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFfhJREFUeJzt3X9M1df9x/HX1ctWtMxKtV299y6o91ZQBDFY61LrdU3F\nug23/tgwW1cpY8SM1XUma+OSFZrFlMZlXUba0KbazcUfq22GXfXaaXp1LQprbdmmxlEnzYV0RqYo\nK02B6+f7B9/CELhc4N7PhcPzkdzAJ5/DOW9P8OXHc8/ncx2WZVkCABhlUqILAADEHuEOAAYi3AHA\nQIQ7ABiIcAcAAxHuAGCgiOH+8MMP6+abb9bChQsHbfPII4/I5/MpOztb7733XswLBAAMX8RwLyws\nVCAQGPT8/v379cEHH6ihoUHPP/+8NmzYEPMCAQDDFzHcly9frunTpw96ft++fXrooYckSUuXLlVr\na6vOnz8f2woBAMPmHM0PNzc3y+Px9By73W41NTXp5ptv7tPO4XCMZhgAmLBG+hCBUYX7QAMPFuRj\n/SkHra1STc3wfubIEWnnTmnatO7jkyelO+6Q0tKkq1el66+Xnnii78/88pdl2rSpLBYlj3vMRS/m\nohdz0cvlGvmF8ajC3eVyKRQK9Rw3NTXJ5XKNpsuY+/RT6f33e48fe0zq7JSc1/zJjx2Turqk1auj\n77ujQyoulu69t/s4KUm69VYp0n9UUlKkWbOiH8NkzEUv5qIXcxEbowr3/Px8VVZWqqCgQMePH9cN\nN9zQb0nGLl1d0v33S5OueRfh+HHpo4+k227rPr54UXr2Welzn+vfx/z50syZ8a8VAOItYrivW7dO\nR44cUUtLizwej8rLy9XZ2SlJKikp0Zo1a7R//355vV5NnTpV27dvt6Xo/9XeLjU1SV/7mtTQIL3y\nSt/z3/2ulJUleb22lzYgv9+f6BLGDOaiF3PRi7mIDYcdj/x1OBwxW3N/4QXp7Nne44qK7q8ej/Ta\na1J2dkyGAYCEG012jrtwT0uTvv1t6bMdmlOnSqWlkde5AWA8Gk12jnq3jB3eeks6c6b7+7Y2acOG\n7pAHAAxszIZ7V5e0Y4e0cWN3oC9fLvl83VftCXrPFgDGjTG7LHPggLRmjfSDH0g//ak0Zw5LLwAm\nFiPX3O+6q3tb45//HKeiAGCMG024j8lH/lpW981HjzyS6EoAYHwak+G+c6f09tusrQPASI25cH/p\npe4bj77znd67SgEAwzOmwv2tt6TCQunRR6Vf/SrR1QDA+DVm3lB95pnuUF+1Sjp4MN4VAcDYN+53\ny7S0dD+w6+GHpeeflyZPjndFADD2jfvdMkePdn997jmCHQBiYUyE+29/Ky1ZMvBjeAEAwzcmwn3S\nJOnBBxNdBQCYY0yEu9MpffGLia4CAMyR0HC3LGnbNmnvXukLX0hkJQBgloTulmlv7w71zZulJ5+M\ndxUAML6M262QL78sfetb3VfwAIC+xu1WyIIC6XvfS2QFAGCmhF65T58u/eMfkssV7woAYPwZl1fu\nly9Lra1SSkqiKgAAcyUs3H/+8+6v7JIBgNhLWLh3dUmVlYkaHQDMlrBw//vfEzUyAJgvIeH+8cfS\nX/4iZWUlYnQAMF9Cdst88IHk87G/HQAiGXe7Zfbtk2bNSsTIADAxJCTcJ02SHnggESMDwMQwZLgH\nAgGlp6fL5/OpoqKi3/lLly7pm9/8prKzs7V06VKdPHlyyEEtiyUZAIiniOEeDodVWlqqQCCgU6dO\nadeuXTp9+nSfNlu2bNHixYtVX1+v3/3ud9q4cWPEATs7pZ/8hP3tABBPEcO9rq5OXq9XaWlpSkpK\nUkFBgaqrq/u0OX36tFauXClJmjdvnhobG3XhwoVB+2xu7v5aXj7KygEAg3JGOtnc3CyPx9Nz7Ha7\nVVtb26dNdna2Xn31Vd1xxx2qq6vThx9+qKamJs2cObNPu7KyM
knShQvS1Kl+TZrkj8kfAABMEQwG\nFQwGY9JXxHB3OBxDdvD4449r48aNysnJ0cKFC5WTk6PJA3zK9Wfhvm+f9PrrIysWAEzm9/vl9/t7\njstHscQRMdxdLpdCoVDPcSgUktvt7tMmJSVF27Zt6zmePXu25syZM2ifkydLCxaMtFwAQDQirrnn\n5uaqoaFBjY2N6ujo0J49e5Sfn9+nzeXLl9XR0SFJeuGFF7RixQpdf/318asYADCkiFfuTqdTlZWV\nysvLUzgcVlFRkTIyMlRVVSVJKikp0alTp7R+/Xo5HA5lZmbqxRdftKVwAMDgbH/8wOuvS88+y7o7\nAAxlXD1+oLlZ+v9VHABAnNge7hcvSlOn2j0qAEwstoe7wyHNm2f3qAAwsSTswzoAAPFje7i/+650\n9ardowLAxGJ7uL/8Ms9yB4B4s30rpMMhtbVJ3OcEAJGNm62QR492f01OtnNUAJh4bA33HTuk5cu7\nny8DAIgfW8O9pUVatcrOEQFgYrI13NvapFtvtXNEAJiYbAt3y5IOH+buVACwg227ZS5etJSaKnV1\nseYOANEYF7tlQiFp2jSCHQDsYOua+5e+ZOdoADBx8WwZADCQbeHe1ib99792jQYAE5tt4f6vf/HI\nAQCwi23hPnmylJlp12gAMLHZFu5dXVI4bNdoADCx2Rbu77wjffqpXaMBwMRmW7hff7102212jQYA\nExtbIQHAQIQ7ABiIcAcAAxHuAGAgwh0ADGRbuLPHHQDsM2S4BwIBpaeny+fzqaKiot/5lpYWrV69\nWosWLVJmZqZeeumlAfupqWGfOwDYJWK4h8NhlZaWKhAI6NSpU9q1a5dOnz7dp01lZaVycnL0/vvv\nKxgMatOmTerq6urXV2qqtHhxbIsHAAwsYrjX1dXJ6/UqLS1NSUlJKigoUHV1dZ82t9xyi65cuSJJ\nunLlim688UY5nc74VQwAGFLEFG5ubpbH4+k5drvdqq2t7dOmuLhYX/nKVzRr1iy1tbXpD3/4w4B9\nnTlTpt27pffek/x+v/x+/+irBwCDBINBBYPBmPQVMdwdDseQHWzZskWLFi1SMBjU2bNndffdd6u+\nvl4pKSl92n3ySZkeeEC6997RFQwAprr2wre8vHzEfUVclnG5XAqFQj3HoVBIbre7T5uamho98MAD\nkqS5c+dq9uzZOnPmTL++2tul9PQR1wkAGIaI4Z6bm6uGhgY1Njaqo6NDe/bsUX5+fp826enpOnTo\nkCTp/PnzOnPmjObMmdOvry98Qfr852NYOQBgUBGXZZxOpyorK5WXl6dwOKyioiJlZGSoqqpKklRS\nUqLNmzersLBQ2dnZunr1qp5++mmlpqbaUjwAYGAOy7KsuA/icGjOHEtvvCHNnRvv0QDADA6HQyON\naB4/AAAGsi3cP/rIrpEAALaF+yefSDfdZNdoADCx2Rbu110nXbP1HQAQJ6y5A4CBCHcAMBDhDgAG\nItwBwECEOwAYiHAHAAMR7gBgIMIdAAxEuAOAgQh3ADAQ4Q4ABrLtee6SpfiPBADmGBfPc7/uOrtG\nAgDYFu4DfKwqACBObAt3Z8RPawUAxJJt4b54sV0jAQDYLQMABiLcAcBAhDsAGIhwBwADEe4AYCDC\nHQAMRLgDgIEIdwAw0JDhHggElJ6eLp/Pp4qKin7nt27dqpycHOXk5GjhwoVyOp1qbW2NS7EAgOhE\nfCpkOBzWvHnzdOjQIblcLi1ZskS7du1SRkbGgO3/9Kc/6ZlnntGhQ4f6DuJwaP16S9u3x7Z4ADBZ\n3J4KWVdXJ6/Xq7S0NCUlJamgoEDV1dWDtt+5c6fWrVs3okIAALET8XFezc3N8ng8Pcdut1u1tbUD\ntm1vb9fBgwf17LPPDnj+nXfKVFbW/b3f75ff7x9RwQBgqmAwqGAwGJO+IoZ794dsROe1117THXfc\noRtuuGHA82vX9oY7AKC/a
y98y8vLR9xXxGUZl8ulUCjUcxwKheR2uwdsu3v37ohLMtOnj7BCAMCw\nRQz33NxcNTQ0qLGxUR0dHdqzZ4/y8/P7tbt8+bKOHj2qtWvXxq1QAED0Ii7LOJ1OVVZWKi8vT+Fw\nWEVFRcrIyFBVVZUkqaSkRJL0xz/+UXl5eUpOTo5/xQCAIdn2Adlbt1ratCneIwGAOcbFB2QDAOxD\nuAOAgQh3ADAQ4Q4ABiLcAcBAhDsAGIhwBwADEe4AYCDCHQAMRLgDgIEIdwAwEOEOAAYi3AHAQIQ7\nABiIcAcAAxHuAGAgwh0ADES4A4CBCHcAMBDhDgAGItwBwECEOwAYiHAHAAMR7gBgIMIdAAxEuAOA\ngQh3ADAQ4Q4ABhoy3AOBgNLT0+Xz+VRRUTFgm2AwqJycHGVmZsrv98e6RgDAMDkjnQyHwyotLdWh\nQ4fkcrm0ZMkS5efnKyMjo6dNa2urfvjDH+rgwYNyu91qaWmJe9EAgMgiXrnX1dXJ6/UqLS1NSUlJ\nKigoUHV1dZ82O3fu1H333Se32y1JmjFjRvyqBQBEJeKVe3NzszweT8+x2+1WbW1tnzYNDQ3q7OzU\nypUr1dbWpo0bN+rBBx/s19cbb5Spra37e7/fz/INAFwjGAwqGAzGpK+I4e5wOIbsoLOzUydOnNDh\nw4fV3t6uZcuW6fbbb5fP5+vTbtWqMm3aNLpiAcBk1174lpeXj7iviOHucrkUCoV6jkOhUM/yy2c8\nHo9mzJih5ORkJScn684771R9fX2/cAcA2Cfimntubq4aGhrU2Niojo4O7dmzR/n5+X3arF27Vm+9\n9ZbC4bDa29tVW1ur+fPnx7VoAEBkEa/cnU6nKisrlZeXp3A4rKKiImVkZKiqqkqSVFJSovT0dK1e\nvVpZWVmaNGmSiouLCXcASDCHZVlW3AdxOLR1q8WaOwAMg8Ph0EgjmjtUAcBAhDsAGIhwBwADEe4A\nYCDCHQAMRLgDgIEIdwAwEOEOAAYi3AHAQIQ7ABiIcAcAAxHuAGAgwh0ADES4A4CBCHcAMBDhDgAG\nItwBwECEOwAYiHAHAAMR7gBgIMIdAAxEuAOAgQh3ADAQ4Q4ABiLcAcBAhDsAGIhwBwADEe4AYCDC\nHQAMNGS4BwIBpaeny+fzqaKiot/5YDCoadOmKScnRzk5OfrFL34Rl0IBANFzRjoZDodVWlqqQ4cO\nyeVyacmSJcrPz1dGRkafditWrNC+ffviWigAIHoRr9zr6urk9XqVlpampKQkFRQUqLq6ul87y7Li\nViAAYPgiXrk3NzfL4/H0HLvdbtXW1vZp43A4VFNTo+zsbLlcLm3dulXz58/v19cbb5Spra37e7/f\nL7/fP/rqAcAgwWBQwWAwJn1FDHeHwzFkB4sXL1YoFNKUKVN04MABfeMb39A///nPfu1WrSrTpk0j\nLxQATHfthW95efmI+4q4LONyuRQKhXqOQ6GQ3G53nzYpKSmaMmWKJOmee+5RZ2enLl68OOKCAACj\nFzHcc3Nz1dDQoMbGRnV0dGjPnj3Kz8/v0+b8+fM9a+51dXWyLEupqanxqxgAMKSIyzJOp1OVlZXK\ny8tTOBxWUVGRMjIyVFVVJUkqKSnR3r179dxzz8npdGrKlCnavXu3LYUDAAbnsGzY6uJwOLR1q8Wa\nOwAMg8PhGPFuRO5QBQADEe4AYCDCHQAMRLgDgIEIdwAwEOEOAAYi3AHAQIQ7ABiIcAcAAxHuAGAg\nwh0ADES4A4CBCHcAMBDhDgAGItwBwECEOwAYiHAHAAMR7gBgIMIdAAxEuAOAgQh3ADAQ4Q4ABiLc\nAcBAhDsAGIhwBwADEe4AYCDCHQAMRLjbLBgMJrqEMYO56MVc9GIuYmPIcA8EAkpPT5fP51NFRcWg\n7f7617/K6XTq1VdfjWmBpuEXtxdz0Yu56MVcxEbEcA+HwyotLVUgENCpU6e0a9cunT59esB
2jz32\nmFavXi3LsuJWLAAgOhHDva6uTl6vV2lpaUpKSlJBQYGqq6v7tfvNb36j+++/XzNnzoxboQCAYbAi\nePnll63vf//7Pcc7duywSktL+7Rpamqy/H6/dfXqVWv9+vXWK6+80q8fSbx48eLFawSvkXIqAofD\nEem0JOnHP/6xnnrqKTkcDlmWNeCyDEs1AGCviOHucrkUCoV6jkOhkNxud5827777rgoKCiRJLS0t\nOnDggJKSkpSfnx+HcgEA0XBYES6ru7q6NG/ePB0+fFizZs3Sbbfdpl27dikjI2PA9oWFhfr617+u\ne++9N24FAwCGFvHK3el0qrKyUnl5eQqHwyoqKlJGRoaqqqokSSUlJbYUCQAYphGv1g/gwIED1rx5\n8yyv12s99dRTA7b50Y9+ZHm9XisrK8s6ceJELIcfU4aai9///vdWVlaWtXDhQuvLX/6yVV9fn4Aq\n7RHN74VlWVZdXZ01efLkAd+UN0U0c/Hmm29aixYtshYsWGCtWLHC3gJtNNRcXLhwwcrLy7Oys7Ot\nBQsWWNu3b7e/SBsUFhZaN910k5WZmTlom5HkZszCvaury5o7d6517tw5q6Ojw8rOzrZOnTrVp83r\nr79u3XPPPZZlWdbx48etpUuXxmr4MSWauaipqbFaW1sty+r+JZ/Ic/FZu5UrV1pf/epXrb179yag\n0viLZi4uXbpkzZ8/3wqFQpZldQeciaKZiyeeeMJ6/PHHLcvqnofU1FSrs7MzEeXG1dGjR60TJ04M\nGu4jzc2YPX4gmj3x+/bt00MPPSRJWrp0qVpbW3X+/PlYlTBmRDMXy5Yt07Rp0yR1z0VTU1MiSo07\n7pXoFc1c7Ny5U/fdd1/PxoUZM2YkotS4i2YubrnlFl25ckWSdOXKFd14441yOiOuJI9Ly5cv1/Tp\n0wc9P9LcjFm4Nzc3y+Px9By73W41NzcP2cbEUItmLv7Xiy++qDVr1thRmu2i/b2orq7Whg0bJEW3\nBXc8imYuGhoadPHiRa1cuVK5ubnasWOH3WXaIpq5KC4u1smTJzVr1ixlZ2fr17/+td1ljgkjzc2Y\n/TMY7V9I65rNOSb+RR7On+nNN9/Utm3b9Pbbb8exosSJ1b0SJohmLjo7O3XixAkdPnxY7e3tWrZs\nmW6//Xb5fD4bKrRPNHOxZcsWLVq0SMFgUGfPntXdd9+t+vp6paSk2FDh2DKS3IxZuEezJ/7aNk1N\nTXK5XLEqYcyIZi4k6W9/+5uKi4sVCAQi/rdsPONeiV7RzIXH49GMGTOUnJys5ORk3Xnnnaqvrzcu\n3KOZi5qaGv3sZz+TJM2dO1ezZ8/WmTNnlJuba2utiTbi3IzJOwKWZXV2dlpz5syxzp07Z3366adD\nvqF67NgxY99EjGYuPvzwQ2vu3LnWsWPHElSlPaKZi/812CMsTBDNXJw+fdq66667rK6uLuvjjz+2\nMjMzrZMnTyao4viJZi4effRRq6yszLIsy/r3v/9tuVwu6z//+U8iyo27c+fORfWG6nByM2ZX7tHs\niV+zZo32798vr9erqVOnavv27bEafkyJZi6efPJJXbp0qWedOSkpSXV1dYksOy64V6JXNHORnp6u\n1atXKysrS5MmTVJxcbHmz5+f4MpjL5q52Lx5swoLC5Wdna2rV6/q6aefVmpqaoIrj71169bpyJEj\namlpkcfjUXl5uTo7OyWNLjcj3qEKABif+CQmADAQ4Q4ABiLcAcBAhDsAGIhwBwADEe4AYKD/A+pQ\nsEyrF82ZAAAAAElFTkSuQmCC\n" } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "write_output(nb_bernoulli_classifier, verification_X)\n", "print 'writing verification 
on naive bayesian bernoulli done...'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "writing verification on naive bayesian bernoulli done...\n" ] } ], "prompt_number": 30 },
{ "cell_type": "code", "collapsed": false, "input": [
 "### classification by voting ####\n",
 "class BiVotingClassifier:\n",
 "    \"\"\"Combine two probabilistic two-class classifiers by max-voting.\n",
 "\n",
 "    For every sample and class the larger of the two models' probabilities\n",
 "    is kept, then each row is rescaled so that it sums to one.\n",
 "    \"\"\"\n",
 "    def __init__(self, model1, model2):\n",
 "        self.model1 = model1\n",
 "        self.model2 = model2\n",
 "    def fit(self, X, y):\n",
 "        \"\"\"Fit both models on (X, y) and return self.\"\"\"\n",
 "        self.model1.fit(X, y)\n",
 "        self.model2.fit(X, y)\n",
 "        return self\n",
 "    def predict(self, X):\n",
 "        \"\"\"Return, per sample, the column index of the largest combined probability.\"\"\"\n",
 "        proba = self.predict_proba(X)\n",
 "        return np.argmax(proba, axis = 1)\n",
 "    def predict_proba(self, X):\n",
 "        \"\"\"Return the (n_samples, 2) array of combined, row-normalised probabilities.\"\"\"\n",
 "        proba1 = self.model1.predict_proba(X)\n",
 "        proba2 = self.model2.predict_proba(X)\n",
 "        _, n_labels = proba1.shape\n",
 "        assert n_labels == 2\n",
 "        ## element-wise maximum of the two models (replaces the per-row loop);\n",
 "        ## astype(float) gives a fresh float array that is safe to divide in place\n",
 "        proba = np.maximum(proba1, proba2).astype(float)\n",
 "        ## rescale every row so that its two columns sum to one\n",
 "        proba /= proba.sum(axis = 1)[:, np.newaxis]\n",
 "        ## alternative rule (previously kept here as a disabled block): copy the\n",
 "        ## whole row from whichever model has the larger top probability\n",
 "        return proba\n",
 "## C, alpha = (10, 0.1), (1, 0.3) \n",
 "#bivoting = BiVotingClassifier(svm.SVC(kernel = 'rbf', C = 10, gamma = 0.01, tol=0.0001, probability = True), \n",
 "bivoting = BiVotingClassifier(svm.SVC(kernel = 'linear', C = 1, gamma = 0.01, tol=0.0001, probability = True), # for l2 norm\n",
 "#bivoting = BiVotingClassifier(svm.SVC(kernel = 'linear', C = 10, gamma = 0.01, tol=0.0001, probability = True), # for l1 norm\n",
 "                              naive_bayes.BernoulliNB(alpha = 0.3, fit_prior=True))\n",
 "misclassified = benchmark(bivoting)"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "<__main__.BiVotingClassifier instance at 
0x15a0febd8>\n", "training time: 67.90" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "test time: 9.00" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "confusion matrix:\n", "[[1822 132]\n", " [ 10 683]]\n", "classification rate:\n", "94.635436343 %\n", "AUC for test: 0.995702012079\n", "ROC plot\n" ] }, { "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD9CAYAAABHnDf0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE2VJREFUeJzt3X9M1Pfhx/HX6V1b4qw/akfq3eVL9W4Fi0AXDLrO7lxj\nYK5jW+sSmmWpjDFiwjqX/tHN/TFolkaa7I+t/INJ6zJbCfuV0GR626xezbSAqxUXJeZ00h4sdbJJ\ntNoOON7fPy6FInCccPc5efN8JATOe/t5v/sOefb88OFzLmOMEQDAKouyvQAAQPoRdwCwEHEHAAsR\ndwCwEHEHAAsRdwCwUNK4f/e731Vubq7Wr18/7Zhnn31WwWBQxcXFevfdd9O+QADA7Usa9+rqaoXD\n4WmfP3jwoC5cuKBoNKq9e/dq586daV8gAOD2JY375s2btWLFimmff+ONN/TMM89IksrKyjQ4OKjL\nly+nd4UAgNvmnstf7u/vl9/vH3vs8/nU19en3NzcCeNcLtdcpgGABWu2NxGYU9ynmni6kN9pdzl4\n/33p//5PWrxYisele+6RNm6cfvzHH0sFBdLnPz/zsV0u6RvfSHy+1S9+0aDnnmuY9bptwl6MYy/G\nsRfjvN7ZvzCeU9y9Xq9isdjY476+Pnm93rkcck66uqRPLWeCSES6ciUR8wMHEn+2dKl08aLkdkv3\n3pt4LtOWLpVWr878PPMBezGOvRjHXqTHnOJeWVmp5uZmVVVVqaOjQ8uXL590SmY2Rkak4WGpo0P6\n17+mH3f6tPTXv0rd3eN/tmWLNNWPCYaGEq+6P/e5xKvqJ56Q7r5bWsTFoAAslDTuTz/9tN566y0N\nDAzI7/ersbFRw8PDkqS6ujpt27ZNBw8eVCAQ0JIlS7Rv3745LSYalb79benkyfE/q6iQVq6cenw8\nLpWXS/v3S0mu1ryjhEKhbC/hjsFejGMvxrEX6eFy4pa/Lpcr6Tn3jz+Wnn9e+tWvpNxcqbVVCoWm\nPmcNAAvFTO1MJusnJfbskXJyEmFvbpY++CBxaoWwA8DsZf2V+xe/KAUC0t690l13ZXolADB/zOWV\ne1bjPjqauELlL3+Rtm7N9CoAYH6Zt6dldu1KfN6wIZurAAD7ZDXur78u1dVJy5dncxUAYJ+snZa5\neVNaskS6dEnKy8v0CgBg/pl359w/+ihxyeP169IddlcCALhjzLtz7nV1ibD39WVjdgCwX1bi7nZL\nP/+5lMXb0ACA1bIS9/feS5yWAQBkhuNxHx6WjhzhChkAyCTH497Rkfj8zW86PTMALByOx/3IEWnN\nGmfunQ4AC5Xjcb/nHmn7dqdnBYCFxfG4Hzggffih07MCwMLi6C8xfXKjsI4Oqaws07MCwPw2b36J\n6cKFxGduFAYAmeVo3I1JvIcp71sKAJlFZgHAQo7G/cgR6d//dnJGAFiYHI37lSvSk086OSMALEyO\nxn1wUFq92skZAWBhcjTu5887ORsALFyOxn3JEmn9eidnBICFiatlAMBCxB0ALORo3P/xDyked3JG\nAFiYHI37hx9KgYCTMwLAwuRY3I1JvCH2vfc6NSMALFyO3RUyHj
davFgaHZVcrkzPCADz37y5K6TL\nRdgBwAlcLQMAFnIs7tevJ867AwAyz7G4Hzrk1EwAAMfiPjrKG2MDgFNmjHs4HFZ+fr6CwaCampom\nPT8wMKCKigqVlJSosLBQv/71r6c9lsczp7UCAFKUNO7xeFz19fUKh8M6d+6cWltb1dPTM2FMc3Oz\nHnnkEZ0+fVqRSETPPfecRkZGMrpoAEBySePe1dWlQCCgvLw8eTweVVVVqb29fcKYBx54QNeuXZMk\nXbt2Tffdd5/cbvekY509K338cRpXDgCY1uQKf0p/f7/8fv/YY5/Pp87Ozgljamtr9eUvf1mrV6/W\n9evX9dvf/nbKYx071qCREamhQQqFQgqFQnNePADYJBKJKBKJpOVYSePuSuE3jl588UWVlJQoEono\n4sWL2rp1q7q7u7V06dIJ4zZvbtBnPiPt3j23BQOArW594dvY2DjrYyU9LeP1ehWLxcYex2Ix+Xy+\nCWNOnDihb33rW5KktWvX6sEHH9R53nIJALIqadxLS0sVjUbV29uroaEhtbW1qbKycsKY/Px8HT58\nWJJ0+fJlnT9/XmvWrMncigEAM0p6Wsbtdqu5uVnl5eWKx+OqqalRQUGBWlpaJEl1dXXavXu3qqur\nVVxcrNHRUb300ktauXKlI4sHAEzNsbtC/uQnhnPuAHAb5s1dIQEAziDuAGAh4g4AFiLuAGAh4g4A\nFiLuAGAh4g4AFiLuAGAh4g4AFiLuAGAhx+Le3+/UTAAAx+J++bJ0111OzQYAC5tjce/pkbgTMAA4\nw7G437wp5ec7NRsALGyOxf3ee6W773ZqNgBY2LhaBgAsRNwBwELEHQAsRNwBwELEHQAsRNwBwELE\nHQAsRNwBwEKOxf3GDadmAgA4euOw5cudmg0AFjaXMcZkfBKXS/fcY/TRR5meCQDs4XK5NNtEc84d\nACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQjPGPRwOKz8/X8FgUE1N\nTVOOiUQieuSRR1RYWKhQKJTuNQIAblPSe8vE43E99NBDOnz4sLxerzZs2KDW1lYVFBSMjRkcHNSj\njz6qP//5z/L5fBoYGNCqVasmTsK9ZQDgtmXs3jJdXV0KBALKy8uTx+NRVVWV2tvbJ4w5cOCAnnrq\nKfl8PkmaFHYAgPPcyZ7s7++X3+8fe+zz+dTZ2TlhTDQa1fDwsLZs2aLr16/rhz/8ob7zne9MOtbI\nSIMaGhJfh0IhTt8AwC0ikYgikUhajpU07i6Xa8YDDA8P69SpU3rzzTd18+ZNbdq0SRs3blQwGJw4\nkXs87gCAyW594dvY2DjrYyWNu9frVSwWG3sci8XGTr98wu/3a9WqVcrJyVFOTo4ee+wxdXd3T4o7\nAMA5Sc+5l5aWKhqNqre3V0NDQ2pra1NlZeWEMV//+tf1t7/9TfF4XDdv3lRnZ6fWrVuX0UUDAJJL\n+srd7XarublZ5eXlisfjqqmpUUFBgVpaWiRJdXV1ys/PV0VFhYqKirRo0SLV1tYSdwDIMt5mDwDu\nULzNHgBgAuIOABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIceuc5eMMj8TANhjXlznvmyZUzMBAByL\nO/cRAwDnOBb3GzecmgkA4Fjcc3OdmgkA4Fjc8/KcmgkAwKWQAGAh4g4AFiLuAGAh4g4AFiLuAGAh\n4g4AFiLuAGAh4g4AFiLuAGAh4g4AFiLuAGAh4g4AFiLuAGAh4g4AFiLuAGAh4g4AFiLuAGAh4g4A\nFiLuAGAh4g4AFiLuAGAh4g4AFiLuAGChGeMeDoeVn5+vYDCopqamacedPHlSbrdbf/zjH9O6QADA\n7Usa93g8rvr6eoXDYZ07d06tra3q6emZctzzzz+viooKGWMytlgAQGqSxr2rq0uBQEB5eXnyeDyq\nqqpSe3v7pHEvv/yytm/frv
vvvz9jCwUApM6d7Mn+/n75/f6xxz6fT52dnZPGtLe368iRIzp58qRc\nLteUxzp9ukENDYmvQ6GQQqHQnBYOALaJRCKKRCJpOVbSuE8X6k/btWuX9uzZI5fLJWPMtKdlSkrG\n4w4AmOzWF76NjY2zPlbSuHu9XsVisbHHsVhMPp9vwph33nlHVVVVkqSBgQEdOnRIHo9HlZWVs14U\nAGBuksa9tLRU0WhUvb29Wr16tdra2tTa2jphzD//+c+xr6urq/W1r32NsANAliWNu9vtVnNzs8rL\nyxWPx1VTU6OCggK1tLRIkurq6hxZJADg9riMA9cuulwu7dhhtG9fpmcCAHt88rPM2eA3VAHAQsQd\nACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE\n3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHA\nQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQsQdACxE3AHAQjPGPRwOKz8/X8FgUE1NTZOef/31\n11VcXKyioiI9+uijOnPmTEYWCgBInTvZk/F4XPX19Tp8+LC8Xq82bNigyspKFRQUjI1Zs2aNjh07\npmXLlikcDuv73/++Ojo6Mr5wAMD0kr5y7+rqUiAQUF5enjwej6qqqtTe3j5hzKZNm7Rs2TJJUllZ\nmfr6+jK3WgBASpK+cu/v75ff7x977PP51NnZOe34V155Rdu2bZvyudOnG9TQkPg6FAopFArd9mIB\nwGaRSESRSCQtx0oad5fLlfKBjh49qldffVXHjx+f8vmSkvG4AwAmu/WFb2Nj46yPlTTuXq9XsVhs\n7HEsFpPP55s07syZM6qtrVU4HNaKFStmvRgAQHokPedeWlqqaDSq3t5eDQ0Nqa2tTZWVlRPGvP/+\n+3ryySf12muvKRAIZHSxAIDUJH3l7na71dzcrPLycsXjcdXU1KigoEAtLS2SpLq6Or3wwgu6evWq\ndu7cKUnyeDzq6urK/MoBANNyGWNMxidxubRjh9G+fZmeCQDs4XK5NNtE8xuqAGAh4g4AFiLuAGAh\n4g4AFiLuAGAh4g4AFiLuAGAh4g4AFnIs7qOjTs0EAHAs7ov4NwIAOMax5BYWOjUTAIDX0wBgIeIO\nABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi\n7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi7gBgIeIOABYi7g6LRCLZXsIdg70Y\nx16MYy/SY8a4h8Nh5efnKxgMqqmpacoxzz77rILBoIqLi/Xuu++mfZE24Rt3HHsxjr0Yx16kR9K4\nx+Nx1dfXKxwO69y5c2ptbVVPT8+EMQcPHtSFCxcUjUa1d+9e7dy5M6MLBgDMLGncu7q6FAgElJeX\nJ4/Ho6qqKrW3t08Y88Ybb+iZZ56RJJWVlWlwcFCXL1/O3IoBADMzSfzud78z3/ve98Ye79+/39TX\n108Y88QTT5jjx4+PPX788cfN3//+9wljJPHBBx988DGLj9lyKwmXy5Xs6TGJfk//9259HgCQWUlP\ny3i9XsVisbHHsVhMPp8v6Zi+vj55vd40LxMAcDuSxr20tFTRaFS9vb0aGhpSW1ubKisrJ4yprKzU\nb37zG0lSR0eHli9frtzc3MytGAAwo6SnZdxut5qbm1VeXq54PK6amhoVFBSopaVFklRXV6dt27bp\n4MGDCgQCWrJkifbt2+fIwgEAScz6bP0UDh06ZB566CETCATMnj17phzzgx/8wAQCAVNUVGROnTqV\nzunvKDPtxWuvvWaKiorM+vXrzRe+8AXT3d2dhVU6I5XvC2OM6erqMosXLzZ/+MMfHFyds1LZ
i6NH\nj5qSkhLz8MMPmy996UvOLtBBM+3FlStXTHl5uSkuLjYPP/yw2bdvn/OLdEB1dbX57Gc/awoLC6cd\nM5tupi3uIyMjZu3atebSpUtmaGjIFBcXm3Pnzk0Y86c//cl85StfMcYY09HRYcrKytI1/R0llb04\nceKEGRwcNMYkvskX8l58Mm7Lli3mq1/9qvn973+fhZVmXip7cfXqVbNu3ToTi8WMMYnA2SiVvfjZ\nz35mfvzjHxtjEvuwcuVKMzw8nI3lZtSxY8fMqVOnpo37bLuZttsPcE38uFT2YtOmTVq2bJmkxF70\n9fVlY6kZl8peSNLLL7+s7du36/7778/CKp2Ryl4cOHBATz311NiFC6tWrcrGUjMulb144IEHdO3a\nNUnStWvXdN9998ntTnomeV7avHmzVqxYMe3zs+1m2uLe398vv98/9tjn86m/v3/GMTZGLZW9+LRX\nXnlF27Ztc2Jpjkv1+6K9vX3st5tTvQR3vkllL6LRqP773/9qy5YtKi0t1f79+51epiNS2Yva2lqd\nPXtWq1evVnFxsX75y186vcw7wmy7mbb/Dabrmngb3M5/09GjR/Xqq6/q+PHjGVxR9qSyF7t27dKe\nPXvkcrlkEqcKHViZ81LZi+HhYZ06dUpvvvmmbt68qU2bNmnjxo0KBoMOrNA5qezFiy++qJKSEkUi\nEV28eFFbt25Vd3e3li5d6sAK7yyz6Wba4s418eNS2QtJOnPmjGpraxUOh5P+s2w+S2Uv3nnnHVVV\nVUmSBgYGdOjQIXk8nkmX3c53qeyF3+/XqlWrlJOTo5ycHD322GPq7u62Lu6p7MWJEyf005/+VJK0\ndu1aPfjggzp//rxKS0sdXWu2zbqbafmJgDFmeHjYrFmzxly6dMn873//m/EHqm+//ba1P0RMZS/e\ne+89s3btWvP2229naZXOSGUvPm3Hjh3WXi2Tyl709PSYxx9/3IyMjJgbN26YwsJCc/bs2SytOHNS\n2Ysf/ehHpqGhwRhjzAcffGC8Xq/5z3/+k43lZtylS5dS+oHq7XQzba/cuSZ+XCp78cILL+jq1atj\n55k9Ho+6urqyueyMSGUvFopU9iI/P18VFRUqKirSokWLVFtbq3Xr1mV55emXyl7s3r1b1dXVKi4u\n1ujoqF566SWtXLkyyytPv6efflpvvfWWBgYG5Pf71djYqOHhYUlz66bLGEtPcALAAsY7MQGAhYg7\nAFiIuAOAhYg7AFiIuAOAhYg7AFjo/wGQLtPTolua8wAAAABJRU5ErkJggg==\n" } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "#write_output(bivoting, test_X)\n", "write_output(bivoting, verification_X)\n", "print 'writing verification on bivoting done...'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "writing verification done...\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "## test SGD with Elastic Net penalty\n", "sgdc_classifier = linear_model.SGDClassifier(alpha = 0.0001, n_iter = 200, penalty = 'l2')\n", "misclassified = benchmark(sgdc_classifier)" ], "language": "python", "metadata": {}, "outputs": [ { 
"output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,\n", " fit_intercept=True, learning_rate=optimal, loss=hinge, n_iter=200,\n", " n_jobs=1, penalty=l2, power_t=0.5, rho=0.85, seed=0, shuffle=False,\n", " verbose=0, warm_start=False)\n", "training time: 0.08" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "test time: 0.00\n", "confusion matrix:\n", "[[1795 159]\n", " [ 274 419]]\n", "classification rate:\n", "83.641858708 %\n" ] } ], "prompt_number": 178 }, { "cell_type": "code", "collapsed": false, "input": [
 "## test ensemble random forest\n",
 "forest_classifier = ensemble.RandomForestClassifier(n_estimators=60, n_jobs = -1)\n",
 "# benchmark() is not handed the data, so it presumably reads the global\n",
 "# train_X / test_X (confirm against its definition); they are swapped for\n",
 "# dense copies for the duration of this call only\n",
 "train_X_sparse, test_X_sparse = train_X, test_X\n",
 "try:\n",
 "    train_X, test_X = train_X_sparse.todense(), test_X_sparse.todense()\n",
 "    misclassified = benchmark(forest_classifier)\n",
 "finally:\n",
 "    # always put the sparse matrices back, even if training fails or is\n",
 "    # interrupted, so later cells (and a re-run of this one) still work\n",
 "    train_X, test_X = train_X_sparse, test_X_sparse"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "RandomForestClassifier(bootstrap=True, compute_importances=False,\n", " criterion=gini, max_depth=None, max_features=auto,\n", " min_density=0.1, min_samples_leaf=1, min_samples_split=1,\n", " n_estimators=60, n_jobs=-1, oob_score=False, random_state=None,\n", " verbose=0)\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "training time: 107.65\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "test time: 2.78\n", "confusion matrix:\n", "[[1883 71]\n", " [ 398 295]]\n", "classification rate:\n", "82.2818284851 %\n", "AUC for test: 0.85516519191\n", "ROC plot\n" ] }, { "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD9CAYAAABHnDf0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHO5JREFUeJzt3XtUlHX+B/D3IKTmDRWvM2yIEKAIuosXMm20DMKkTbPQ\nbbeIgxxbK7tsadv+grZcaNvdWmnPYkftYhJaFpo4peZoXgATg7xEaKIDJcdR8S4MM8/vj+/GRXAY\nYeZ5Zp55v87hzAzz+Dwfv+t5993v832+X40kSRKIiEhVfJQugIiInI/hTkSkQgx3IiIVYrgTEakQ\nw52ISIUY7kREKmQ33B977DEMGjQIo0aNuu4xTz75JEJDQxEdHY39+/c7vUAiIrpxdsM9OTkZBoPh\nut8XFBTgyJEjqKiowLJlyzB//nynF0hERDfObrhPmjQJffv2ve7369evxyOPPAIAGD9+PGpra1FT\nU+PcComI6Ib5duYPV1dXIzAwsPGzTqdDVVUVBg0a1OI4jUbTmcsQEXmtji4i0Klwb+vC1wtyrnIg\npKenIz09Xeky3ALbognbookr2+LECeDAgZa/+/pr4ORJwOc64xgrVgDduwNdu7b8fW0tMGYM0KeP\n+KzRAHfcAQweDPj6OucnKKjjHeNOhbtWq4XJZGr8XFVVBa1W25lTEhE57OpV4PnngdOnW38nScC2\nbcDZs02/q6sDdDqg+RyRq1eB3/wGCA9v+xqTJwMzZojwbq5LF6B3787/HVylU+GemJiI7OxsJCUl\nobCwEP7+/q2GZIiInKmuDrDZxI+/P9DQAHzwQevwBYA//lEEd3PduslTp9LshvucOXOwfft2mM1m\nBAYGIiMjAxaLBQCQlpaGhIQEFBQUICQkBD169MDKlStlKdqT6fV6pUtwG2yLJmyLJnq9HlYrsHq1\nCPLmPv0UKCgQAX31qvjdsWNAUJDsZbo9jRxL/mo0Go65E1Ejsxmor2/7u0uXgIkTgVOngJSUlt/Z\nbEBcHPDQQ66v0R10Jjs7fUOViOhaJ04AX30FfPstUFjY8ruLF4GDB4EhQ9r+s2fPAsHBQEVF081K\nunHsuROR0+TmAu+9B3zxhbgJGRYG3HZb6yAPCABCQpSp0ZOw505EsqqvFzNUvvwSePZZoH9/wGIR\n499z5wKffALMnKl0ld6NPXciLydJwPnzYozbbG7/1WwW4+L9+4v533FxwNNPi3MNGiRmsJBzdCY7\nGe5EKiJJQHExsHdv64d1AMBgEOPhAQFNx587J2afBAQAAwY49urv3/bUQ3IuhjuRF7PZxPDIxYvA\n7Nnid3PnArff3jqAbTbgrrta9q579/aeud+ehuFO5KF+eRinLdu3i58DBwA/v6bfHz8OFBWJJyQB\nwGoVr/fcI3rieXnu/eQkOY43VIk8wJkzYhZJWRlQUiLWKtmwQXz3S1A3Z7WKGSe33CJ64c2FhgJR\nUU2ffXyuvzYKeSf23Ilc5Px5MZ793/8CGzcCpaUirHv3FlMDJ00CevUCpkxRulJyVxyWIVKYJIke\n+ebNohduNALr14ux7fBwYOpU8VRl8942UXsY7kQy+fFHYNUqYOdOMeukZ0/x+x9+AC5cEEEeHy/C\nPiEBuPtuZeslz8ZwJ3KSvXuBrCzxyPy187VtNuDwYbFI1YMPAjExwLBhTd/fcouYKkjkLLyhStRB\n588DTzwhVhscMED0zIcNA/76V2Ds2NbH9+olQpzI3THcyaucOiVubq5fLx72qa4Wv//Xv8TNTh8f\n0TPnAzrk6TgsQ6omSUB5uQjz9evFnPFp04DERDFb5aabgIEDxZZmRO6GY+7k9Z5/XtzgPHhQBPgv\nT1xKkhhuSUwUP3p9670widwVw528hs3WepOHDz4A5s0DXnsNuPVWMUOl+ROd3bpxmIU8E8OdVMdi\nEWulWK1iOEWSgE2bxFKyQFPvW5LE+wULgCVLlKuXyBUY7qQKN
huQkSFueO7bJ37XpYsI8EcfFT32\nqVOBOXO40BV5B4Y7eZyqKmDNGtEzP3NG9Mg1GrEo1rPPiqGV2Fhxw5PIWzHcye3l5oqph3l5YnZK\naanohf/616JnfvPNwPTpwJgxDHSiXzDcyS0dOSL20/z3v8XDQnPmANHRYueeLl2AkSO5kiGRPQx3\nciuzZ4ubn5cuAcOHi3XGFy8Ghg5VujIiz8LlB0hx5eXAiy8C69aJz598ItYgHzhQ2bqIvBV77tQh\nVqtY3nbvXuDNN4ErV8SN0JgYYPJkoG9fpSsk8nwcliGXsVqBzEyxc1B9PfD55+JR/YYG8f3ddwMT\nJwJ/+hPQvbuytRKpDcOdnK6iAsjJAf7xD/H5mWeAcePEMMsvW7516cIbokSuxHAnp1q7VqxXPmgQ\nsHAhsGiR0hUReSeGOzlFXR3wl7+ItVoefli8791b6aqIvBdny9ANO3AA+OwzMbQCiAeJ1q4VG1GU\nlXFHISJPx567F6ipEY/6nzolHvF/6y3g3DkgMhK4996m4yIjgblzuYIikbvgsAy1cu4csGULsHs3\n8M9/it899hjwq1+J93/4Q8v9P4nI/TDcqYVDh8SMlrNngTvvFE+IPvMMe+REnoZj7oQTJ4CiIjHL\nBQCCg8UDRsOHK1sXESmDPXcV2L9frK44ZAhw113A228DvXopXRURdVZnsrPdR1AMBgPCw8MRGhqK\nrKysVt+bzWbEx8dj9OjRiIyMxLvvvtuhQujGPPMMcN99wKxZIthvuQX46Sfg/fcZ7ETUTs/darUi\nLCwMW7ZsgVarxdixY5Gbm4uIiIjGY9LT01FXV4e//e1vMJvNCAsLQ01NDXybbSfPnnvn2GxiydwT\nJ4D/+z/g++/FQl0rVoh56BqN6LFzTjqRurhszL24uBghISEICgoCACQlJSE/P79FuA8ZMgRlZWUA\ngPPnz6N///4tgp1u3L59QGGheF9QIJbP/eV/32HDgPR0sRRAeLhiJRKRm7ObwtXV1QgMDGz8rNPp\nUFRU1OKY1NRUTJ06FUOHDsWFCxewZs2aNs+Vnp7e+F6v10Ov13e8apWqqxPj57Gx4oZofDyg1QKr\nVgEJCYC/v9IVEpErGY1GGI1Gp5zLbrhrHJg7t2TJEowePRpGoxFHjx7FtGnTUFpail7XDPw2D3dq\nbcUKICVFvI+IAL7+GujfX9maiEhe13Z8MzIyOnwuuzdUtVotTCZT42eTyQSdTtfimN27d2P27NkA\ngOHDh2PYsGEoLy/vcEHeyGYD5s0DHn9c7F506BCDnYg6x264x8TEoKKiApWVlaivr0deXh4SExNb\nHBMeHo4tW7YAAGpqalBeXo7g4GDXVawy27eL9V2sVuC558RG0UREndXuPPdNmzZh4cKFsFqtSElJ\nweLFi5GTkwMASEtLg9lsRnJyMk6cOAGbzYbFixdj7ty5LS/C2TJtkiSxQFe3bqK3ztkuRNQclx/w\nQDabuGG6eTPw7bdAdLTSFRGRu3HpQ0zkGvffL4J982YGOxE5Hyeky2jtWjGt8cwZ4MgRMd4+ebLS\nVRGRGnFYRiZXroj10u+6SywbMHWqGGsnIroejrm7udWrgd/9TrwvLQWiopSth4g8A8fc3dizz4pg\nnzEDaGhgsBORPNhzd6GGBsDPD8jMBF54QelqiMjTcFjGDdXWAh99BKxcKTbRICK6UdyJyc3U1AC3\n3Qb8+CPw2WdKV0NE3ojh7gJPPinWhtm1Cxg8WOlqiMgbMdydxGYT666/9ZZ4MKm6msFORMrhbBkn\n+eor4N57xfv8fGDoUGXrISLvxp57J507J26aPv20mO64fr3SFRERcbZMpxw/DgQFASNGiKdOlyxR\nuiIiUhPOllFIZibQowdw4IDYpJqIyF1wzL0Tdu0CFi5ksBOR+2G4d1B9PfDdd8A99yhdCRFRaxxz\n7wBJErNhTp4U2+P58D+RR
OQCXDhMZn//uwj2775jsBORe2LP/QZcvCg22Lj3XvGzYYPSFRGRmrHn\n7mIlJcCDDwK9egEzZwITJ4pdlYiI3BXDvR1ffy0eTqqpEcsLXLkC7NzJXZSIyL0x3O3YsUPscRoU\nJOa0x8dzjJ2IPAPH3K/DYgG6dgXGjwf27FG6GiLyRhxzd4Hly8WUxw8/VLoSIqIbx3Bvww8/APPn\nA2lpQHCw0tUQEd04Dstco7YW6NsXGDVKDMf06KF0RUTkrTgs4yT33y+CHQD27WOwE5HnYrj/z9Kl\nYr/TTZuAhgbAz0/pioiIOo5L/gLQ68WTp6+8IqY7EhF5Oo65QyzZW1ICjBmjdCVERE06k50Md4in\nTWtr+dQpEbkX3lDthHPngLo6brhBROri9eG+Zo147dpV2TqIiJzJ68M9Px945BGlqyAici6vDvdT\np4CNG8Xa7EREauK1N1QvXQJ69hQ3US9f5pg7Ebkfl95QNRgMCA8PR2hoKLKysto8xmg0YsyYMYiM\njIRer+9QIXK5ckVsat2zp/jMYCciNbLbc7darQgLC8OWLVug1WoxduxY5ObmIiIiovGY2tpaTJw4\nEV988QV0Oh3MZjMCAgJaXsRNeu4HDoghmOPHgXXrgOnTgZtuUroqIqK2uaznXlxcjJCQEAQFBcHP\nzw9JSUnIz89vcczq1asxa9Ys6HQ6AGgV7EqTJODLL4EFC8RiYFYrsHmzWEeGwU5EamV3+YHq6moE\nBgY2ftbpdCgqKmpxTEVFBSwWC6ZMmYILFy7gqaeewu9///tW50pPT298r9frZRu+SUoS0x1nzABe\new147jmGOhG5J6PRCKPR6JRz2Q13jQOD0RaLBSUlJdi6dSsuX76M2NhYTJgwAaGhoS2Oax7ucrl6\nVQT755+LIRgiInd2bcc3IyOjw+eyG+5arRYmk6nxs8lkahx++UVgYCACAgLQvXt3dO/eHZMnT0Zp\naWmrcJfbhQvAyy+L9wx2IvI2dsfcY2JiUFFRgcrKStTX1yMvLw+JiYktjrnvvvuwc+dOWK1WXL58\nGUVFRRgxYoRLi3bEypXAu+8Cb7yhdCVERPKz23P39fVFdnY24uLiYLVakZKSgoiICOTk5AAA0tLS\nEB4ejvj4eERFRcHHxwepqamKh3tJCfDUU0ByMvDss4qWQkSkCNU9xGSxAM8/L5YV+P573jwlIs/V\nmexU1WYdV68C2dliSGbpUgY7EXkvVa0ts3EjkJkJvPQS0MZsTCIir6GaYZmffgK0WuC3vwU+/dSl\nlyIikgV3YgKQmAgYDMDJk0C/fi69FBGRLLx+zP3nn4ENG4D33mOwExEBKhlzLy8XS/fOnq10JURE\n7kEVwzLjxgE+PkBhocsuQUQkO68flvnmGzHeTkREgscPyxw/Lpb1nTBB6UqIiNyHx4f7kiXA4MFA\n795KV0JE5D48eljm1VeBZcvEE6lERNTEY2+oXrkC3HwzkJIiAt7H4/8/CBFRSy7dINtdbd0qXl99\nlcFORHQtj+25T58uNuTYscOppyUichteNxWyoED8cPojEVHbPK7nbrMBXboADzwArF3rlFMSEbkl\nrxpz37xZvC5bpmwdRETuzON67mPHit2Wvv3WKacjInJbXjXmXl4OrFmjdBVERO7No4ZlrFYxvz08\nXOlKiIjcm0eF+9dfAw0NXLOdiKg9HhPuP/8MTJkCjB/PdWSIiNrjMeH+zjtiCuSuXUpXQkTk/jwm\n3I8fBx5/XAQ8ERHZ5xHhXlgIrFgBxMYqXQkRkWfwiHnud9wB1NYCpaVOLIqIyM2pfp77iRPAG28o\nXQURkedw+2GZK1eAykogMlLpSoiIPIfbh/vHH4vXW29Vtg4iIk/i1uFeVQWkpQHx8YBGo3Q1RESe\nw63D/dFHxbDMm28qXQkRkWdx69kyGg3w0UfAQw+5oCgiIjenuvXcrVYgOlq8j49XthYiIk/
kluH+\n9ttAWRlQUQH06aN0NUREnscth2WCg4ERI4DPP3dhUUREbk5VDzHV1ADHjgGrVytdCRGR52p3WMZg\nMCA8PByhoaHIysq67nF79+6Fr68v1q1b16mCcnLE64QJnToNEZFXsxvuVqsVCxYsgMFgwKFDh5Cb\nm4vDhw+3edwLL7yA+Pj4Tq0hU10NvP468PDDHT4FERGhnXAvLi5GSEgIgoKC4Ofnh6SkJOTn57c6\nbunSpXjggQcwYMCAThXz4INiI46XXurUaYiIvJ7dMffq6moEBgY2ftbpdCgqKmp1TH5+Pr766ivs\n3bsXmus8Spqent74Xq/XQ6/Xt/j+00+B3bvFyo9hYTf4tyAiUgGj0Qij0eiUc9kN9+sFdXMLFy5E\nZmZm413d6w3LNA/3tsycCcTFAVFR7V6SiEiVru34ZmRkdPhcdsNdq9XCZDI1fjaZTNDpdC2O2bdv\nH5KSkgAAZrMZmzZtgp+fHxITEx0uorBQvL7/vsN/hIiI7LA7z72hoQFhYWHYunUrhg4dinHjxiE3\nNxcRERFtHp+cnIwZM2Zg5syZLS/SzlzNefOAvXuB/fs7+LcgIlIhl81z9/X1RXZ2NuLi4mC1WpGS\nkoKIiAjk/G++YlpaWocu2pwkAevWAYsWdfpURET0P4o/oZqXByQlARcvAj16uLoSIiLP4bELhzU0\niGB/8EEGOxGRMynac798WYQ6e+1ERK15bM/9nXfEK4OdiMi5FA33ykreSCUicgVFw12jATq5YgER\nEbXBLTfrICKizlE03A8eFFvqERGRcykW7hYL8OWXQLN1yYiIyEkUC/dvvhGv99+vVAVEROqlWLg3\nNAC33w507apUBURE6qVYuFdXA1euKHV1IiJ1UyzcN2wA/P2VujoRkbopFu6rVwMJCUpdnYhI3RRb\nWyYgAPj+e/FKRESteezaMkRE5BqKhHt5OXD6NGfKEBG5iiLhbjIBYWFAr15KXJ2ISP0UCXebDRg8\nWIkrExF5B0VuqIaGAj17ckNsIiJ7XLZBtivYbMCRI8CWLXJfmYjIe8g+LLN1q3idPFnuKxMReQ/Z\nw72+Xjy85Ocn95WJiLwH57kTEakQw52ISIUY7kREKiT7VMgJE8TWenv3uvqqRESezaOmQhYVieV+\niYjIdWTvuWs0YpOObt1cfVUiIs/mUatC+voCXbrIfVUiIu8ia7jX1Ym9U4mIyLVkDffKSsDHR/Te\niYjIdWQN95MngQEDxLg7ERG5jqzhbjYDQ4fKeUUiIu8ka7gXFAD+/nJekYjIO8ka7rm5wMSJcl6R\niMg7tRvuBoMB4eHhCA0NRVZWVqvvP/zwQ0RHRyMqKgoTJ05EWVlZm+f58Ucxv33evM4XTURE9tmd\nt2K1WrFgwQJs2bIFWq0WY8eORWJiIiIiIhqPCQ4Oxo4dO9CnTx8YDAbMmzcPhYWFrc518SIwahQQ\nGOj8vwQREbVkt+deXFyMkJAQBAUFwc/PD0lJScjPz29xTGxsLPr06QMAGD9+PKqqqto816lTwIUL\nTqqaiIjssttzr66uRmCzrrZOp0NRUdF1j1++fDkSEhLa/O4//0lHXR2Qng7o9Xro9foOFUxEpFZG\noxFGo9Ep57Ib7pobmJC+bds2rFixArt27Wrz+1mz0tG1qwh3IiJq7dqOb0ZGRofPZTfctVotTCZT\n42eTyQSdTtfquLKyMqSmpsJgMKBv374dLoaIiJzD7ph7TEwMKioqUFlZifr6euTl5SExMbHFMSdO\nnMDMmTOxatUqhISEuLRYIiJyjN2eu6+vL7KzsxEXFwer1YqUlBREREQgJycHAJCWloZXXnkFZ8+e\nxfz58wEAfn5+KC4ubnWub74Bamtd8DcgIqJWZFvPfdEiCd26AS+/7OqrERGpg0es575zJ5f7JSKS\ni2zh3rcvEBMj19WIiLybbOHOZX6JiOQj+zZ7RETkerK
F+9Gjcl2JiIhkC/eTJ4FBg+S6GhGRd5Mt\n3E+fFlvsERGR68k2zx2QYLFwc2wiIkd5xDz3bt0Y7EREcuFsGSIiFWK4ExGpkKxj7q6/EhGRenjE\nmPuvfiXXlYiISLZwHzhQrisRERHH3ImIVEi2cDeb5boSERHJFu7jxsl1JSIiki3cb75ZrisRERHH\n3ImIVIjhTkSkQgx3IiIVYrgTEakQw52ISIUY7kREKsRwJyJSIYY7EZEKyRbuVqtcVyIiItnC3d9f\nrisREZFs4X7LLXJdiYiIOOZORKRCDHciIhViuBMRqRDDnYhIhRjuREQqxHAnIlIhhrvMjEaj0iW4\nDbZFE7ZFE7aFc7Qb7gaDAeHh4QgNDUVWVlabxzz55JMIDQ1FdHQ09u/f7/Qi1YT/cJuwLZqwLZqw\nLZzDbrhbrVYsWLAABoMBhw4dQm5uLg4fPtzimIKCAhw5cgQVFRVYtmwZ5s+f79KCiYiofXbDvbi4\nGCEhIQgKCoKfnx+SkpKQn5/f4pj169fjkUceAQCMHz8etbW1qKmpaXWuJ55wYtVERGSXr70vq6ur\nERgY2PhZp9OhqKio3WOqqqowaNCgFsd17apxRr2qkJGRoXQJboNt0YRt0YRt0Xl2w12jcSyQJUmy\n++eu/Z6IiFzL7rCMVquFyWRq/GwymaDT6eweU1VVBa1W6+QyiYjoRtgN95iYGFRUVKCyshL19fXI\ny8tDYmJii2MSExPx/vvvAwAKCwvh7+/fakiGiIjkZXdYxtfXF9nZ2YiLi4PVakVKSgoiIiKQk5MD\nAEhLS0NCQgIKCgoQEhKCHj16YOXKlbIUTkREdkhOtGnTJiksLEwKCQmRMjMz2zzmiSeekEJCQqSo\nqCippKTEmZd3K+21xapVq6SoqChp1KhR0m233SaVlpYqUKU8HPl3IUmSVFxcLHXp0kX65JNPZKxO\nXo60xbZt26TRo0dLI0eOlO644w55C5RRe21x6tQpKS4uToqOjpZGjhwprVy5Uv4iZZCcnCwNHDhQ\nioyMvO4xHclNp4V7Q0ODNHz4cOnYsWNSfX29FB0dLR06dKjFMRs3bpTuueceSZIkqbCwUBo/fryz\nLu9WHGmL3bt3S7W1tZIkiX/k3twWvxw3ZcoUafr06dLHH3+sQKWu50hbnD17VhoxYoRkMpkkSRIB\np0aOtMXLL78sLVq0SJIk0Q79+vWTLBaLEuW61I4dO6SSkpLrhntHc9Npyw84c068p3OkLWJjY9Gn\nTx8Aoi2qqqqUKNXlHGkLAFi6dCkeeOABDBgwQIEq5eFIW6xevRqzZs1qnLgQEBCgRKku50hbDBky\nBOfPnwcAnD9/Hv3794evr92RZI80adIk9O3b97rfdzQ3nRbubc13r66ubvcYNYaaI23R3PLly5GQ\nkCBHabJz9N9Ffn5+49PNjk7B9TSOtEVFRQXOnDmDKVOmICYmBh988IHcZcrCkbZITU3FwYMHMXTo\nUERHR+Ott96Su0y30NHcdNp/Bp01J14NbuTvtG3bNqxYsQK7du1yYUXKcaQtFi5ciMzMTGg0Gkhi\nqFCGyuTnSFtYLBaUlJRg69atuHz5MmJjYzFhwgSEhobKUKF8HGmLJUuWYPTo0TAajTh69CimTZuG\n0tJS9OrVS4YK3UtHctNp4c458U0caQsAKCsrQ2pqKgwGg93/W+bJHGmLffv2ISkpCQBgNpuxadMm\n+Pn5tZp26+kcaYvAwEAEBASge/fu6N69OyZPnozS0lLVhbsjbbF79278+c9/BgAMHz4cw4YNQ3l5\nOWJiYmStVWkdzk2n3BGQJMlisUjBwcHSsWPHpLq6unZvqO7Zs0e1NxEdaYvjx49Lw4cPl/bs2aNQ\nlfJwpC2ae/TRR1U7W8aRtjh8+LB05513Sg0NDdKlS5ekyMhI6eDBgwpV7DqOtMXTTz8tpaenS5Ik\nSSdPnpS0Wq10+vR
pJcp1uWPHjjl0Q/VGctNpPXfOiW/iSFu88sorOHv2bOM4s5+fH4qLi5Us2yUc\naQtv4UhbhIeHIz4+HlFRUfDx8UFqaipGjBihcOXO50hbvPjii0hOTkZ0dDRsNhtef/119OvXT+HK\nnW/OnDnYvn07zGYzAgMDkZGRAYvFAqBzuamRJJUOcBIReTHuxEREpEIMdyIiFWK4ExGpEMOdiEiF\nGO5ERCrEcCciUqH/B9LwDXPEWvO/AAAAAElFTkSuQmCC\n" } ], "prompt_number": 179 }, { "cell_type": "code", "collapsed": false, "input": [ "## test ensemble extremely randomized tree\n", "random_forest_classifier = ensemble.ExtraTreesClassifier(n_estimators = 100, n_jobs = -1)\n", "train_X_sparse, test_X_sparse = train_X, test_X\n", "train_X, test_X = train_X.todense(), test_X.todense()\n", "misclassified = benchmark(random_forest_classifier)\n", "train_X, test_X = train_X_sparse, test_X_sparse" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "ExtraTreesClassifier(bootstrap=False, compute_importances=False,\n", " criterion=gini, max_depth=None, max_features=auto,\n", " min_density=0.1, min_samples_leaf=1, min_samples_split=1,\n", " n_estimators=100, n_jobs=-1, oob_score=False, random_state=None,\n", " verbose=0)\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "training time: 150.70\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "test time: 3.42\n", "confusion matrix:\n", "[[1884 70]\n", " [ 404 289]]\n", "classification rate:\n", "82.0929353986 %\n", "AUC for test: 0.857511361606\n", "ROC plot\n" ] }, { "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD9CAYAAABHnDf0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHURJREFUeJzt3XtYVHX+B/D3COMlNW+Y6QyFMsRFBCrUWFMxMww3as1a\nrG2NiHjczGx3227Ps0FbJrZttbJtWGqbJg9Z1tiGY+tlvANeCktZFklkoDInRQsvwHB+f3x/gtyG\nAWbOmTnn/XoenrlwPN9P5/F59/V7vuf71UmSJIGIiFSll9IFEBGR+zHciYhUiOFORKRCDHciIhVi\nuBMRqRDDnYhIhZyG+0MPPYQRI0Zg3LhxHR6zcOFChISEIDo6Gl988YXbCyQioq5zGu4pKSmwWCwd\n/j4/Px9Hjx5FWVkZli9fjvnz57u9QCIi6jqn4T558mQMGTKkw99v2LAB8+bNAwBMnDgRNTU1OHHi\nhHsrJCKiLvPvyR+urq5GYGBg02ej0YiqqiqMGDGixXE6na4nzRARaVZ3FxHoUbi313BHQc5VDoSM\njAxkZGQoXYZX4LVoxmvRzJuuxeHDwPLlwNmzQK9W4xzbtwPl5cDgwc7PUVMjXuPj2//9hQvAtdcC\nN98M9OkDzJgB9O4tfmcwdL9j3KNwNxgMsNlsTZ+rqqpgMBh6ckoiItls2SLCd9cu4LvvAD8/8X1t\nLVBaCvz3v8B11wH33w+0jra4OBHIrQYq2tWvH9C3r/vrd6ZH4Z6UlITs7GwkJyejoKAAgwcPbjMk\nQ0TkKfX1QEODeP/GGyKkL/V6Lzl5UnzfkdmzxXnGjQMiIsR3ffsCoaHAmDHAFVd4pnZPcxruc+fO\nxfbt22G32xEYGIjMzEzU19cDANLT05GYmIj8/HyYTCb0798fq1atkqVoXxbf0b/NNIjXopkWr4XD\nAZw50/I7SQLOnYvHjBnivV7v/BwWi+ht63Qi5BcuBKZObXvc668DN97ovtp9gU6OJX91Oh3H3Ik0\nwuEA2ps097//AY8/DgwdKj7v3i16zJdPyLtwATh/HkhOBu69t/OhjAEDgMmT3Ve7t+lJdjLciahd\n334LWK1AdXXb3/3wA2A2Nwf15SorgdOnW4Y2IIZHxo4FXnut+bvYWGDgQLeWrSoMdyJyqrxc9IgB\n4PvvgZ07m28eNjQAb70FjBrVfPzFi+Jm4pVXAtOnA8HBLc8nSSKUExLattWrlxgCuXR+6j6GOxF1\n6NAhIDpa9JoBMa3PYBBT7gAR1H5+wF13tfxzQ4aIKXqknJ5kZ4/nuRORe9XVAUVFInRddfEi8OGH\nYspda6+/DtxwA3DggPtqJO/HnjuRzH74ASguFu/z8sSYtsUCDB8uvjt5UrzefLPr57x4UcwYSU5u\n+zt/f+Chh4D+/XtWN8mPwzJEXmr79ubpfnv2APv2AVu3iiGPG28E7Hbg4YfF8MeECc1/btAg8bQi\naRvDnUhBDocYQikuBhYvBtavF2PYjY3i+zvuEMfV1QGRkUBiIjBtmuhpEznDMXcimXz3HbBpkwjm\nH38E/v1vYNs2EeYOB3DnnWL64KRJ4vhevdquSUIkB/bciSB62V99BXz8MbBxo5gC2NrJk6J3PmKE\nmAJ45ZXiRmVsrHh0ncjd2HMn6qK9e4GCAqCsTLw/ehT4+WcgLEwMm8yc2f6fMxqB8HB5ayXqDvbc\nSRMkCaioEOPea9YAL74oet2TJwNXXSVew8OBgAClKyVqxhuqRE5UVYknMF96SSzfWlMD3Hdfy8fg\nibwRw50IYq73m2+KBapsNrHGSW0t8M03YurhwoWAl+wBQeQShjtp2urVYg75W2+JzwsWiEfrAwPF\nTc/evcV7Il/DcCdNKS4WKxZ+8AHw3ntipsucOWIGy29+I/+ON0SewtkypHr794shla+/Bo4fB266\nSeyQs2mTmFPe3poqRFrGcCevd+utYq/LmBhgwwax/RkfzSdyj
uFOXuv4ceCZZ0Swf/GFCHcicg3D\nnRSzYQOQmys2j2jtxAmgpESsO/7OOwx2oq7iDVVSRGGhGDd/5pnmTSNaMxqBkBB56yLyJpwtQz6l\nsVGsXW4yiZAnovb1JDu5Xh3Jpr5ePPo/dixw6hSwbp3SFRGpF8OdPO6TT8SCXL17Aw88IHrsO3YA\n11yjdGVE6sUbquRWx46JFRZ37RJL5+7bJ76fNEncPA0NFfPTicizOOZObrF/P5CVJTZpDgoSs1xu\nuAG4917Ra+dqi0RdxydUSRbnzwOffSZuiJ46BTzxBHDhgtiAuaFBrLj48cfAXXcpXSkRsedOHWps\nBM6dE+/T08UN0Pp64J57gIEDgbg44P77RbgDgF6vXK1EasSeO7ldaSkQFSU2t+jfXyyd+9FHwIQJ\nYv45EXk3hjs12bULeOUVoLwcOHxYLJO7f7/YqYiIfAunQhLMZrHu+eTJYimAP/5RhHtlJYOdyFex\n564xDQ3AqlVip6ILF4B//lNsDD1jhljDhXPPidSBN1RVSJLEolw7dgB+fs3fv/eeWJALAJKTxRTF\nvn3FfqLcqYjI+3BtGY2rrRW7E+3cKXrk//iH+H7GDLEW+iUNDcDdd4s56AMGKFMrEbmO4a5R69aJ\nh4SuuAKIiAB69RIzXCIjgUce4e5ERL6OUyE14LvvgD/8QTzCfym0z58HXn5ZPEzEnYmI6HKdzpax\nWCwICwtDSEgIsrKy2vzebrdj5syZiImJQWRkJN59911P1KlpW7cCo0aJYF+2DLDbxc+FC8DTTzPY\niagtp8MyDocDoaGh2Lx5MwwGA8aPH4/c3FyEh4c3HZORkYGLFy/i5Zdfht1uR2hoKE6cOAF//+Z/\nFHBYpvtqa8X4eFQUsHs3x8qJtMRj67kXFRXBZDIhKCgIer0eycnJMJvNLY4ZOXIkzp49CwA4e/Ys\nhg0b1iLYqfueeaY5zAsLGexE5DqnKVxdXY3Ay+bIGY1GFLbaOictLQ233HILRo0ahZ9++gkffPBB\nu+fKyMhoeh8fH4/4+PjuV60BdjuwZAmwdCnw5JNKV0NEcrBarbBarW45l9Nw1+l0nZ5g8eLFiImJ\ngdVqRXl5OWbMmIHi4mIMHDiwxXGXhzs553AAr70mxtkZ7ETa0brjm5mZ2e1zOR2WMRgMsNlsTZ9t\nNhuMrVaN2rNnD+655x4AQHBwMEaPHo3S0tJuF6R1lZXA+PHA4sXAn/+sdDVE5KuchntsbCzKyspQ\nUVGBuro65OXlISkpqcUxYWFh2Lx5MwDgxIkTKC0txZgxYzxXsUrV1gKPPgpce61Y18VsFsvsEhF1\nh9NhGX9/f2RnZyMhIQEOhwOpqakIDw9HTk4OACA9PR3PPvssUlJSEB0djcbGRixduhRDhw6VpXg1\niYgQvfZly8QDSL17K10REfkyPqGqIEkCXnyxefilpESs90JEBPAJVZ9itwNPPSWGYfLyxHdPPw1k\nZPBhJCJyH/bcZbR+PfDgg2IIZtEisVXdzJktV24kIrqEC4f5iJgYQKcTm0gHBSldDRF5Ow7LeLnq\najFfvbgYKCpisBOR53GbPQ9bsUJsKJ2fLxb+Gj9e6YqISAs4LONBlxb9evBBsbUdEVFXeGzhMOqZ\nSzsivfqqsnUQkfZwzN3N6uuBvXuBxkaxU9KjjwJ8pouI5MZwd7PVq4HUVGDqVDHV8Xe/U7oiItIi\njrm7kSQBsbFAYCDwySdKV0NEvo5j7l6gqEhsUH3woOi5ExEpiT13N5k8Gbh4UeyY5MIy+EREneJD\nTAr7/HNg1y5g+3YGOxF5B/bc3SApCTh9Gti5U+lKiEhN2HNXUGUl8OmnYr0YIiJvwRuqPfDWW2Ln\npFGjgLvuUroaIqJmDPcu+u474K9/Bfr1A+bPF2uxf/WV0lUREbXEMfcuuu8+YMcOsQ77iy8CV1+t\ndEVEpFYcc5dJXh6we7dYK
+bXv1a6GiKijnFYxkUpKUBysthF6bbblK6GiMg5Dsu44MABsazAJ58A\nd96pdDVEpBVcfsDDCgvFFnkMdiLyFQz3Thw4IJbtjYlRuhIiItdxWKYDaWliWYHKSmDSJMBqBfx5\n+5mIZNST7GS4t+PLL4Hrrwfefhu45RZgzBilKyIiLWK4u9nUqWL/0/37la6EiLSM89zd7OuvxY5K\nRES+ijdUW9m1Czh1CrjuOqUrISLqPg7LtJKYCBw5Ahw9yhuoRKQsznPvIYcD+NOfxEYbGzcCb77J\nYCci36b5cP/b30SQv/IK8Pe/i6BPTFS6KiKintH8sIxOByxcCLzxhtKVEBG1xKmQ3SBJwKxZYhjG\n4QB6af7fMETkbTgVsoskCbjmGqCqCjCbGexEpD6aDPf160WwHzsGBAUpXQ0Rkftpqs/qcAAPPwzM\nmQPExTHYiUi9Og13i8WCsLAwhISEICsrq91jrFYrrr/+ekRGRiI+Pt7dNfZYYyPw8svAuHHAu+8C\nL70E7NypdFVERJ7j9Iaqw+FAaGgoNm/eDIPBgPHjxyM3Nxfh4eFNx9TU1GDSpEnYtGkTjEYj7HY7\nAgICWjai8A3VhASxwuOCBWL53rAwxUohInKZx26oFhUVwWQyIej/xy+Sk5NhNptbhPvatWtx9913\nw2g0AkCbYFdaSYkI9txcsU0eEZEWOA336upqBAYGNn02Go0oLCxscUxZWRnq6+sxbdo0/PTTT3j8\n8cfxwAMPtDlXRkZG0/v4+HjZhm/y8oBf/YrBTkTez2q1wmq1uuVcTsNdp9N1eoL6+nocPHgQW7Zs\nwblz5xAXF4ebbroJISEhLY67PNw9rbER+Ne/xJICdjvw1FOyNU1E1G2tO76ZmZndPpfTG6oGgwE2\nm63ps81maxp+uSQwMBC33XYb+vXrh2HDhmHKlCkoLi7udkHu8OqrwEMPATNmAN98AyxZomg5RESy\ncxrusbGxKCsrQ0VFBerq6pCXl4ekpKQWx9x5553YtWsXHA4Hzp07h8LCQkRERHi06M786U/A7NnA\n2rXA6NGKlkJEpAinwzL+/v7Izs5GQkICHA4HUlNTER4ejpycHABAeno6wsLCMHPmTERFRaFXr15I\nS0tTNNxXrRKv2dmKlUBEpDhVrS1z4AAQGwvMny+W7SUi8mVcOOz/3X478NVXYqONvn093hwRkUdx\n4TAAP/8MWCxinJ3BTkRap5qee2KiWL63vp67KBGROmh+m729e0Wwr17NYCciAlTSc09PB778Emj1\n8CwRkU/TdM/9+eeB5cuB++5TuhIiIu/h0z13h0MMwzz2mNjcmohITTTbc3/ySfH62mvK1kFE5G18\nNtzr6kSov/AC4OendDVERN7FJ8P988+B6dPF+yeeULYWIiJv5HMTBx0O4JFHgD59xPTHAQOUroiI\nyPv4XLibzcDx48ChQ2JPVCIiasvnZsuMHCkWB/v0U7ecjojIa2lq4bABA4CCAiAy0i2nIyLyWpqZ\nCmk2A7W1wNVXK10JEZF386lwt9uBWbOAgAClKyEi8m4+Fe779gHDhytdBRGR9/OpcF+5EggOVroK\nIiLv5zM3VGtrxc3UEyeAq65yU2FERF5MEzdUn31WvDLYiYg65xPhXlcnVn38/e+VroSIyDf4xLDM\nt98CBgPQ2AjodG4sjIjIi6l+WObtt8Urg52IyDU+Ee7l5cDcuUpXQUTkO3wi3CsqgFtvVboKIiLf\n4RPhvnOnGHMnIiLXeH24f/aZeJ0yRdk6iIh8iVfPljl/Xiw3EBYG7N/vgcKIiLyYamfLFBaKJ1Nz\nc5WuhIjIt3h1uH/5JRAVBYSEKF0JEZFv8epwf+45IDpa6SqIiHyP1465NzYCfn5AZSUQGOihwoiI\nvJgqx9yPHROvDHYioq7z2p57WJiYLXP8uIeKIiLycj3pufu7uRa3eO01oLQU2LFD6UqIiHy
TVw7L\nVFYCCxcCkycrXQkRkW/qNNwtFgvCwsIQEhKCrKysDo/bt28f/P39sX79+h4XVV4OXHttj09DRKRZ\nTsPd4XBgwYIFsFgsOHLkCHJzc1FSUtLucU899RRmzpzZ4+30AODMGeDKK3t8GiIizXIa7kVFRTCZ\nTAgKCoJer0dycjLMZnOb45YtW4Y5c+Zg+PDhPS6ooUGMtXOWDBFR9zm9oVpdXY3Ay1LWaDSisLCw\nzTFmsxlbt27Fvn37oOtgR42MjIym9/Hx8YiPj2/3uOeeE6833+xC9UREKmK1WmG1Wt1yLqfh3lFQ\nX27RokVYsmRJ05SdjoZlLg93Z5YuBRYtAvr3d+lwIiLVaN3xzczM7Pa5nIa7wWCAzWZr+myz2WA0\nGlscc+DAASQnJwMA7HY7Nm7cCL1ej6SkpC4Xk58vXv/yly7/USIiuozTh5gaGhoQGhqKLVu2YNSo\nUZgwYQJyc3MRHh7e7vEpKSm44447MHv27JaNuDgRf948MVNm164u/lcQEamQxx5i8vf3R3Z2NhIS\nEuBwOJCamorw8HDk5OQAANLT07vVaMftASkpbj0lEZEmec3yAw0NgF4PvPMOkJrq6YqIiLxfT3ru\nXhPuFRXA6NHAzz/zZioREaCSVSHLy4E+fRjsRETu4DXhXlAgVoIkIqKe85pwP3oUmDJF6SqIiNTB\na8K9shIYOVLpKoiI1MFrwv2bb4DQUKWrICJSB68I9zNnxGyZqCilKyEiUgevCPdLqwibTMrWQUSk\nFl4R7jYbEBGhdBVEROrhFeG+bh1wzTVKV0FEpB5eEe4WCzB+vNJVEBGph+LhLkmAnx8wd67SlRAR\nqYfi4f7++0BNDRAQoHQlRETqofjCYcHBYgrkxx97ugoiIt/isfXcPa2kRDy89MknSlZBRKQ+ig7L\nbNok5raPG6dkFURE6qPosExAgBiS2brV0xUQEfken1zP/cgR4McfgVdeUaoCIiL1UqznPmIEMGCA\nGHfv3dvTFRAR+R6fu6F6/Djwww/AwYMMdiIiT1BkWCYnR4S6waBE60RE6qdIuK9bB/z2t0q0TESk\nDYqEe3U1MG+eEi0TEWmDIuF+/jxw3XVKtExEpA2KhHv//sAVVyjRMhGRNsge7idPArW1gF4vd8tE\nRNohe7jX1oqNOfr0kbtlIiLtUGRYRqdTolUiIu1QfD13IiJyP4Y7EZEKyR7uJSXAqVNyt0pEpC2y\nh/uhQ8ANN8jdKhGRtigyFXLCBLlbJSLSFtnDvbIS6MWRfiIij5I9ZsvLgfBwuVslItKWTsPdYrEg\nLCwMISEhyMrKavP7999/H9HR0YiKisKkSZNw6NAhp+c7e1Y8xERERJ7jdLMOh8OBBQsWYPPmzTAY\nDBg/fjySkpIQflnXe8yYMdixYwcGDRoEi8WCRx55BAUFBR2es08fYPBg9/0HEBFRW0577kVFRTCZ\nTAgKCoJer0dycjLMZnOLY+Li4jBo0CAAwMSJE1FVVeW0wcOHgSuv7GHVRETklNOee3V1NQIDA5s+\nG41GFBYWdnj8ihUrkJiY2O7vMjIycOaMeH/8eDyCg+O7Xi0RkYpZrVZYrVa3nMtpuOu6sAjMtm3b\nsHLlSuzevbvd32dkZKC0FMjPB265pWtFEhFpQXx8POLj45s+Z2ZmdvtcTsPdYDDAZrM1fbbZbDAa\njW2OO3ToENLS0mCxWDBkyJBuF0NERO7hdMw9NjYWZWVlqKioQF1dHfLy8pCUlNTimMrKSsyePRtr\n1qyByWTyaLFEROQapz13f39/ZGdnIyEhAQ6HA6mpqQgPD0dOTg4AID09HS+88AJOnz6N+fPnAwD0\nej2Kioo8XzkREXVIJ0mS5PFGdDpIkoSPPwYeeAD4+WdPt0hE5PsuZWd3yPqEqsMBTJ8uZ4tERNrk\ndFjG3VatEtvsERGRZ8ka7n36ALNmydkiEZE2yTos4+8
PBATI2SIRkTZx8V0iIhWSNdxrauRsjYhI\nu2QLd0kC/vMfrghJRCQH2ea5OxwS/PxEyBMRUed8Yp67wyFXS0REJFu4f/65XC0REZFs4X7mDDBl\nilytERFpm6yzZQwGOVsjItIuznMnIlIh2ZYfMJuBixflao2ISNtk67n/9BNw/fVytUZEpG2yhbte\nD8TEyNUaEZG2yRbudrtcLRERkWzh/vXXwIABcrVGRKRtsoV7QAAQFCRXa0RE2sapkEREKsRwJyJS\nIdnCva5OrpaIiEi2cK+qAgYNkqs1IiJtk2099759JZw/7+mWiIjUoyfrucsW7oDEjTqIiLrAJzbr\nGDJErpaIiEi2cL/6arlaIiIihjsRkQrJFu7cqIOISD6yhfvw4XK1RERE7LkTEakQlx8gIlIhhjsR\nkQox3ImIVIjhTkSkQgx3mVmtVqVL8Bq8Fs14LZrxWrhHp+FusVgQFhaGkJAQZGVltXvMwoULERIS\ngujoaHzxxRduL1JN+Be3Ga9FM16LZrwW7uE03B0OBxYsWACLxYIjR44gNzcXJSUlLY7Jz8/H0aNH\nUVZWhuXLl2P+/PkeLZiIiDrnNNyLiopgMpkQFBQEvV6P5ORkmM3mFsds2LAB8+bNAwBMnDgRNTU1\nOHHihOcqJiKizklOrFu3Tnr44YebPq9evVpasGBBi2N++ctfSrt37276PH36dGn//v0tjgHAH/7w\nhz/86cZPd/nDCbEOe+darzfc+s/JsGQ8ERFdxumwjMFggM1ma/pss9lgNBqdHlNVVQUD1xogIlKU\n03CPjY1FWVkZKioqUFdXh7y8PCQlJbU4JikpCe+99x4AoKCgAIMHD8aIESM8VzEREXXK6bCMv78/\nsrOzkZCQAIfDgdTUVISHhyMnJwcAkJ6ejsTEROTn58NkMqF///5YtWqVLIUTEZET3R6tb8fGjRul\n0NBQyWQySUuWLGn3mMcee0wymUxSVFSUdPDgQXc271U6uxZr1qyRoqKipHHjxkm/+MUvpOLiYgWq\nlIcrfy8kSZKKiookPz8/6aOPPpKxOnm5ci22bdsmxcTESGPHjpWmTp0qb4Ey6uxanDx5UkpISJCi\no6OlsWPHSqtWrZK/SBmkpKRIV111lRQZGdnhMd3JTbeFe0NDgxQcHCwdO3ZMqqurk6Kjo6UjR460\nOOazzz6Tbr/9dkmSJKmgoECaOHGiu5r3Kq5ciz179kg1NTWSJIm/5Fq+FpeOmzZtmjRr1izpww8/\nVKBSz3PlWpw+fVqKiIiQbDabJEki4NTIlWvx/PPPS08//bQkSeI6DB06VKqvr1eiXI/asWOHdPDg\nwQ7Dvbu56bblBzgnvpkr1yIuLg6DBg0CIK5FVVWVEqV6nCvXAgCWLVuGOXPmYLiKd3Vx5VqsXbsW\nd999d9PEhYCAACVK9ThXrsXIkSNx9uxZAMDZs2cxbNgw+Ps7HUn2SZMnT8aQIUM6/H13c9Nt4V5d\nXY3AwMCmz0ajEdXV1Z0eo8ZQc+VaXG7FihVITEyUozTZufr3wmw2Nz3d7OoUXF/jyrUoKyvDqVOn\nMG3aNMTGxmL16tVylykLV65FWloaDh8+jFGjRiE6OhpvvPGG3GV6he7mptv+N+iuOfFq0JX/pm3b\ntmHlypXYvXu3BytSjivXYtGiRViyZAl0Oh0kMVQoQ2Xyc+Va1NfX4+DBg9iyZQvOnTuHuLg43HTT\nTQgJCZGhQvm4ci0WL16MmJgYWK1WlJeXY8aMGSguLsbAgQNlqNC7dCc33RbunBPfzJVrAQCHDh1C\nWloaLBaL03+W+TJXrsWBAweQnJwMALDb7di4cSP0en2babe+zpVrERgYiICAAPTr1w/9+vXDlClT\nUFxcrLpwd+Va7NmzB8899xwAIDg4GKNHj0ZpaSliY2NlrVVp3c5Nt9wRkCSpvr5eGjNmjHTs2DHp\n4sWLnd5Q3bt3r2p
vIrpyLY4fPy4FBwdLe/fuVahKebhyLS734IMPqna2jCvXoqSkRJo+fbrU0NAg\n1dbWSpGRkdLhw4cVqthzXLkWTzzxhJSRkSFJkiR9//33ksFgkH788UclyvW4Y8eOuXRDtSu56bae\nO+fEN3PlWrzwwgs4ffp00zizXq9HUVGRkmV7hCvXQitcuRZhYWGYOXMmoqKi0KtXL6SlpSEiIkLh\nyt3PlWvx7LPPIiUlBdHR0WhsbMTSpUsxdOhQhSt3v7lz52L79u2w2+0IDAxEZmYm6uvrAfQsN3WS\npNIBTiIiDeNOTEREKsRwJyJSIYY7EZEKMdyJiFSI4U5EpEIMdyIiFfo/ywGEpMu2U9AAAAAASUVO\nRK5CYII=\n" } ], "prompt_number": 185 }, { "cell_type": "code", "collapsed": false, "input": [ "## test ensemble GradientBoostingClassifier\n", "boosting_classifier = ensemble.GradientBoostingClassifier(n_estimators = 200, max_depth = 1, learn_rate = 0.1)\n", "train_X_sparse, test_X_sparse = train_X, test_X\n", "train_X, test_X = train_X.todense(), test_X.todense()\n", "misclassified = benchmark(boosting_classifier)\n", "train_X, test_X = train_X_sparse, test_X_sparse" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "GradientBoostingClassifier(init=None, learn_rate=0.1, loss=deviance,\n", " max_depth=1, max_features=None, min_samples_leaf=1,\n", " min_samples_split=1, n_estimators=100,\n", " random_state=,\n", " subsample=1.0)\n", "training time: 57.14" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "test time: 0.08" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "confusion matrix:\n", "[[1816 138]\n", " [ 356 337]]\n", "classification rate:\n", "81.3373630525 %\n", "AUC for test: 0.835463495904\n", "ROC plot\n" ] }, { "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD9CAYAAABHnDf0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHQxJREFUeJzt3X9UVOedBvAHZRKJRUVRqzOkKFDAKGAKIrrqGGshNsUm\nJidkT3MSSzisqUnMblvTdLMOnlMjdjenibS7mEZ7opFoTRrMBsdUw2j9ARgxsInGjlbiMIkoBoQF\nF4bh7h9vYUBgGIeZe+feeT7nzBkmXO798h7y5M173/u+IZIkSSAiIk0ZpXQBRETkewx3IiINYrgT\nEWkQw52ISIMY7kREGsRwJyLSILfh/uMf/xhTp07FnDlzhjzm2WefRVxcHJKTk3HmzBmfF0hERLfP\nbbivXr0aZrN5yO+XlZXhwoULsFqt2LZtG9asWePzAomI6Pa5DfdFixYhIiJiyO/v378fTzzxBAAg\nPT0dzc3NaGho8G2FRER020JH8sN2ux1RUVG9nw0GA+rr6zF16tR+x4WEhIzkMkREQcvbRQRGFO6D\nXXioIOcqB4LJZILJZFK6jIDAtnBhW7go1RaSBHR0AC0tQGureO953fq5pQVoaxt4ju3bgeXLgbvv\nBr75TWDyZGD0aNf3w8OBmBggKgrQ6YavSa/3vmM8onDX6/Ww2Wy9n+vr66HX60dySiIin7h5E7Db\ngS+/FO89rytXBgZ1T3iHhADjxrle4eGDf46JAe66Cxh1y8B2VhbwyCPK/L63GlG4Z2dno6ioCDk5\nOaioqMCECRMGDMkQEd0uSRI946tXgWvXxHvPq+fzhQtAba0I8cHccQeg1wPTp4t3vR4wGIDUVGDC\nhIHhHR4O3HmnvL+nP7kN98ceewxHjhxBY2MjoqKiUFBQAIfDAQDIz8/HihUrUFZWhtjYWIwdOxY7\nduyQpWg1MxqNSpcQMNgWLsHQFh0dQwd138+XLxuxZYv4mSlTxGvyZNfXej2QkgLk5wPJySKYaaAQ\nOZb8DQkJ4Zg7kcZ0dQHXrw8f1j1f37zpCum+YT3U57FjxTBJMBtJdjLcicit118HPv4Y+MtfxNBF\na6sI6xs3gIgIz8N6wgSG9e1iuBORR7q6RA+6vV28HzwIXL4sPre1ife+X7e2AjU1wL/8CzB1KrBo\nkQj4yZOBSZP6zwQh3xtJdo54KiQRBZabN4HnnwfKywGHwxXk7e3iRmVYmJjpERYm/tmDDwIJCeKf\n3XWXGA7peQ8LAyZOBGbOVPq3otvFcCdSsU8+AbZtEyF9/TrQ2AhYrcA//APwpz+J2R89oR0WJuZW\nc2gkODDciQJcdzfQ1CTGuRsaXDco6+uBwkIgPh5Yv14Mk/S84uI4ZBLsOOZOFEDOngVeftkV4A0N\nojc+dqwY854ypf97fDzw6KNKV03+whuqRAHqxAmgz0Pcvd57T9y0bGsTs05u3BBPSDY3A08/DXzv\ne64QnzJFPJBDwYc3VIkCxI0bYsrgkSOAxSKmEC5fLqYM9tXaKnrcBoN4CGf8eNd7WJgipZPGsOdO\nNALNzSLMLRYR6J9/DqSnA0YjsGQJMG8eMGaM0lWSWnFYhkgmTU3A0aOunrnV6gpzoxFIS9PW+iSk\nLIY7kZ98/bUI856e+YULQEaGq2eelsbxcPIfhjuRj1y/7gpziwW4dMkV5kYj8J3vMMxJPgx3Ii9d\nu9Z/mKWuDli40NUz/853PNtUgcgfGO5EHrp6tf8wy+XLrjA3GoF77wVCOYeMAgTDnWgIDQ39h1ns\ndvFo/pIlIsznzmWYU+BiuBP93ZUrriGWI0eAr74SYd7TM09J4WP5pB4MdwpaX33lCnOLRfTUFy92\n9cyTkxnmpF4MdwoadrsI855Av3ZNhHlPz3zOHIY5aQfDnTSrvr7/MMv166JX3tMznzNn4A70RFrB\ncCfNsNn6D7M0N7uCfMkSYPZshjkFD4Y7qc7//i/w2mvAvn1iv
XJABHlbmyvIjUZg1iyGOQUvrgpJ\nqtHeDmRniy3gHn0U2LpVrFUOiN2C4uK4UxCRLzDcSVZvvCHGzU+eFCsmEpF/8H94STbd3cB//zfw\nk58w2In8jeFOsmhpAVauFJtZrFypdDVE2sdhGZLFK6+41kLnqopE/sdwJ7/59FOgpAT4n/8B3n9f\nbPzMYCeSB6dCkk/97W/A22+LUL9xA8jJEeuhJyUBMTFKV0ekLpznToqrqAA2bwaOHwceeQR47DGx\nlC7nqBN5j/PcSTYOhxhi+fxzMZ3xyhUxd72tDfjpT4Hdu8V8dSJSFnvu5JGrV4HXXwf+67+A6GjR\nK58+XQy5jBolhl24YxGRb7HnTn5TUyNmuuzfD6xaJXrtKSlKV0VEw2HPnQbo2Yruww+Bd94B1q8H\ncnOBSZOUrowouLDnTiP21VfipmhJiQj1b38biIgA9u4Fli1Tujoiul3suQex7m5g40bgrbfEei9p\naWJRrx/9CBg/XunqiGgk2TnsRDWz2YyEhATExcWhsLBwwPcbGxuRlZWFlJQUzJ49G3/4wx+8KoTk\nde4ccO+9Itj/8z+Bxkbg4EGx7guDnUj93PbcnU4n4uPjcejQIej1eqSlpaGkpASJiYm9x5hMJnR0\ndODll19GY2Mj4uPj0dDQgNA+W8qz5x5YHA7gzjvFWumHDwNTpypdERENxm8996qqKsTGxiI6Oho6\nnQ45OTkoLS3td8y0adPQ0tICAGhpacGkSZP6BTsFnnXrgAULxLIADHYibXKbwna7HVFRUb2fDQYD\nKisr+x2Tl5eH++67D9OnT0drayv27t076LlMJlPv10ajEUaj0fuqyWuSBPzud2KdF26KQRRYLBYL\nLBaLT87lNtxDPPi3f9OmTUhJSYHFYsHFixexfPly1NTUIDw8vN9xfcOd5NfRIWbCvPIKkJwMPPOM\n0hUR0a1u7fgWFBR4fS63wzJ6vR42m633s81mg8Fg6HfMiRMn8MgjjwAAYmJiMGPGDJw/f97rgsi3\nuruBxYuB8HAR7lu2AGfOuLa2IyJtchvuqampsFqtqKurQ2dnJ/bs2YPs7Ox+xyQkJODQoUMAgIaG\nBpw/fx4zZ870X8U0LKcT+MUvgHHjxFz1+nrxpOnBg0BWFodjiIKB22GZ0NBQFBUVITMzE06nE7m5\nuUhMTERxcTEAID8/Hy+++CJWr16N5ORkdHd3Y8uWLZg4caIsxdPgnn4a+OQT4N13gdhYQK/nui9E\nwYYPMWnE6dNiPH33bmD0aOCjj8RwDBGpl18fYqLAV1YmhlvmzgXeew9obmawEwU7TkhXud//Hnjp\nJbFa4/z5SldDRIGC4a5SkgSYTMCuXWIFx7g4pSsiokDCcFchhwPIzxdPmJ44wadMiWgghrvKtLaK\nPUpDQwGLhfPViWhwvKGqIleuAEYjcPfd4sYpg52IhsJwV4nPPxf7lT74IFBcLHruRERDYbirwK9+\nBSQmAhs2AP/6r3zClIiGx4eYVGDBAmD5cmAEawgRkQqNJDsZ7gGuuxuYPBn49FNg2jSlqyEiOfEJ\nVY1qaRH7mY4bx2AnotvDcA9gP/sZUFcHvPOO0pUQkdpwWCZAXb0qHk6yWIAlS5SuhoiUwDF3jXE4\ngJUrxXDMrl2c9kgUrEaSnYyNAFRbK5bstdsZ7ETkHY65B5DOTuDNN4GcHGDtWmDSJKUrIiK1Yr9Q\nQZIkNtno6BDvpaVif9N9+4D77lO6OiJSM4a7gj7+GJg3Tzyk1N4uNtvYu5c9diIaOYa7gj75BJgz\nBzh+XOlKiEhrGO4KOHsWePttYNs2oKRE6WqISIsY7jKyWsVSvT//OZCUBJw6BURFKV0VEWkR57nL\nRJLEJhvHj4s9T598ErjrLqWrIqJAxnnuAa6lBYiMFIuA2e3cFo+I/I/hLoPnnhNPnV67JkKeiMjf\n+BCTH0mS2FyjshK4eJHBT
kTyYc/dTyQJ+OlPgcOHgSNHxJrsRERyYbj7QXc38JOfANXVQHk5EBGh\ndEVEFGwY7j7mdAK5uWIY5s9/Fis7EhHJjeHuQw4H8PjjwPXrgNkMjB2rdEVEFKwY7j7S0QE8+ijQ\n1QW8/z4wZozSFRFRMONsGR9wOoGFC8Xa6+++y2AnIuUx3Efgyy/FjdOYGKC1VawXc8cdSldFRMRw\nH5H9+8VsmNxcsU4Md00iokDBOPLC+fPixumnnwK/+hXw/PNKV0RE1B8XDrtNkgRkZIgVHi0WsR47\nEZE/jCQ7OSzjIZtNDL+MGiWWE9i9m8FORIFr2HA3m81ISEhAXFwcCgsLBz3GYrFg7ty5mD17NoxG\no69rVJzVCtx9N6DTAdu3A19/DWRmKl0VEdHQ3A7LOJ1OxMfH49ChQ9Dr9UhLS0NJSQkSExN7j2lu\nbsbChQtx8OBBGAwGNDY2IvKWFbLUOizz6afA1q1ix6QZM4ALF0TPnYhIDn4blqmqqkJsbCyio6Oh\n0+mQk5OD0tLSfsfs3r0bq1atgsFgAIABwa5G3d3Azp1i2OXoUaCsDPjb3xjsRKQebmfL2O12RPXZ\nB85gMKCysrLfMVarFQ6HA0uXLkVrayuee+45PP744wPOZTKZer82Go0BO3zz178Cs2aJB5M2bAD6\nlE1E5FcWiwUWi8Un53Ib7iEhIcOewOFwoLq6GocPH0Z7ezsyMjIwf/58xMXF9TvOpIKUvHkT+PWv\ngfh44LPPlK6GiILNrR3fgoICr8/lNtz1ej1sNlvvZ5vN1jv80iMqKgqRkZEICwtDWFgYFi9ejJqa\nmgHhrgY7dwK//71Y9IuISM3cjiKnpqbCarWirq4OnZ2d2LNnD7Kzs/sds3LlShw7dgxOpxPt7e2o\nrKzErFmz/Fq0r0kScOYMkJ8P3H8/Z8IQkfq57bmHhoaiqKgImZmZcDqdyM3NRWJiIoqLiwEA+fn5\nSEhIQFZWFpKSkjBq1Cjk5eWpLtyXLxc7JsXEAHv2KF0NEdHIBfUTqpIEbNkCvPCCmBFz//1KV0RE\n5DKS7AzqcD9/Hpg/H9i0CfinfwI8uH9MRCQbhruXFiwA5s4FfvtbpSshIhpoJNkZtKtC7t0r5rR/\n8IHSlRAR+V5Qhvt77wE5OWJVx4gIpashIvK9oHugfu9e4OmngXXrgMWLla6GiMg/gi7cP/wQ0OuB\nl15SuhIiIv8JqmGZL78E/vIXoLCQwzFEpG1BNVvmRz8CIiOB3/xG6UqIiIbHnZg8dOwYkJqqdBVE\nRP4XNOFeUiK2yrvvPqUrISLyP82PuVdVAcXFYu2Yn/8cmD5d6YqIiPxP82PuWVnAmDHAL34B3Huv\n2AeViEgN+ITqIC5dAqqrgcpKYN8+ID1d6YqIiOSjyXA3m8Xa7G1tYjnf+fOVroiISF6aG5ZpagIm\nTgTWrgU2buR8diJSLw7L9PHyy+L91VeBUUEzF4iIqD/NhLskiSdPf/1r4JVXGOxEFNw0MyzT2Slm\nxWzaBKxfz403iEj9gv4J1Z07xTh7WprYMo/BTkTBTvU9964u0WN//XXgyScZ7ESkHUG9zd6xY8Ci\nRUB7OxAW5pdLEBEpIqiHZZxOsekGg52IyEX14X7qFIdiiIhupeqpkMXFwM9+BpSWKl0JEVFgUe2Y\ne0UFkJEBfP45EB/v01MTEQWEoLyh+uCDwFdfiZAnItKioLyhWl0NrFmjdBVERIFJlT33ri6xLntz\nMzB+vM9OS0QUUIKu515cLN7HjVO2DiKiQKW6cK+rAwoKgDNnOAWSiGgoqgp3pxN4/HGxF2pKitLV\nEBEFLlWF++bNwB13AP/8z0pXQkQU2FTzEFNFBfDaa8Dp01yrnYhoOKoI97Y2YMkSoKQEMBiUroaI\nKPCpYipkZibw4YdityUiomCh6amQX34pgv3QIaUrISJSj2HD3Ww2IyEhAXFxcSgsLBzyuFO
nTiE0\nNBTvvvuuTwusqwOio4Fly3x6WiIiTXMb7k6nE2vXroXZbMbZs2dRUlKCc+fODXrc+vXrkZWV5dMn\nUW/eBBYuBObN89kpiYiCgttwr6qqQmxsLKKjo6HT6ZCTk4PSQdbX3bp1Kx5++GFMnjzZp8UVFYn3\nt9/26WmJiDTP7WwZu92OqKio3s8GgwGVlZUDjiktLcVHH32EU6dOIWSIx0ZNJlPv10ajEUaj0W1h\n7e3ASy+JvVH5JCoRBQOLxQKLxeKTc7kN96GCuq9169Zh8+bNvXd1hxqW6Rvunjh2TDyF+tRTt/Vj\nRESqdWvHt6CgwOtzuQ13vV4Pm83W+9lms8Fwy0Tz06dPIycnBwDQ2NiIAwcOQKfTITs72+uiACA7\nG1i1akSnICIKWm7nuXd1dSE+Ph6HDx/G9OnTMW/ePJSUlCAxMXHQ41evXo0f/OAHeOihh/pfxIu5\nmt/6FlBeDsyceVs/RkSkGSOZ5+625x4aGoqioiJkZmbC6XQiNzcXiYmJKP77mrv5+fleXXQ4N28C\nTU1cZoCIyFsB+YTqv/87UFYmHlxiwBNRsNLcE6oXLwIPPMBgJyLyVsDFpyQB778PTJ2qdCVEROoV\ncOF+5gwwZgzwj/+odCVEROoVcOFutQJz5/LBJSKikQi4cL9+HYiMVLoKIiJ1C7hwLy4GOjqUroKI\nSN0CLty/8Q2xCTYREXkv4ML9xAngrruUroKISN0CKtybm4HwcHFDlYiIvBdQ4V5eDixYANxxh9KV\nEBGpW0CFe2Wl2HmJiIhGJqDCvbYWSEpSugoiIvULmHA/fBg4cACYM0fpSoiI1C8gwv2TT4DvfhdI\nTwdmzFC6GiIi9QuIJX+jooAf/hB47TUuO0BE1GMkS/4GRLhPnAj89a9cdoCIqC9Vh3tDA/DNbwLd\n3ey1ExH1perNOv7jP4DRoxnsRES+pHi4h4YCGzcqXQURkbYoHu5EROR7ioZ7dzfwpz+JlSCJiMh3\nFL2heuYMsHQpcPUq15MhIrqVam+onjolHlxisBMR+Zai4X70KDBtmpIVEBFpk2LhLknARx8BDz6o\nVAVERNql2Jh7ZSUwfz4fXiIiGooqx9xffx245x4GOxGRPygS7i0tYqbM+vVKXJ2ISPsUCfcNG4Bx\n44CHH1bi6kRE2if7mLvDIaY+Xr4slvolIqLBqWrM3W4X7waD3FcmIgoesoe7wwFERPBGKhGRP8ke\n7n/+MzCKy5UREfmV7DFrsQDLlsl9VSKi4BIq9wXvvBP43vfkvioRUXAZtuduNpuRkJCAuLg4FBYW\nDvj+W2+9heTkZCQlJWHhwoWora0d5nyA0+l9wURENDy3UyGdTifi4+Nx6NAh6PV6pKWloaSkBImJ\nib3HnDx5ErNmzcL48eNhNpthMplQUVHR/yJ/n84jSWK8/eJFYOZM//1SRERa4LepkFVVVYiNjUV0\ndDR0Oh1ycnJQWlra75iMjAyMHz8eAJCeno76+vohz2ezic2wGexERP7ldszdbrcjqs+TRgaDAZWV\nlUMe/8Ybb2DFihWDfs9kMuHkSTEN0mIxwmg0elcxEZFGWSwWWCwWn5zLbbiH3MZk9PLycmzfvh3H\njx8f9Psmkwn33gv8278BzHUiooGMxv4d34KCAq/P5Tbc9Xo9bDZb72ebzQbDII+W1tbWIi8vD2az\nGREREUOe79o1MSxDRET+5XbMPTU1FVarFXV1dejs7MSePXuQnZ3d75jLly/joYcewq5duxAbG+v2\nYuHhwJQpIy+aiIjcc9tzDw0NRVFRETIzM+F0OpGbm4vExEQUFxcDAPLz87Fx40Y0NTVhzZo1AACd\nToeqqqoB55Ik4IsvgMhIP/wWRETUj2yrQt64IWHaNKCtzd9XIyLSBlWsCmmzAXffLdfViIiCm2zh\n7nQCOp1cVyMiCm5cn5GISIMY7kREGsRwJyLSINnCvbO
TM2WIiOQiW7jX1oq13ImIyP9knS2zYIFc\nVyMiCm6yhfvNm0BYmFxXIyIKbrKF+//9HzBmjFxXIyIKbuy5ExFpkGzhfvo0MHq0XFcjIgpusoV7\nVxfwrW/JdTUiouAmW7jrdGKLPSIi8j/Zwr2qSvTeiYjI/2QLd0kC5syR62pERMFNtnAfO5Y3VImI\n5MKFw4iINIjhTkSkQQx3IiINYrgTEWkQw52ISIMY7kREGsRwJyLSIIY7EZEGMdyJiDSI4U5EpEEM\ndyIiDWK4ExFpEMOdiEiDZF3yl4iI5BEiSf6P3ZCQEAASWluBb3zD31cjItKGkJAQeBvRsvXcly5l\nsBMRyUW2cOfm2ERE8uENVSIiDWK4y8xisShdQsBgW7iwLVzYFr4xbLibzWYkJCQgLi4OhYWFgx7z\n7LPPIi4uDsnJyThz5ozPi9QS/uG6sC1c2BYubAvfcBvuTqcTa9euhdlsxtmzZ1FSUoJz5871O6as\nrAwXLlyA1WrFtm3bsGbNGr8WTEREw3Mb7lVVVYiNjUV0dDR0Oh1ycnJQWlra75j9+/fjiSeeAACk\np6ejubkZDQ0NA87Fee5ERDKS3PjjH/8oPfXUU72fd+7cKa1du7bfMQ888IB0/Pjx3s/Lli2TPv74\n437HAOCLL7744suLl7dC4YZ4+Gh4t06yv/XnZHhOioiI+nA7LKPX62Gz2Xo/22w2GAwGt8fU19dD\nr9f7uEwiIrodbsM9NTUVVqsVdXV16OzsxJ49e5Cdnd3vmOzsbLz55psAgIqKCkyYMAFTp071X8VE\nRDQst8MyoaGhKCoqQmZmJpxOJ3Jzc5GYmIji4mIAQH5+PlasWIGysjLExsZi7Nix2LFjhyyFExGR\nG16P1g/iwIEDUnx8vBQbGytt3rx50GOeeeYZKTY2VkpKSpKqq6t9efmAMlxb7Nq1S0pKSpLmzJkj\nLViwQKqpqVGgSnl48nchSZJUVVUljR49WnrnnXdkrE5enrRFeXm5lJKSIt1zzz3SkiVL5C1QRsO1\nxbVr16TMzEwpOTlZuueee6QdO3bIX6QMVq9eLU2ZMkWaPXv2kMd4k5s+C/euri4pJiZGunTpktTZ\n2SklJydLZ8+e7XfMBx98IN1///2SJElSRUWFlJ6e7qvLBxRP2uLEiRNSc3OzJEnijzyY26LnuKVL\nl0rf//73pX379ilQqf950hZNTU3SrFmzJJvNJkmSCDgt8qQtNmzYIL3wwguSJIl2mDhxouRwOJQo\n16+OHj0qVVdXDxnu3uamz5Yf8OWceLXzpC0yMjIwfvx4AKIt6uvrlSjV7zxpCwDYunUrHn74YUye\nPFmBKuXhSVvs3r0bq1at6p24EBkZqUSpfudJW0ybNg0tLS0AgJaWFkyaNAmhoW5HklVp0aJFiIiI\nGPL73uamz8LdbrcjKiqq97PBYIDdbh/2GC2Gmidt0dcbb7yBFStWyFGa7Dz9uygtLe19utnTKbhq\n40lbWK1WfP3111i6dClSU1Oxc+dOucuUhSdtkZeXh88++wzTp09HcnIyXn31VbnLDAje5qbP/jPo\nqznxWnA7v1N5eTm2b9+O48eP+7Ei5XjSFuvWrcPmzZt7Nya49W9EKzxpC4fDgerqahw+fBjt7e3I\nyMjA/PnzERcXJ0OF8vGkLTZt2oSUlBRYLBZcvHgRy5cvR01NDcLDw2WoMLB4k5s+C3fOiXfxpC0A\noLa2Fnl5eTCbzW7/t0zNPGmL06dPIycnBwDQ2NiIAwcOQKfTDZh2q3aetEVUVBQiIyMRFhaGsLAw\nLF68GDU1NZoLd0/a4sSJE/jlL38JAIiJicGMGTNw/vx5pKamylqr0rzOTZ/cEZAkyeFwSDNnzpQu\nXbokdXR0DHtD9eTJk5q9iehJW3zxxRdSTEyMdPLkSYWqlIcnbdHXk08+qdnZMp60xblz56Rly5ZJ\nXV1dUltbmzR79mz
ps88+U6hi//GkLZ5//nnJZDJJkiRJV65ckfR6vXT9+nUlyvW7S5cueXRD9XZy\n02c9d86Jd/GkLTZu3IimpqbecWadToeqqioly/YLT9oiWHjSFgkJCcjKykJSUhJGjRqFvLw8zJo1\nS+HKfc+TtnjxxRexevVqJCcno7u7G1u2bMHEiRMVrtz3HnvsMRw5cgSNjY2IiopCQUEBHA4HgJHl\npiwbZBMRkby4ExMRkQYx3ImINIjhTkSkQQx3IiINYrgTEWkQw52ISIP+H9s3+W9PTXC4AAAAAElF\nTkSuQmCC\n" } ], "prompt_number": 184 }, { "cell_type": "code", "collapsed": false, "input": [ "## test ridge classifier\n", "ridge_classifier = linear_model.RidgeClassifier(tol = 1e-1)\n", "benchmark(ridge_classifier)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "____________________________________________________________________________\n", "training: \n", "RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True,\n", " normalize=False, tol=0.1)\n", "training time: 1.41" ] }, { "ename": "AttributeError", "evalue": "'module' object has no attribute 'confusion_matrix'", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m## test ridge classifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mridge_classifier\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinear_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRidgeClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1e-1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mbenchmark\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mridge_classifier\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36mbenchmark\u001b[0;34m(clf)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
21\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'confusion matrix:'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfusion_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'classification rate:'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: 'module' object has no attribute 'confusion_matrix'" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "test time: 0.00\n", "confusion matrix:\n" ] } ], "prompt_number": 102 }, { "cell_type": "code", "collapsed": false, "input": [ "## test linear SVC\n", "linear_svc_classifier = svm.LinearSVC(loss = 'l2', penalty = 'l1', dual = False, tol = 1e-3)\n", "benchmark(linear_svc_classifier)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "## do a grid search on meta-pararmeters of SVC\n", "cv_svc_classifier = grid_search.GridSearchCV(svm.SVC(probability = True), \n", " dict(\n", " kernel = ('rbf','linear'),\n", " C = (1, 10, 100, 1000),\n", " gamma = (0.01, 0.1, 1),\n", " ), \n", " n_jobs = -1\n", ")\n", "cv_svc_classifier.fit(train_X, train_y)\n", "print cv_svc_classifier.best_estimator_\n", "## SVC(C=10, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.1,\n", "## kernel=rbf, probability=False, shrinking=True, tol=0.001, verbose=False)\n", "misclassified = benchmark(cv_svc_classifier.best_estimator_)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "SVC(C=1, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.01,\n", " kernel=linear, probability=True, shrinking=True, tol=0.001,\n", " 
verbose=False)\n", "____________________________________________________________________________\n", "training: \n", "SVC(C=1, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.01,\n", " kernel=linear, probability=True, shrinking=True, tol=0.001,\n", " verbose=False)\n", "training time: 18.66" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "test time: 4.35" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "confusion matrix:\n", "[[1861 93]\n", " [ 297 396]]\n", "classification rate:\n", "85.266339252 %\n", "AUC for test: 0.884823524025\n", "ROC plot\n" ] }, { "output_type": "pyout", "prompt_number": 55, "text": [ "[(array([u'1',\n", " u\"To engage in an intelligent debate with you is like debating to a retarded person. It's useless. It looks like you're bent on disregarding the efforts of the government.\"], \n", " dtype=' Gee. That sounds like great advice. Now here's some for you: Mind your own f\\xf9cking business, assh\\xf8le. Now fuckoff. \"], \n", " dtype='\"Ya I\\'m just a moron troll\"


Do not sell yourself short, you are an idiot as well.
'], \n", " dtype='LMFAO AT YOU!!!!!!!! WHERE IS YOUR RECORD CAREER GOING DOG?!'], \n", " dtype=' Waht a PC tool you are. The NAACP still rtefers to Colored People. Grow UP!\"], \n", " dtype='nonsense'], \n", " dtype=' your wife GO AWAY'], \n", " dtype='