{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Automated pipeline search on the scikit-learn digits dataset with TPOT\n",
    "\n",
    "This notebook splits the digits dataset into train and test parts, lets `TPOTClassifier` search for a pipeline for up to five minutes, exports the best pipeline it found as a Python script, and finally refits a pipeline taken from such an export on the same split.\n",
    "\n",
    "Saved outputs have been cleared; use *Restart Kernel and Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "from sklearn.ensemble import VotingClassifier\n",
    "from sklearn.feature_selection import RFE\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.pipeline import make_pipeline, make_union\n",
    "from sklearn.preprocessing import Binarizer, FunctionTransformer\n",
    "from sklearn.svm import SVC\n",
    "from tpot import TPOTClassifier\n",
    "from tpot.operators.preprocessors import ZeroCount\n",
    "from xgboost import XGBClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single seed shared by the data split and the TPOT search so that reruns\n",
    "# start from the same state.\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and split the data\n",
    "\n",
    "75% of the samples are used for training and 25% for testing. The last line displays the shapes of the four resulting arrays; a previously saved run of this notebook showed `((1347, 64), (450, 64), (1347,), (450,))`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits = load_digits()\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    digits.data,\n",
    "    digits.target,\n",
    "    train_size=0.75,\n",
    "    test_size=0.25,\n",
    "    random_state=SEED,\n",
    ")\n",
    "X_train.shape, X_test.shape, y_train.shape, y_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Search for a pipeline with TPOT\n",
    "\n",
    "The search is limited to five minutes (`max_time_mins=5`) with a population of 40 pipelines. The printed number is the score of the best pipeline on the held-out test split."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the search is seeded, but it is also bounded by wall-clock\n",
    "# time, so the selected pipeline may still differ between machines - confirm\n",
    "# against the TPOT documentation.\n",
    "tpot = TPOTClassifier(verbosity=2, max_time_mins=5, population_size=40, random_state=SEED)\n",
    "tpot.fit(X_train, y_train)\n",
    "print(tpot.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export the best pipeline\n",
    "\n",
    "`tpot.export` writes the best pipeline to a standalone Python script in the current working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tpot.export('tpot_mnist_pipeline.py')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Refit an exported pipeline\n",
    "\n",
    "The pipeline below was pasted from an exported `tpot_mnist_pipeline.py`. The exported script loads its data from a CSV file through placeholder arguments that have to be filled in by hand; that part is replaced here by the train/test split created above so the notebook runs from top to bottom without manual edits. A new search may export a different pipeline than the one shown."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Estimators and hyperparameters are kept exactly as they appear in the export.\n",
    "exported_pipeline = make_pipeline(\n",
    "    make_union(\n",
    "        make_union(\n",
    "            VotingClassifier([('branch',\n",
    "                make_pipeline(\n",
    "                    Binarizer(threshold=0.62),\n",
    "                    XGBClassifier(learning_rate=1.0, max_depth=10, min_child_weight=20, n_estimators=500, subsample=1.0)\n",
    "                )\n",
    "            )]),\n",
    "            FunctionTransformer(lambda X: X)\n",
    "        ),\n",
    "        make_pipeline(\n",
    "            RFE(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
    "                decision_function_shape=None, degree=3, gamma='auto', kernel='linear',\n",
    "                max_iter=-1, probability=False, random_state=42, shrinking=True,\n",
    "                tol=0.001, verbose=False), step=0.99),\n",
    "            ZeroCount()\n",
    "        )\n",
    "    ),\n",
    "    KNeighborsClassifier(n_neighbors=5, weights=\"distance\")\n",
    ")\n",
    "\n",
    "exported_pipeline.fit(X_train, y_train)\n",
    "results = exported_pipeline.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hold-out accuracy of the exported pipeline\n",
    "\n",
    "Fraction of test samples whose predicted label matches the true label."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(results == y_test).mean()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}