{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RCV1-v2 Dataset\n",
    "\n",
    "One-vs-rest linear SVM on RCV1-v2, scored with micro-averaged F1 on the test split.\n",
    "\n",
    "Dataset description: [LYRL2004 RCV1-v2 README](http://www.jmlr.org/papers/volume5/lewis04a/lyrl2004_rcv1v2_README.htm)\n",
    "\n",
    "An earlier, unseeded run of this configuration reported a micro-averaged F1 of about 0.8084. Stored outputs are cleared; use *Restart Kernel -> Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "from sklearn.datasets import fetch_rcv1\n",
    "from sklearn.multiclass import OneVsRestClassifier\n",
    "from sklearn.metrics import f1_score, precision_score, recall_score\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn import svm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Configuration and data loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Install a default handler on the root logger before any library logs.\n",
    "logging.basicConfig()\n",
    "\n",
    "# Seed handed to LinearSVC so that any solver-side shuffling is repeatable.\n",
    "RANDOM_SEED = 0\n",
    "\n",
    "# Downloads the dataset on first use; later calls read the local cache.\n",
    "rcv1 = fetch_rcv1()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# scikit-learn stores the LYRL2004 training documents first: the first\n",
    "# 23149 rows are the training split, every remaining row is the test split.\n",
    "training_samples = 23149\n",
    "\n",
    "X_train = rcv1.data[:training_samples]\n",
    "X_test = rcv1.data[training_samples:]\n",
    "\n",
    "y_train = rcv1.target[:training_samples]\n",
    "y_test = rcv1.target[training_samples:]\n",
    "\n",
    "# Guard against a truncated dataset: both splits must be aligned and non-empty.\n",
    "assert X_train.shape[0] == y_train.shape[0] == training_samples\n",
    "assert X_test.shape[0] == y_test.shape[0] > 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# LinearSVC ignores `penalty`, `loss` and `dual` when\n",
    "# multi_class='crammer_singer', so penalty='l1' / dual=False would be\n",
    "# silently dropped here and are deliberately not passed.\n",
    "# To train an L1-penalised SVM instead, remove `multi_class` and pass\n",
    "# penalty='l1', dual=False.\n",
    "base_svm = svm.LinearSVC(\n",
    "    tol=0.01,\n",
    "    multi_class='crammer_singer',\n",
    "    random_state=RANDOM_SEED,\n",
    ")\n",
    "\n",
    "# One binary classifier per label column of the multilabel target.\n",
    "clf = OneVsRestClassifier(base_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# scikit-learn emits a UserWarning for every label that has no positive\n",
    "# example in the training split; those warnings are expected here.\n",
    "clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "y_pred = clf.predict(X_test)\n",
    "\n",
    "# Micro-averaging pools the decisions of every label before computing F1.\n",
    "current_score = f1_score(y_test, y_pred, average='micro')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "current_score"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}