{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.datasets import fetch_20newsgroups\n", "categories = [\n", " 'alt.atheism',\n", " 'talk.religion.misc',\n", " 'comp.graphics',\n", " 'sci.space',\n", "]\n", "fetch_subset = lambda subset: fetch_20newsgroups(\n", " subset=subset, categories=categories,\n", " shuffle=True, random_state=42,\n", " remove=('headers', 'footers', 'quotes'))\n", "train = fetch_subset('train')\n", "test = fetch_subset('test')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Pipeline(steps=[('vec', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',\n", " dtype=, encoding='utf-8', input='content',\n", " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", " ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,\n", " ...2', random_state=None,\n", " refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0))])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.linear_model import LogisticRegressionCV\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "vec = TfidfVectorizer()\n", "clf = LogisticRegressionCV()\n", "pipeline = Pipeline([('vec', vec), ('clf', clf)])\n", "pipeline.fit(train['data'], train['target'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import eli5\n", "from eli5 import explain_weights, explain_prediction\n", "from eli5.formatters import format_as_html, format_as_text, format_html_styles, fields\n", "\n", "# print(format_as_text(explain_weights(clf, vec, target_names=train['target_names'])))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], 
"text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.core.display import display, HTML\n", "show_html = lambda html: display(HTML(html))\n", "show_html_expl = lambda expl, **kwargs: show_html(format_as_html(expl, include_styles=False, **kwargs))\n", "show_html(format_html_styles())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", "\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Weight?\n", " Feature
\n", " +18.117\n", " \n", " atheism\n", "
\n", " +16.558\n", " \n", " atheists\n", "
\n", " +14.393\n", " \n", " religion\n", "
\n", " +14.380\n", " \n", " bobby\n", "
\n", " +14.325\n", " \n", " matthew\n", "
\n", " +13.389\n", " \n", " motto\n", "
\n", " +13.215\n", " \n", " atheist\n", "
\n", " +13.010\n", " \n", " islam\n", "
\n", " +12.800\n", " \n", " nanci\n", "
\n", " +12.216\n", " \n", " enviroleague\n", "
\n", " +12.109\n", " \n", " loans\n", "
\n", " +11.672\n", " \n", " satan\n", "
\n", " +11.488\n", " \n", " posting\n", "
\n", " +11.173\n", " \n", " enlightening\n", "
\n", " +11.108\n", " \n", " natural\n", "
\n", " … 6382 more positive …\n", "
\n", " … 20478 more negative …\n", "
\n", " -11.259\n", " \n", " fake\n", "
\n", " -11.526\n", " \n", " order\n", "
\n", " -12.169\n", " \n", " christian\n", "
\n", " -12.253\n", " \n", " hudson\n", "
\n", " -18.551\n", " \n", " space\n", "
\n", "\n", " \n", " \n", "\n", " \n", "\n", "\n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Weight?\n", " Feature
\n", " +25.897\n", " \n", " graphics\n", "
\n", " +18.957\n", " \n", " image\n", "
\n", " +17.298\n", " \n", " computer\n", "
\n", " +16.843\n", " \n", " 3d\n", "
\n", " +16.190\n", " \n", " file\n", "
\n", " +14.020\n", " \n", " points\n", "
\n", " +13.269\n", " \n", " sgi\n", "
\n", " +13.180\n", " \n", " 42\n", "
\n", " +12.428\n", " \n", " hi\n", "
\n", " +11.835\n", " \n", " 3do\n", "
\n", " +11.175\n", " \n", " animation\n", "
\n", " +11.146\n", " \n", " using\n", "
\n", " +10.877\n", " \n", " code\n", "
\n", " +10.792\n", " \n", " package\n", "
\n", " +10.681\n", " \n", " video\n", "
\n", " +10.585\n", " \n", " screen\n", "
\n", " +10.571\n", " \n", " sphere\n", "
\n", " +10.570\n", " \n", " 68070\n", "
\n", " +10.553\n", " \n", " files\n", "
\n", " … 7893 more positive …\n", "
\n", " … 18967 more negative …\n", "
\n", " -18.127\n", " \n", " space\n", "
\n", "\n", " \n", " \n", "\n", " \n", "\n", "\n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Weight?\n", " Feature
\n", " +35.983\n", " \n", " space\n", "
\n", " +17.907\n", " \n", " orbit\n", "
\n", " +15.269\n", " \n", " nasa\n", "
\n", " +15.173\n", " \n", " launch\n", "
\n", " +13.235\n", " \n", " spacecraft\n", "
\n", " +12.872\n", " \n", " mars\n", "
\n", " +12.369\n", " \n", " nick\n", "
\n", " +12.117\n", " \n", " moon\n", "
\n", " +12.064\n", " \n", " allen\n", "
\n", " +11.800\n", " \n", " shuttle\n", "
\n", " +11.799\n", " \n", " dc\n", "
\n", " +10.934\n", " \n", " sci\n", "
\n", " +10.726\n", " \n", " solar\n", "
\n", " +10.716\n", " \n", " earth\n", "
\n", " … 10083 more positive …\n", "
\n", " … 16777 more negative …\n", "
\n", " -10.976\n", " \n", " file\n", "
\n", " -11.109\n", " \n", " wrong\n", "
\n", " -11.886\n", " \n", " image\n", "
\n", " -12.109\n", " \n", " religion\n", "
\n", " -13.500\n", " \n", " god\n", "
\n", " -18.002\n", " \n", " graphics\n", "
\n", "\n", " \n", " \n", "\n", " \n", "\n", "\n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=talk.religion.misc\n", " \n", "\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Weight?\n", " Feature
\n", " +19.215\n", " \n", " christian\n", "
\n", " +16.667\n", " \n", " blood\n", "
\n", " +14.907\n", " \n", " fbi\n", "
\n", " +14.185\n", " \n", " christians\n", "
\n", " +12.783\n", " \n", " hudson\n", "
\n", " +12.746\n", " \n", " order\n", "
\n", " +12.338\n", " \n", " christ\n", "
\n", " +12.126\n", " \n", " ekr\n", "
\n", " +11.972\n", " \n", " terrorist\n", "
\n", " +11.608\n", " \n", " koresh\n", "
\n", " +11.549\n", " \n", " dead\n", "
\n", " +11.185\n", " \n", " cult\n", "
\n", " … 6600 more positive …\n", "
\n", " … 20260 more negative …\n", "
\n", " -11.206\n", " \n", " anyone\n", "
\n", " -11.567\n", " \n", " could\n", "
\n", " -11.699\n", " \n", " get\n", "
\n", " -12.212\n", " \n", " thanks\n", "
\n", " -12.230\n", " \n", " edu\n", "
\n", " -12.319\n", " \n", " it\n", "
\n", " -13.026\n", " \n", " atheists\n", "
\n", " -17.289\n", " \n", " space\n", "
\n", "\n", " \n", " \n", "\n", " \n", "\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eli5.show_weights(clf, vec=vec, target_names=train['target_names'], horizontal_layout=False)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -16.171)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.394\n", " \n", " <BIAS>\n", "
\n", " -14.777\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 8.616)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +9.631\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.001, score -6.824)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", " -5.808\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=talk.religion.misc\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -11.885)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.019\n", " \n", " <BIAS>\n", "
\n", " -10.865\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_html_expl(\n", " explain_prediction(clf, test['data'][2], vec, target_names=train['target_names']),\n", " force_weights=False, horizontal_layout=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "``horizontal_layout=True`` is supported for prediction explanations too, and shows just the top prediction highlighting." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -16.171)\n", "\n", "top features\n", " \n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 8.616)\n", "\n", "top features\n", " \n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.001, score -6.824)\n", "\n", "top features\n", " \n", " \n", " \n", " y=talk.religion.misc\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -11.885)\n", "\n", "top features\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +0.889\n", " \n", " some\n", "
\n", " +0.539\n", " \n", " much\n", "
\n", " +0.278\n", " \n", " is\n", "
\n", " +0.266\n", " \n", " which\n", "
\n", " +0.225\n", " \n", " designer\n", "
\n", " +0.218\n", " \n", " it\n", "
\n", " +0.161\n", " \n", " most\n", "
\n", " +0.107\n", " \n", " trying\n", "
\n", " -0.005\n", " \n", " interior\n", "
\n", " -0.009\n", " \n", " has\n", "
\n", " -0.053\n", " \n", " likes\n", "
\n", " -0.058\n", " \n", " sophisticated\n", "
\n", " -0.060\n", " \n", " any\n", "
\n", " -0.111\n", " \n", " my\n", "
\n", " -0.171\n", " \n", " find\n", "
\n", " -0.216\n", " \n", " there\n", "
\n", " -0.244\n", " \n", " for\n", "
\n", " -0.276\n", " \n", " more\n", "
\n", " -0.317\n", " \n", " suggestion\n", "
\n", " -0.371\n", " \n", " here\n", "
\n", " -0.385\n", " \n", " to\n", "
\n", " -0.390\n", " \n", " and\n", "
\n", " -0.397\n", " \n", " better\n", "
\n", " -0.400\n", " \n", " he\n", "
\n", " -0.407\n", " \n", " how\n", "
\n", " -0.432\n", " \n", " am\n", "
\n", " -0.462\n", " \n", " from\n", "
\n", " -0.509\n", " \n", " features\n", "
\n", " -0.606\n", " \n", " where\n", "
\n", " -0.615\n", " \n", " pc\n", "
\n", " -0.649\n", " \n", " hi\n", "
\n", " -0.664\n", " \n", " on\n", "
\n", " -0.698\n", " \n", " the\n", "
\n", " -0.755\n", " \n", " friend\n", "
\n", " -0.800\n", " \n", " help\n", "
\n", " -0.854\n", " \n", " costs\n", "
\n", " -0.892\n", " \n", " looking\n", "
\n", " -0.931\n", " \n", " buy\n", "
\n", " -1.269\n", " \n", " graphics\n", "
\n", " -1.394\n", " \n", " <BIAS>\n", "
\n", " -3.451\n", " \n", " software\n", "
\n", "\n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +3.120\n", " \n", " graphics\n", "
\n", " +2.661\n", " \n", " software\n", "
\n", " +1.707\n", " \n", " hi\n", "
\n", " +1.180\n", " \n", " looking\n", "
\n", " +1.127\n", " \n", " buy\n", "
\n", " +0.906\n", " \n", " features\n", "
\n", " +0.850\n", " \n", " pc\n", "
\n", " +0.672\n", " \n", " help\n", "
\n", " +0.530\n", " \n", " any\n", "
\n", " +0.520\n", " \n", " it\n", "
\n", " +0.474\n", " \n", " on\n", "
\n", " +0.459\n", " \n", " find\n", "
\n", " +0.347\n", " \n", " am\n", "
\n", " +0.346\n", " \n", " where\n", "
\n", " +0.314\n", " \n", " has\n", "
\n", " +0.311\n", " \n", " there\n", "
\n", " +0.233\n", " \n", " for\n", "
\n", " +0.225\n", " \n", " which\n", "
\n", " +0.078\n", " \n", " from\n", "
\n", " +0.046\n", " \n", " friend\n", "
\n", " +0.034\n", " \n", " trying\n", "
\n", " +0.028\n", " \n", " interior\n", "
\n", " +0.005\n", " \n", " costs\n", "
\n", " -0.018\n", " \n", " likes\n", "
\n", " -0.019\n", " \n", " better\n", "
\n", " -0.026\n", " \n", " my\n", "
\n", " -0.058\n", " \n", " here\n", "
\n", " -0.103\n", " \n", " designer\n", "
\n", " -0.109\n", " \n", " sophisticated\n", "
\n", " -0.155\n", " \n", " and\n", "
\n", " -0.172\n", " \n", " is\n", "
\n", " -0.217\n", " \n", " some\n", "
\n", " -0.244\n", " \n", " how\n", "
\n", " -0.251\n", " \n", " to\n", "
\n", " -0.254\n", " \n", " most\n", "
\n", " -0.370\n", " \n", " more\n", "
\n", " -0.534\n", " \n", " much\n", "
\n", " -0.760\n", " \n", " suggestion\n", "
\n", " -0.863\n", " \n", " the\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", " -2.388\n", " \n", " he\n", "
\n", "\n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +0.870\n", " \n", " costs\n", "
\n", " +0.637\n", " \n", " buy\n", "
\n", " +0.606\n", " \n", " software\n", "
\n", " +0.500\n", " \n", " most\n", "
\n", " +0.393\n", " \n", " the\n", "
\n", " +0.287\n", " \n", " on\n", "
\n", " +0.281\n", " \n", " some\n", "
\n", " +0.259\n", " \n", " better\n", "
\n", " +0.249\n", " \n", " likes\n", "
\n", " +0.241\n", " \n", " sophisticated\n", "
\n", " +0.213\n", " \n", " more\n", "
\n", " +0.183\n", " \n", " much\n", "
\n", " +0.176\n", " \n", " friend\n", "
\n", " +0.122\n", " \n", " from\n", "
\n", " +0.118\n", " \n", " there\n", "
\n", " +0.049\n", " \n", " here\n", "
\n", " +0.028\n", " \n", " and\n", "
\n", " +0.018\n", " \n", " to\n", "
\n", " +0.016\n", " \n", " how\n", "
\n", " -0.013\n", " \n", " interior\n", "
\n", " -0.041\n", " \n", " designer\n", "
\n", " -0.051\n", " \n", " where\n", "
\n", " -0.090\n", " \n", " it\n", "
\n", " -0.094\n", " \n", " has\n", "
\n", " -0.203\n", " \n", " suggestion\n", "
\n", " -0.234\n", " \n", " for\n", "
\n", " -0.295\n", " \n", " help\n", "
\n", " -0.316\n", " \n", " am\n", "
\n", " -0.327\n", " \n", " trying\n", "
\n", " -0.356\n", " \n", " any\n", "
\n", " -0.418\n", " \n", " which\n", "
\n", " -0.506\n", " \n", " pc\n", "
\n", " -0.519\n", " \n", " find\n", "
\n", " -0.529\n", " \n", " my\n", "
\n", " -0.567\n", " \n", " looking\n", "
\n", " -0.654\n", " \n", " features\n", "
\n", " -0.961\n", " \n", " hi\n", "
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", " -1.140\n", " \n", " is\n", "
\n", " -1.568\n", " \n", " he\n", "
\n", " -2.169\n", " \n", " graphics\n", "
\n", "\n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +2.181\n", " \n", " he\n", "
\n", " +0.528\n", " \n", " my\n", "
\n", " +0.481\n", " \n", " more\n", "
\n", " +0.345\n", " \n", " and\n", "
\n", " +0.313\n", " \n", " friend\n", "
\n", " +0.287\n", " \n", " suggestion\n", "
\n", " +0.228\n", " \n", " trying\n", "
\n", " +0.137\n", " \n", " find\n", "
\n", " +0.110\n", " \n", " here\n", "
\n", " +0.104\n", " \n", " from\n", "
\n", " -0.001\n", " \n", " interior\n", "
\n", " -0.028\n", " \n", " designer\n", "
\n", " -0.032\n", " \n", " likes\n", "
\n", " -0.044\n", " \n", " how\n", "
\n", " -0.052\n", " \n", " am\n", "
\n", " -0.057\n", " \n", " help\n", "
\n", " -0.139\n", " \n", " sophisticated\n", "
\n", " -0.140\n", " \n", " better\n", "
\n", " -0.202\n", " \n", " to\n", "
\n", " -0.204\n", " \n", " the\n", "
\n", " -0.218\n", " \n", " where\n", "
\n", " -0.231\n", " \n", " which\n", "
\n", " -0.236\n", " \n", " features\n", "
\n", " -0.278\n", " \n", " buy\n", "
\n", " -0.394\n", " \n", " there\n", "
\n", " -0.399\n", " \n", " costs\n", "
\n", " -0.473\n", " \n", " pc\n", "
\n", " -0.500\n", " \n", " much\n", "
\n", " -0.514\n", " \n", " looking\n", "
\n", " -0.556\n", " \n", " for\n", "
\n", " -0.621\n", " \n", " is\n", "
\n", " -0.692\n", " \n", " any\n", "
\n", " -0.699\n", " \n", " has\n", "
\n", " -0.732\n", " \n", " most\n", "
\n", " -1.006\n", " \n", " on\n", "
\n", " -1.019\n", " \n", " <BIAS>\n", "
\n", " -1.098\n", " \n", " hi\n", "
\n", " -1.212\n", " \n", " some\n", "
\n", " -1.218\n", " \n", " it\n", "
\n", " -1.295\n", " \n", " graphics\n", "
\n", " -2.308\n", " \n", " software\n", "
\n", "\n", " \n", " \n", "
\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -16.171)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.394\n", " \n", " <BIAS>\n", "
\n", " -14.777\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 8.616)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +9.631\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.001, score -6.824)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", " -5.808\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=talk.religion.misc\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -11.885)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.019\n", " \n", " <BIAS>\n", "
\n", " -10.865\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_html_expl(explain_prediction(clf, test['data'][2], vec, target_names=train['target_names']),\n", " force_weights=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can hide weights by passing ``force_weights=False`` (they will still be shown if it's impossible to highlight text)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", " \n", " (probability 0.001, score -7.516)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.394\n", " \n", " <BIAS>\n", "
\n", " -6.122\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 6.432)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +7.447\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -10.113)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", " -9.098\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=talk.religion.misc\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -11.681)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.019\n", " \n", " <BIAS>\n", "
\n", " -10.662\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_html_expl(explain_prediction(clf, test['data'][4], vec, target_names=train['target_names']), force_weights=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Show explanations for the winning class for the first 10 documents from the test data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.979, score 5.057)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +6.073\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " trry the skywatch project in arizona.\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 6.193)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +7.208\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " the vatican library recently made a tour of the us.\n", " can anyone help me in finding a ftp site where this collection is \n", " available.\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 8.616)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +9.631\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " hi there,\n", "\n", "i am here looking for some help.\n", "\n", "my friend is a interior decor designer. he is from thailand. he is\n", "trying to find some graphics software on pc. any suggestion on which\n", "software to buy,where to buy and how much it costs ? he likes the most\n", "sophisticated \n", "software(the more features it has,the better)\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.994, score 3.280)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +4.294\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " rfd\n", " request for discussion\n", " for the\n", " open telematic group\n", "\n", " otg\n", "\n", "i have proposed the forming of a consortium/task force for the\n", "promotion of naplps/jpeg, fif to openly discuss ways, method,\n", "procedures,algorythms, applications, implementation, extensions of\n", "naplps/jpeg standards. these standards should facilitate the creation\n", "of real_time online applications that make use of voice, video,\n", "telecommuting, hires graphics, conferencing, distant learning, online\n", "order entry, fax,in addition these dicussion would assist all to\n", "better understand how sgml, cals, oda, mime, oodbms, jpeg, mpeg,\n", "fractals, sql, cdrom, cdromxa, kodak photocd, tcl, v.fast, and\n", "eia/tia562, can best be incorporated and implemented to develop\n", "telematic/multimedia applications.\n", "\n", "we want to be able to support dos, unix, mac, windows, nt, os/2\n", "platforms. it is our hope that individuals, developers, corporations,\n", "universities, r & d labs would join in in supporting such an endeavor.\n", "\n", "this would be a not_for_profit group with bylaws and charter. already\n", "many corporations have decided to support otg (open telematic group) so\n", "do not delay joining if you are a developer\n", "\n", "an rfd has been posted to form a usenet newsgroup and a faq will soon\n", "be be composed to start promulgating what is known on the subject. if\n", "you would like to be added to the maillist send email or mail to the\n", "address below.\n", "\n", "this group would publish an electronic quarterly naplps/jpeg\n", "newsletter as well as a hardcopy version. we urge all who wants to\n", "see cmcs hires based applications & the naplps/jpeg g r o w, decide to\n", "join and mutually benefit from this not-for_profit endeavor.\n", "\n", "note: telematic has been defined by mr. 
james martin as the marriage\n", " of voice, video, hi-res graphics, fax, ivr, music over telephone\n", " lines/lan.\n", "\n", "if you would like to get involve write to me at:\n", "\n", " img inter-multimedia group| internet: epimntl@world.std.com\n", " p.o. box 95901 | ed.pimentel@gisatl.fidonet.org\n", " atlanta, georgia, us | cis : 70611,3703\n", " | fidonet : 1:133/407\n", " | bbs : +1-404-985-1198 zyxel 14.4k\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 6.432)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +7.447\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.643, score 0.494)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +1.509\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " \n", "\n", "i'm also interested in such a program. but most of all i'd like to know \n", "wich program is able to convert gif or pcx to dxf !!! when i have this \n", "program, i can scan pictures and frase (or something like that !) them.\n", "this will be beyond the limit !!!\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.412, score -0.996)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +0.019\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.015\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " \n", "\n", "\n", "or how about:\n", " "end light pollution now!!"\n", "\n", "your banner would have no effect on its subject, but my banner would.\n", "\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 1.000, score 10.393)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +11.409\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " : while i'm sure sagan considers it sacrilegious, that wouldn't be\n", ": because of his doubtfull credibility as an astronomer. modern, \n", ": ground-based, visible light astronomy (what these proposed\n", ": orbiting billboards would upset) is already a dying field: the\n", ": opacity and distortions caused by the atmosphere itself have\n", ": driven most of the field to use radio, far infrared or space-based\n", ": telescopes.\n", "\n", "hardly. the keck telescope in hawaii has taken its first pictures; they're\n", "nearly as good as hubble for a tiny fraction of the cost.\n", "\n", ": in any case, a bright point of light passing through\n", ": the field doesn't ruin observations. if that were the case, the\n", ": thousands of existing satellites would have already done so (satelliets\n", ": might not seem so bright to the eyes, but as far as astronomy is concerned,\n", ": they are extremely bright.)\n", "\n", "i believe that this orbiting space junk will be far brighter still;\n", "more like the full moon. the moon upsets deep-sky observation all\n", "over the sky (and not just looking at it) because of scattered light.\n", "\n", "this is a known problem, but of course two weeks out of every four are\n", "ok. what happens when this billboard circles every 90 minutes? what\n", "would be a good time then?\n", "\n", ": frank crary\n", ": cu boulder\n", "\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", " \n", " (probability 0.991, score 8.925)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +10.319\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.394\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " \n", "not if you show that these hypothetical atheists are gullible, excitable\n", "and easily led from some concrete cause. in that case we would also\n", "have to discuss if that concrete cause, rather than atheism, was the\n", "factor that caused their subsequent behaviour.\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.850, score -0.580)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +0.436\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.016\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " picture our universe floating like a log\n", "in a river. as the log floats down the\n", "river, it occasionally strikes rocks, the\n", "bank, the bottom, other logs. when this collission\n", "occurs, kinetic energy is translated into heat, the\n", "log degrades, gets scraped up, and other energy \n", "translaions occur. the distribution of damage to\n", "the log depends on the shape of the log.\n", "\n", "however, to a very small virus in a mite on the head of a\n", "termite in the center of the log, the shock waves from the\n", "collissions would appear uniformly random in direction.\n", "\n", "this is my theory for grb. they are evidence of our universe\n", "interacting with other universes! why not! makes\n", "just as much sense as the grb coming from the oort cloud!\n", "\n", "the log theory of universes can't be ruled out!\n", "\n", "of course, i'm a layman in the physics world. you \n", "physicists out there, tell me about this !!!!\n", "\n", "

\n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "for doc in test['data'][:10]:\n", " expl = explain_prediction(clf, doc, vec, target_names=train['target_names'], top_targets=1)\n", " show_html_expl(expl, force_weights=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now use a vectorizer that skips stopwords" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Pipeline(steps=[('vec', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',\n", " dtype=, encoding='utf-8', input='content',\n", " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", " ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,\n", " ...2', random_state=None,\n", " refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0))])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vec_stop = TfidfVectorizer(stop_words='english')\n", "clf_stop = LogisticRegressionCV()\n", "pipeline_stop = Pipeline([('vec', vec_stop), ('clf', clf_stop)])\n", "pipeline_stop.fit(train['data'], train['target'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Words such as \"the\", \"in\", \"of\" are not used as features and are not highlighted" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "

Explained as: linear model

\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", " \n", "\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=alt.atheism\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -7.794)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.395\n", " \n", " <BIAS>\n", "
\n", " -6.399\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=comp.graphics\n", " \n", "\n", "\n", " \n", " (probability 0.999, score 5.992)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " +7.011\n", " \n", " Highlighted in text (sum)\n", "
\n", " -1.018\n", " \n", " <BIAS>\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=sci.space\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -7.692)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.017\n", " \n", " <BIAS>\n", "
\n", " -6.675\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "

\n", " \n", " \n", " y=talk.religion.misc\n", " \n", "\n", "\n", " \n", " (probability 0.000, score -10.365)\n", "\n", "top features\n", "

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "
\n", " Contribution?\n", " Feature
\n", " -1.070\n", " \n", " <BIAS>\n", "
\n", " -9.294\n", " \n", " Highlighted in text (sum)\n", "
\n", "\n", " \n", "\n", "\n", "\n", "

\n", " i am interested in finding 3d animation programs for the mac.\n", "i am especially interested in any programs that don't exist\n", "in a pc port and are so good that they would make me go buy\n", "a mac. do any such exist?\n", "

\n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_html_expl(explain_prediction(clf_stop, test['data'][4], vec_stop, target_names=train['target_names']), force_weights=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }