{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "Getting the text (Harry Potter series 1-3)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7\n" ] } ], "source": [ "# define a function to get .txt files in a folder\n", "import os\n", "from os import listdir\n", "\n", "def list_textfiles(directory):\n", "    \"\"\"Return a sorted list of paths to the '.txt' files in DIRECTORY.\n", "\n", "    Each path is DIRECTORY joined with the file name. os.listdir() returns\n", "    names in arbitrary order, so the result is sorted to keep the document\n", "    order (and every row index derived from it) stable across runs.\n", "    \"\"\"\n", "    return sorted(os.path.join(directory, filename)\n", "                  for filename in listdir(directory)\n", "                  if filename.endswith(\".txt\"))\n", "\n", "# define a function to read the text in a .txt file\n", "\n", "def read_txt(filename):\n", "    \"\"\"Return the entire contents of the text file FILENAME as a string.\"\"\"\n", "    # `with` guarantees the file is closed even if read() raises, and an\n", "    # error from open() itself propagates unchanged to the caller.\n", "    with open(filename, 'r') as f:\n", "        return f.read()\n", "\n", "#import harry potter textfiles\n", "filenames = list_textfiles('HP')\n", "raw_texts = [read_txt(path) for path in filenames]\n", "print(len(raw_texts))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0 0 0 0 0 0 0 0 0 0]\n", " [0 0 0 0 0 0 0 0 0 0]\n", " [0 0 0 0 0 0 0 0 0 0]\n", " [0 0 0 0 0 0 0 0 0 0]\n", " [0 0 0 0 0 0 0 0 0 0]\n", " [0 0 0 0 0 0 0 0 0 0]\n", " [0 0 0 0 0 0 0 0 0 0]]\n", "[u'anthem' u'anthems' u'anthill' u'anthology' u'anthony' u'anti'\n", " u'anticheating' u'anticipated' u'anticipating' u'anticipation']\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "\n", "vectorizer = CountVectorizer(input='content')\n", "# generate document term matrix\n", "dtm = vectorizer.fit_transform(raw_texts)\n", "\n", "# get the words as feature names\n", "\n", "vocab = np.array(vectorizer.get_feature_names())\n", "\n", "# fit_transform returns a sparse matrix (which uses less memory)\n", "# but we want to work with a normal numpy array.\n", 
"dtm = dtm.toarray()\n", "\n", "# normalize counts to rates per 1000 words\n", "# 1000.0 (a float) forces true division: with integer counts, Python 2's `/`\n", "# floors every rate below 1 down to 0\n", "rates = 1000.0 * dtm / np.sum(dtm, axis=1, keepdims=True)\n", "\n", "print(rates[:, 1000:1010])\n", "print(vocab[1000:1010])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Measuring distinctiveness\n", "\n", "Here we identify the words that are most distinctive of each Harry Potter book, using scikit-learn's tf-idf weighting (`TfidfVectorizer`).\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "22987\n" ] } ], "source": [ "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "vectorizer = TfidfVectorizer(input='content', min_df = 1, stop_words = 'english')\n", "\n", "# Let’s execute the model against our corpus:\n", "tfidf_matrix = vectorizer.fit_transform(raw_texts)\n", "feature_names = vectorizer.get_feature_names()\n", "\n", "tfidfArray = tfidf_matrix.toarray()\n", "\n", "print(len(feature_names))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So we got more than 20 thousand words.\n", "View the top 5 terms of each book (called an *article* in the output below) based on the tf-idf score" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 1 is:\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHmpJREFUeJzt3X2UVfV97/H3dwaJTxhNxSYKFvUSc200aqemNqYlXfVe\nRFlapDcm49P1RkRKmtQ0KWuZZPX26vIhraupSEBcmkht1BZM5gaq7c2TpjEtKD4gxJRrrmXQ3pBc\nRUWD4HzvH2czHibAnMHfzDlneL/WmsXZe/9mn++effbhc377d/aOzESSJElvXUezC5AkSRotDFaS\nJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQsY064kPP/zwnDRp\nUrOeXpIkqWGPPPLITzNz/GDtmhasJk2axKpVq5r19JIkSQ2LiGcbaeepQEmSpEIMVpIkSYUYrCRJ\nkgoxWEmSJBXStMHrzbBlyxbmzJnD2LFjmTJlCt3d3QA8//zzXHfddWQmF1xwAR/4wAc47rjjOPPM\nMzn11FOZNWtWkyuXJEntYJ/qsVq2bBkzZ85k8eLF9PT09M//8z//c8aNG0dHRwcTJkwA4OCDD+a1\n115j4sSJzSpXkiS1mX2qx6q3t5cTTzwRgM7Ozv75Tz31FF/84hc54ogj+PSnP81tt93G6tWryUzO\nOecczjrrrGaVLEmS2sg+1WM1YcIEent7Aejr69tp/mGHHcbBBx/Mz3/+cwA6Ojro7OzkbW97205t\nJUmSdicysylP3NXVlSN9gdAtW7Ywd+5c9t9/f8444wzuv/9+lixZwtq1a7nxxhuJCD72sY9x+OGH\nc8MNNwBw0kkn8clPfnJE65QkSa0lIh7JzK5B2+1LwUqSJGlvNBqs9qlTgZIkScNp1Aer7X3le+SG\nY52SJKn9jfpvBY7pCK5fva3oOuedsl/R9UmSpNFh1PdYSZIkjRSDlSRJUiEGK0mSpEIMVpIkSYUY\nrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKaShY\nRcTUiHg6ItZHxLzdtJkSEY9FxFMR8d2yZUqSJLW+MYM1iIhO4BbgTKAXWBkRPZm5tq7NocACYGpm\n/ltEHDFcBUuSJLWqRnqsTgPWZ+Yzmfk6cDdw7oA2HwWWZea/AWTmT8qWKUmS1PoaCVZHARvqpnur\nefXeDRwWEd+JiEci4uJdrSgiZkXEqohYtWnTpr2rWJIkqUWVGrw+Bvg14GzgPwOfi4h3D2yUmbdm\nZldmdo0fP77QU0uSJLWGQcdYARuBiXXTE6p59XqBn2XmFmBLRDwIvA/4UZEqJUmS2kAjPVYrgckR\ncUxEjAUuAHoGtPk6cEZEjImIA4H3A+vKlipJktTaBu2xysztETEXeADoBG7PzKciYna1fGFmrouI\n+4EngD7gtsxcM5yFS5IktZpGTgWSmSuAFQPmLRww/QXgC+VKkyRJai9eeV2SJKkQg5UkSVIhBitJ\nkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJ\nhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgox\nWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCS\nJEkqpKFgFRFTI+LpiFgfEfN2sXxKRGyOiMeqn8+XL1WSJKm1jRmsQUR0ArcAZwK9wMqI6MnMtQOa\nPpSZ5wxDjZIkSW2hkR6r04D1mflMZr4O3A2cO7xlSZIktZ9GgtVRwIa66d5q3kC/GRFPRMTfR8Sv\n7mpFETErIlZFxKpNmzbtRbmSJEmtq9Tg9UeBozPzJOBm4Gu7a
pSZt2ZmV2Z2jR8/vtBTS5IktYZG\ngtVGYGLd9IRqXr/MfCkzX6kerwD2i4jDi1UpSZLUBhoJViuByRFxTESMBS4AeuobRMQ7IyKqx6dV\n6/1Z6WIlSZJa2aDfCszM7RExF3gA6ARuz8ynImJ2tXwhMBO4MiK2A68BF2RmDmPdkiRJLWfQYAX9\np/dWDJi3sO7xfGB+2dIkSZLai1delyRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUY\nrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJ\nkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJ\nKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgppKFhFxNSIeDoi1kfEvD20+/WI\n2B4RM8uVKEmS1B4GDVYR0QncApwFnAB8JCJO2E27G4B/KF2kJElSOxjTQJvTgPWZ+QxARNwNnAus\nHdDu48BS4NeLVqhBbdmyhTlz5jB27FimTJlCd3c3AF/72te4//772bBhA5/73Oc46aSTmDVrFocc\ncghHHnkkn/3sZ5tcuSRJo0sjpwKPAjbUTfdW8/pFxFHA7wFfKleaGrVs2TJmzpzJ4sWL6enp6Z9/\n3nnnsXDhQq655hoeeugh1q1bx/HHH8+CBQt4/vnn2bBhwx7WKkmShqrU4PW/BP4kM/v21CgiZkXE\nqohYtWnTpkJPrd7eXiZOnAhAZ2fnTsv+4i/+gj/6oz9i6tSpnHLKKWzdupWrrrqK5557jo0bNzaj\nXEmSRq1GgtVGYGLd9IRqXr0u4O6I+D/ATGBBRJw3cEWZeWtmdmVm1/jx4/eyZA00YcIEent7Aejr\n2znbfupTn+Lee+/lpptuoqOjg2uuuYabbrqJww47jGOPPbYZ5UqSNGo1MsZqJTA5Io6hFqguAD5a\n3yAzj9nxOCK+DHwjM79WsE7twYwZM5g7dy7Lly9n+vTpXHTRRSxZsoTbbruNxx9/nM2bN3P55ZcD\nMGfOHLZt20ZXVxdHHHFEkyuXJGl0icwcvFHENGqn+zqB2zPz2oiYDZCZCwe0/TK1YPV3e1pnV1dX\nrlq1am/rHpLrV28rur55p+xXdH2SJKm1RcQjmdk1WLtGeqzIzBXAigHzFu6m7aWNrFOSJGm08crr\nbWp73+A9ja2wTkmS9iUN9Vip9YzpCE9xSpLUYuyxkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mS\npEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmF\nGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFY\nSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIaClYRMTUino6I9RExbxfLz42IJyLi\nsYhYFRFnlC9VkiSptY0ZrEFEdAK3AGcCvcDKiOjJzLV1zb4J9GRmRsRJwL3Ae4ajYEmSpFbVSI/V\nacD6zHwmM18H7gbOrW+Qma9kZlaTBwGJJEnSPqaRYHUUsKFuureat5OI+L2I+CGwHLisTHmSJEnt\no9jg9cy8LzPfA5wH/I9dtYmIWdUYrFWbNm0q9dSSJEktoZFgtRGYWDc9oZq3S5n5IHBsRBy+i2W3\nZmZXZnaNHz9+yMVKkiS1skaC1UpgckQcExFjgQuAnvoGEfEfIiKqx6cCbwN+VrpYSZKkVjbotwIz\nc3tEzAUeADqB2zPzqYiYXS1fCJwPXBwR24DXgA/XDWaXJEnaJwwarAAycwWwYsC8hXWPbwBuKFua\nJElSe/HK65IkSYUYrCRJk
goxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaS\nJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmS\nCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVmpZW7Zs4ZJLLuHyyy/nrrvu\n6p9//fXXc8UVV3D++efT29sLwBVXXMGJJ57YrFIlSQIMVmphy5YtY+bMmSxevJienp7++fPmzWPR\nokVceOGFfPvb3wZg0aJFHH/88c0qVZIkwGClFtbb28vEiRMB6Ozs3GnZK6+8wr333st5553XjNIk\nSdolg5Va1oQJE/pP9fX19fXPf+mll7jyyiu58cYbGTduXLPKkyTpFxis1LJmzJjB0qVLufLKK5k+\nfToXXXQRAJdeeikvvvgi1157Ld/61rcAuPrqq1m9ejWzZ89m69atzSxbkrQPi8xsyhN3dXXlqlWr\nRuS5rl+9rej65p2yX9H17a3Rul2SJLWaiHgkM7sGa2ePlSRJUiENBauImBoRT0fE+oiYt4vl3RHx\nREQ8GRHfj4j3lS9Vo932vuHpPR2u9UqSNNCYwRpERCdwC3Am0AusjIiezFxb1+zHwG9n5gsRcRZw\nK/D+4ShYo9eYjih+ehM8xSlJGjmN9FidBqzPzGcy83XgbuDc+gaZ+f3MfKGa/AEwoWyZkiRJra+R\nYHUUsKFuureatzv/Dfj7t1KUJElSOxr0VOBQRMSHqAWrM3azfBYwC+Doo48u+dRS29iyZQtz5sxh\n7NixTJkyhe7ubgBWrFjB/PnzmTZtGnPnzuWNN97gkksu4YADDuD111/njjvuoKPD75tIUitr5F16\nIzCxbnpCNW8nEXEScBtwbmb+bFcrysxbM7MrM7vGjx+/N/VKbW93t+qZNm0an/nMZ/qnX375ZcaN\nG8fixYs58MADefHFF5tRriRpCBoJViuByRFxTESMBS4AeuobRMTRwDLgosz8UfkypdFjT7fqqXfI\nIYewdetWzj77bLZt28Y73vGOkSpRkrSXBg1WmbkdmAs8AKwD7s3MpyJidkTMrpp9HvglYEFEPBYR\nI3PlT6kN7e5WPQM9+uijTJo0ieXLl3PMMcfw2GOPjVSJkqS91NCAjcxckZnvzszjMvPaat7CzFxY\nPf5YZh6WmSdXP4NemVTaV+3uVj0PP/wwN910E/fccw9Lly7lhBNOYN26dcyZM4fHH3+cyZMnN7ly\nSdJgig5elzS4gw46iDvuuKN/esfg9dNPP32nMVcAX/3qV0e0NknSW+NXjCRJkgoxWEmSJBVisJKG\nmfdAlKR9h2OspGHmPRAlad9hj5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAl\nSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkorYsmULl1xyCZdffjl33XVX//wVK1Ywbdo05s+fD0Bf\nXx9XXHEFF198MbNnz25WuZI0LAxWkopYtmwZM2fOZPHixfT09PTPnzZtGp/5zGf6pzs6Oli0aBF3\n3nknr7zyCn19fc0oV5KGhcFKUhG9vb1MnDgRgM7Ozj22Xbt2LZdccgmHHnooHR2+DUkaPXxHk1TE\nhAkT6O3tBRi0F+qEE07gK1/5Cn19fTz77LMjUZ4kjYgxzS5A0ugwY8YM5s6dy/Lly5k+fToXXXQR\nS5Ys4eGHH+amm27ihRde4F3vehenn3461113HX19fYwZM6a/l0uSRgODlaQiDjroIO64447+6e7u\nbgBOP/30ncZcAdx8880jWpskjRRPBUqSJBVisJK0V7b3ZVutV5JGgqcCJe2VMR3B9au3FV/vvFP2\nK75OSRop9lhJ0h40euFTgCuuuIITTzyxGWVKahEGK0nag0YvfAqwaNEijj/++JEuUVILMVh
J0h4M\n5cKn7WgoPXKSBmewkqQ9GMqFT9vRUHrkJA3OYCVJezBjxgyWLl3KlVde2X/hU6D/wqf33HMPS5cu\nBeDqq69m9erVzJ49m61btzaz7IaN1h45e+LULH4rUJL2YCgXPr322mu59tprR7S+t2pHj9zJJ588\nqnrkdvTETZ8+nQ9/+MP9+23atGkceOCBrFmzpskV7p0tW7YwZ84cxo4dy5QpU/q3a8WKFcyfP59p\n06Yxd+7cJle5b7PHSpL2YUPpkWsno7UnbrSdut1dz+KaNWvo7u6mu7ubNWvW8Oqrr3LhhRcyZ84c\nrrnmmiZWPLiGglVETI2IpyNifUTM28Xy90TEwxGxNSL+uHyZkjQy9rULn+7okfvSl75Ed3c3S5Ys\nAd7skXvooYc4//zzm1zl0I3WsXGjLTDuLih+8Ytf5JZbbmHBggXcfPPNrFu3juOPP54FCxbw/PPP\ns2HDhiZWvWeDngqMiE7gFuBMoBdYGRE9mbm2rtn/A/4QOG9YqpSkEeKFT0eHRm8K3m6hcbSduu3t\n7e2/9lt9UNy8eTOHHnooAC+//DKnnHIK9913H1dddRXPPfccGzdubNkbuDcyxuo0YH1mPgMQEXcD\n5wL9wSozfwL8JCLOHpYqJUlvyfa+ZExHtM1636qhjI1rJ6MtMO4uKL797W9n8+bNRATjxo2jo6Oj\n/xTgZZddxrHHHtuskgfVSLA6Cqjvc+sF3j885UiShoM9caPDaAuMuwuKn/jEJ/j4xz8O0D92bM6c\nOWzbto2uri6OOOKIZpa9RyP6rcCImAXMAjj66KNH8qklSaPQvtYTN9rsLii+973v5c4779yp7YIF\nC0a0tr3VSLDaCNSfyJxQzRuyzLwVuBWgq6urNUdySpLaxmjtiTMwtq9GgtVKYHJEHEMtUF0AfHRY\nq5IkaR9mYGyN9e6NQYNVZm6PiLnAA0AncHtmPhURs6vlCyPincAq4BCgLyI+CZyQmS8NY+2SJKmN\njNbAWK+hMVaZuQJYMWDewrrH/07tFKEkSdI+yyuvS5IkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJ\nUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRC\nDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRis\nJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYU0FKwiYmpEPB0R\n6yNi3i6WR0T8VbX8iYg4tXypkiRJrW3QYBURncAtwFnACcBHIuKEAc3OAiZXP7OALxWuU5IkqeU1\n0mN1GrA+M5/JzNeBu4FzB7Q5F7gza34AHBoR7ypcqyRJUktrJFgdBWyom+6t5g21jSRJ0qgWmbnn\nBhEzgamZ+bFq+iLg/Zk5t67NN4DrM/N71fQ3gT/JzFUD1jWL2qlCgOOBp0ttSAGHAz9tdhHDwO1q\nL25Xe3G72ovb1V5abbt+JTPHD9ZoTAMr2ghMrJueUM0bahsy81bg1gaec8RFxKrM7Gp2HaW5Xe3F\n7Wovbld7cbvaS7tuVyOnAlcCkyPimIgYC1wA9Axo0wNcXH078DeAzZn5fOFaJUmSWtqgPVaZuT0i\n5gIPAJ3A7Zn5VETMrpYvBFYA04D1wKvAfx2+kiVJklpTI6cCycwV1MJT/byFdY8T+IOypY24ljxF\nWYDb1V7crvbidrUXt6u9tOV2DTp4XZIkSY3xljaSJEmFGKwkSZIKGbXBKiImRcSaZtfRKiLizyLi\nd3cxf0p1HTIVMpyvvYj4/m7mf7m65txbWfeIHDMRcdsubovV0iLiTyPij9v1OPL9cN8SEZdGxJHN\nrmMwEfFK9e+REfF31eNLI2J+cyt7axoavL6viYgxmbl
9d9PtKDM/3+wa3oqICGpjAvuaXUsz7HgN\nZuZvNruWXRnKMbLjYsPtqN2PI+0zLgXWAM81uY6GZOZzwFv6YNhKRm2PVaUzIhZHxFMR8Q8RcUBE\nXB4RKyPi8YhYGhEHQv8n/oUR8c/AjdUn1CUR8U/Akoh4MCJO3rHiiPheRLyvWRtW1XBQRCyvtmVN\nRHw4Ij5fbd+aiLi1CiQ79WhExNSI+GFEPArMaOY27En1KfvpiLiT2pvERRHxZLVtN9S1eyUirq3+\nDj+IiF9uXtX9hvLaO66q+8mIuKbuU9yUiHgoInqAtdW8HcsiIuZXf5//BRwxjHUfFxH3R8QjVT3v\nqWrY1THzlarNsxExIyJurLbr/ojYr/q970REV/X4I0PZpxExvvrbrax+PlBou39BRFwdET+KiO9R\nu1NEQ8dR9Xe4vdrOZyLiD4erxr0REcdGxOqI+HRELKv2zb9GxI11bX5hv0TE70fETdXjT0TEM3Xr\n+6fmbM2uRcT1EfEHddM7ehw/Xb1unoiI/97MGocqIj5XHe/fi4ivVttzcnV8PBER90XEYdXrswu4\nKyIei4gDml37YGI3PaoRcXZEPBwRh4/ksf+WZeao/AEmAduBk6vpe4ELgV+qa3MN8PHq8ZeBbwCd\n1fSfAo8AB1TTlwB/WT1+N7CqBbbxfGBx3fTbgXfUTS8Bptdt30xgf2r3dZwMRPV3+Uazt2UP+7AP\n+A3gSODfgPHUelq/BZxXtcu67bwR+GwL1D2U1943gI9Uj2cDr1SPpwBbgGPqfm/HshnAP1K7ttyR\nwIvAzGGq+5vA5Gre+4Fv1b2mBh4z3wP2A95H7Zp2Z1XL7qvbX9+h9sY/5H0K/A1wRvX4aGDdMO3D\nXwOeBA4EDqF2jb4/buQ4qv4O3wfeRu2WHD8D9muB1+QaagFxdbV/LgWeofa+sT/wLLU7aOxyvwDv\nBFZW6/s7ahePPorae+N1zdy+XWzvKcB366bXVnXeWu2vjuq1+1vNrrXB7fl14LFqP40D/rV6PT4B\n/HbV5s948/+o7wBdza67ge3a8X42CVhTPb4UmA/8HvAQcFg1f0SO/RI/o73H6seZ+Vj1+BFqO++9\n1SfqJ4Fu4Ffr2v9tZr5RN92Tma/tWAacU33qvozaG2yzPQmcGRE3RMQHM3Mz8KGI+Odq+36HnbcP\n4D3U/i7/mrVX6F+PcM1D9Wxm/oDaG8t3MnNT1k453QX8VtXmdWpvkvDmfm62obz2Tqf2+oLam0e9\nf8nMH+9i/b8FfDUz38haN/q3hrHu3wT+NiIeAxYB76prP/CY+fvM3EbttdkJ3F/Nf5Jf3C97s09/\nF5hf1dIDHBIRB+/dpu7RB4H7MvPVzHyJX7zbxGDH0fLM3JqZPwV+ArRCL+p44OtAd2Y+Xs37ZmZu\nzsyfUwsfv8Ju9ktm/jtwcESMoxbA/oba/vogtf8AW0ZmrgaOiNrYnfcBLwAnAv+JWrB8lNo+nNy8\nKofkA8DXM/Pnmfky8D+Bg4BDM/O7VZuv8Obx0+5+B/gT4OzMfKGaN1LH/ls22sdYba17/AZwALVA\ndF5mPh4Rl1LrFdhhy4Df75/OzFcj4h+Bc4H/Qu0TbVNl5o8i4lRqV72/Jmo3v/4Dap9UNkTEn1L7\nhNPOBu6TXdlW/ecGtf3cCq/rob72dqeR7S9pYN2/DLyYmSfvpv3A+rYCZGZfRNTvlz6Gtl92t087\ngN+ogkArG/h3bIXX5GZqPVFnUJ1aZuh1fp/anTWephamLqP2weBTRSst42+p9S6+E7iHWmi8LjMX\nNbUqNeJ/A8dSnR2q5rXLsT/qe6x2ZRzwfNXz1D3E370N+Ctq3eEvDNZ4uEXtWx+vZuZfA18ATq0W\n/bRK8rsaDPhDYFJEHFdNf2T4Ky3iX4Dfrs61d1Kr+7uD/E6r2d1r7wfUTutC7V6cjXgQ+HBEdEbE\nu4APlStzJy8BP46
I34f+sV2lxhbuzT79B+DjOyaibtxjYQ8C50VtjNk4YPqA5e14HL1O7fTKxRHx\n0T2029N+eYjaKagHqfX8fAjYWvWWt5p7qB1PM6mFrAeAy3b0ckTEURFRamzicPsnYHpE7F/Vfw61\nDzUvRMQHqzYX8eZ+epna+027epbae+KdEbGjZ3+kjv23rBU+RY20zwH/DGyq/m34xZeZj0TES8Ad\nw1TbUJ0IfCEi+oBtwJXUxkKsAf6d2hiInWTmzyNiFrA8Il6l9kbZ8gdgZj4fEfOAb1MbI7E8M7/e\n5LKGanevvU8Cfx0RV1M7ddbIf1L3UesuX0utF+Lh4tW+qRv4UkR8ltr4qbuBx/f8K4Pby336h8At\nEfEEtfevB6mNSysqMx+NiHuobedPGHAstfFxtCUizqE2Pm/Jbtrsab88RO004IOZ+UZEbKAWMltO\n1u5pOw7YmJnPU/tQ8x+Bh6P2nZ5XqI0h/EkTy2xIZq6M2pdYngD+L7VT65upjRtbGLUvwjzDm/fp\n/XI1/zXg9LohLW0jM38YEd3UhiFMZ4SO/RK8pc0QVD1E3wHek/vo1/5VXvWm+FpmZkRcQG0g+7nN\nrktS64iIgzPzler94kFgVmY+2uy69Iv2xR6rvRIRFwPXAlcZqlTYr1EblBnUvt13WZPrkdR6bo3a\nhXX3B75iqGpd9lhJkiQVsi8OXpckSRoWBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQV8v8BTSdOxl1J\nyWEAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 2 is:\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X20XHV97/H3Nyek8pAASrCURME2PqSiYo+0VKih6i3E\nngVN0xswhFCuYBKD7e2yGK+tcgUXD1KuyoMBLEQwV7QSbK5E0GoC9akmPAcQzAItBykJFggJAZKe\n7/1j7xyG40nOJPmdM3NO3q+1sjL7N7/Z8/3NntnnM7+9ZyYyE0mSJO26Ua0uQJIkaaQwWEmSJBVi\nsJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKGd2qOz7ggAPykEMOadXd\nS5IkNe32229/MjPHD9SvZcHqkEMOYdWqVa26e0mSpKZFxC+a6eehQEmSpEIMVpIkSYUYrCRJkgox\nWEmSJBXSspPXW2Hjxo3MmzePMWPGMGXKFGbOnAnA448/znnnnUdmcuKJJ/LOd74TgDlz5rDPPvtw\n0UUXtbJsSZI0TOxWM1ZLlixh+vTpXHXVVSxdurS3/aKLLmLs2LGMGjWKCRMmAPD1r3+dd7zjHa0q\nVZIkDUO7VbDq7u5m4sSJAHR0dPS233fffZxyyimcffbZnHPOOTzxxBPceeedvPvd725VqZIkaRja\nrYLVhAkT6O7uBqCnp+dl7fvvvz/77LMPzz//PLfeeitr167lU5/6FMuXL+ehhx5qVcmSJGkYicxs\nyR13dnbmUH9B6MaNG5k/fz6veMUrOOqoo7j55pu57rrruP/++7nwwguJCD7wgQ/0nmP185//nEsv\nvdRzrCRJ2s1FxO2Z2Tlgv90pWEmSJO2MZoPVbnUoUJIkaTCN+GC1paf8jNxgrFOSJA1/I/57rEaP\nCs6/c3PRdS44fI+i65MkSSPDiJ+xkiRJGioGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrE\nYCVJklSIw
UqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQV0lSwiohjI+LBiFgTEQu2\n0WdKRNwVEfdFxK1ly5QkSWp/owfqEBEdwGXAe4FuYGVELM3M+xv67AdcDhybmf8eEQcOVsGSJEnt\nqpkZqyOANZn5cGa+CFwPHN+nz/uBJZn57wCZubZsmZIkSe2vmWB1MPBow3J33dbo9cD+EbEiIm6P\niFP6W1FEnBERqyJi1bp163auYkmSpDZV6uT10cDvAe8D/gT4+4h4fd9OmXllZnZmZuf48eML3bUk\nSVJ7GPAcK+AxYGLD8oS6rVE38KvM3AhsjIjbgLcCDxWpUpIkaRhoZsZqJTApIg6NiDHAicDSPn3+\nGTgqIkZHxF7A7wMPlC1VkiSpvQ04Y5WZWyJiPnAL0AFcnZn3RcSc+vqFmflARNwM3AP0AF/MzNWD\nWbgkSVK7aeZQIJm5DFjWp21hn+XPAJ8pV5okSdLw4jevS5IkFWKwkiRJKsRgJUmSVIjBSpIkqRCD\nlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJ\nkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJ\nhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYU0Fawi4tiI\neDAi1kTEgn6unxIRz0TEXfW/T5QvVZIkqb2NHqhDRHQAlwHvBbqBlRGxNDPv79P1XzPzTwehRkmS\npGGhmRmrI4A1mflwZr4IXA8cP7hlSZIkDT/NBKuDgUcblrvrtr7+MCLuiYhvRcTv9reiiDgjIlZF\nxKp169btRLmSJEntq9TJ63cAr8nMtwCXAN/or1NmXpmZnZnZOX78+EJ3LUmS1B6aCVaPARMblifU\nbb0yc31mbqgvLwP2iIgDilUpSZI0DDQTrFYCkyLi0IgYA5wILG3sEBG/GRFRXz6iXu+vShcrSZLU\nzgYMVpm5BZgP3AI8AHwtM++LiDkRMafuNh1YHRF3A58HTszMHKyi9XIbN25k9uzZnH766SxevPhl\n1917770ceOCBbNiwgSeeeII5c+YwZ84cXvOa17B+/foWVSxJ0sg04NctQO/hvWV92hY2XL4UuLRs\naWrWkiVLmD59Ol1dXcyYMYOZM2cCsHnzZr74xS9y3HHHAfDqV7+ahQsXsnbtWjZt2sS4ceNaWbYk\nSSOO37w+AnR3dzNxYnUaXEdHR2/7RRddxIc//GHqo7S9Fi1axOzZs4e0RkmSdgcGqxFgwoQJdHd3\nA9DT09Pbftddd3HJJZfwk5/8hCuuuAKAzGT58uUcc8wxLalVkqSRzGA1AkybNo0bbriBuXPn0tXV\nxaxZswD46le/ymc/+1mOOOIIPvjBDwKwYsUKjj766F+bxZIkSbsuWnWOeWdnZ65atWpI7uv8OzcX\nXd+Cw/couj5JktTeIuL2zOwcqJ8zVpIkSYUYrIapLT3lZxoHY52SJO1Omvq6BbWf0aPCQ5ySJLUZ\nZ6wkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFY\nSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIk\nSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFNBWsIuLYiHgwItZExILt9HtHRGyJiOnl\nSpQkSRoeBgxWEdEBXAYcB0wGToqIydvodwHw7dJFSpIkDQfNzFgdAazJzIcz80XgeuD4fvqdCdwA\nrC1YnyRJ0rDRTLA6GHi0Ybm7busVEQcDfwZ8YXsriogzImJVRKxat27djtY
qSZLU1kqdvP5Z4KOZ\n2bO9Tpl5ZWZ2Zmbn+PHjC921JElSexjdRJ/HgIkNyxPqtkadwPURAXAAMDUitmTmN4pUKUmSNAw0\nE6xWApMi4lCqQHUi8P7GDpl56NbLEbEI+KahSpIk7W4GDFaZuSUi5gO3AB3A1Zl5X0TMqa9fOMg1\nSpIkDQvNzFiRmcuAZX3a+g1UmXnqrpclSZI0/PjN65IkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJ\nklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSp\nEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEG\nK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhTQWriDg2Ih6M\niDURsaCf64+PiHsi4q6IWBURR5UvVZIkqb2NHqhDRHQAlwHvBbqBlRGxNDPvb+j2XWBpZmZEvAX4\nGvDGwShYkiSpXTUzY3UEsCYzH87MF4HrgeMbO2TmhszMenFvIJEkSdrNNBOsDgYebVjurtteJiL+\nLCJ+CtwEnNbfiiLijPpQ4ap169btTL2SJEltq9jJ65l5Y2a+ETgBOGcbfa7MzM7M7Bw/fnypu5Yk\nSWoLzQSrx4CJDcsT6rZ+ZeZtwOsi4oBdrE2SJGlYaSZYrQQmRcShETEGOBFY2tghIn4nIqK+/Hbg\nN4BflS5WkiSpnQ34qcDM3BIR84FbgA7g6sy8LyLm1NcvBP4cOCUiNgObgBkNJ7NLO2Xjxo3MmzeP\nMWPGMGXKFGbOnAnA+eefzyOPPMKTTz7J5z73OfbYYw8++clPArBs2TJWr17NuHHjWlm6JGk3NWCw\nAsjMZcCyPm0LGy5fAFxQtjTt7pYsWcL06dPp6upixowZvcFqwYLqq9RuvPFGli9fzqxZs1i4cCFr\n165l06ZNhipJUsv4zetqW93d3UycWJ3e19HR8bLrNmzYwNe+9jVOOOGE3rZFixYxe/bsIa1RkqRG\nBiu1rQkTJtDd3Q1AT09Pb/v69euZO3cuF154IWPHjgUgM1m+fDnHHHNMS2qVJAkMVmpj06ZN44Yb\nbmDu3Ll0dXUxa9YsAE499VSefvppPv3pT/O9730PgBUrVnD00UdTf4ZCkqSWiFadY97Z2ZmrVq0a\nkvs6/87NRde34PA9iq5vZ43UcUmS1G4i4vbM7ByonzNWkiRJhRis1Da29AzO7OlgrVeSpL6a+roF\naSiMHhXFD2+ChzglSUPHGStJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSp\nEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEG\nK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhTQWriDg2Ih6M\niDURsaCf62dGxD0RcW9E/DAi3lq+VEmSpPY2YLCKiA7gMuA4YDJwUkRM7tPtEeBdmXkYcA5wZelC\nJUmS2l0zM1ZHAGsy8+HMfBG4Hji+sUNm/jAzn6oXfwxMKFumJElS+2smWB0MPNqw3F23bcv/AL7V\n3xURcUZErIqIVevWrWu+SkmSpGGg6MnrEXEMVbD6aH/XZ+aVmdmZmZ3jx48vedeSJEktN7qJPo8B\nExuWJ9RtLxMRbwG+CByXmb8qU54kSdLw0cyM1UpgUkQcGhFjgBOBpY0dIuI1wBJgVmY+VL5MSZKk\n9jfgjFVmbomI+cAtQAdwdWbeFxFz6usXAp8AXgVcHhEAWzKzc/DKliRJaj/NHAokM5cBy/q0LWy4\n/AHgA2VLkyRJGl785nVJkqRCDFaSJEm
FGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOV\nJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJQ2xjRs3Mnv2bE4//XQWL17c275s\n2TKmTp3KpZdeCkBPTw8f/OAHOeWUU5gzZ06rypUk7QCDlTTElixZwvTp07nqqqtYunRpb/vUqVM5\n66yzepdHjRrFFVdcwbXXXsuGDRvo6elpRbmSpB1gsJKGWHd3NxMnTgSgo6Nju33vv/9+Zs+ezX77\n7ceoUb5cJanduaeWhtiECRPo7u4GGHAWavLkyXzpS1+ip6eHX/ziF0NRniRpF4xudQHS7mbatGnM\nnz+fm266ia6uLmbNmsV1113Hj370Iy6++GKeeuopDjroII488kjOO+88enp6GD16dO8sV7vauHEj\n8+bNY8yYMUyZMoWZM2cC1bljl156KVOnTmX+/Pn09PQwd+5cNm3axF577cXChQtbXLkklWOwkobY\n3nvvzTXXXNO7vDWAHHnkkS875wrgkksuGdLadsXWc8e6urqYMWNG77imTp3KXnvtxerVq4GXzh0D\nOPnkk+np6fEwp6QRw72ZpCI8d0ySDFaSCvHcMUnyUKA06Lb0JKNHxbBZ784aqeeOSdKOMFhJg2z0\nqOD8OzcXX++Cw/covs5dMVLPHZOkHeGhQEmSpEIMVpIkSYUYrCTtlC09OazWK0lDwXOsJO2U3eXc\nMUnaEc5YSZIkFWKwkiRJKsRgJUmSVIjBSpIkqZCmglVEHBsRD0bEmohY0M/1b4yIH0XECxHxkfJl\nSlJrbNy4kdmzZ3P66aezePHi3vbVq1czc+ZMZs6cyerVq3nuuec4+eSTmTdvHueee24LK5bUSgMG\nq4joAC4DjgMmAydFxOQ+3f4T+DBwUfEKJamFlixZwvTp07nqqqte9g3yn/vc57jsssu4/PLLueSS\nS3jggQd4wxvewOWXX87jjz/Oo48+2sKqJbVKMzNWRwBrMvPhzHwRuB44vrFDZq7NzJVA+c9eS1IL\ndXd39/6eYUdHR2/7M888w3777ce+++7Ls88+y+GHH84LL7zA3/zN3/DLX/6Sxx57rFUlN8WZOGlw\nNBOsDgYa33p1122SNOJNmDCB7u5uAHp6enrb9913X5555hnWr1/P2LFjGTVqFOeeey4XX3wx+++/\nP6973etaVXJTnImTBseQnrweEWdExKqIWLVu3bqhvGtJ2inTpk3jhhtuYO7cuXR1dTFr1iwA/uqv\n/oozzzyT+fPnc+aZZwIwb948Tj/9dDo7OznwwANbWfaARupMnNRqzXzz+mPAxIblCXXbDsvMK4Er\nATo7O/3dCkltb++99+aaa67pXZ45cyYAb37zm7n22mtf1vfyyy8f0tp2xdaZuLe97W39zsRFxMtm\n4gBOO+20tp+Jk1qtmWC1EpgUEYdSBaoTgfcPalWSpEE1bdo05s+fz0033dQ7E3fdddf1zsQBnHXW\nWUA1E7d58+ZhMRO3ceNG5s2bx5gxY5gyZUpvEF69ejXnnXceAB/72Md405vexOzZs9lzzz158cUX\nueaaaxg1ym8g0q4bMFhl5paImA/cAnQAV2fmfRExp75+YUT8JrAKGAf0RMRfA5Mzc/0g1i5JxW3p\nSUaPimGz3p01Umfitp471tXVxYwZM3rHtfXcsYjgrLPO4oILLmDs2LF84QtfYO7cuTz99NO88pWv\nbHH129ZsYBw/fjyf/OQnAVi2bBmrV69m3LhxLat7ICNxXE39CHNmLgOW9Wlb2HD5P6gOEUrSsOaP\nSw9v3d3dHHbYYUD/544BPPvss4wbN44XXniB973vfRx00EFtHaqg+cB4xRVXsHDhQtauXcumTZva\nNnxsNRLH5bynJGnEaPZTnHfccQeHHHIIN910E4ceeih33XVXq0puSrMfNthq0aJFzJ49e8jr3FEj\ncVw
GK0naDWzpGZzPCw3WendWs5/inDx5Mg888ADz5s3j7rvvZtKkSS2ufPuaDYwAmcny5cs55phj\nWlLrjhiJ42rqUKAkaXjbXQ5x7si5Y1/5yleGtLZdsSMfNlixYgVHH300Ee1zTt+2jMRxGawkSWpz\nOxIYjznmmLaf1dlqJI7LQ4GSpGFrdznEqeHDGStJ0rA1Ug9x7i5f+zESGawkSWozBsb2WO/OMFhJ\nkqQhMVIDYyPPsZIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQV\nYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRg\nJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgppKlhFxLER8WBErImIBf1cHxHx+fr6\neyLi7eVLlSRJam8DBquI6AAuA44DJgMnRcTkPt2OAybV/84AvlC4TkmSpLbXzIzVEcCazHw4M18E\nrgeO79PneODarPwY2C8iDipcqyRJUltrJlgdDDzasNxdt+1oH0mSpBEtMnP7HSKmA8dm5gfq5VnA\n72fm/IY+3wTOz8zv18vfBT6amav6rOsMqkOFAG8AHiw1kAIOAJ5sdRGDwHENL45reHFcw4vjGl7a\nbVyvzczxA3Ua3cSKHgMmNixPqNt2tA+ZeSVwZRP3OeQiYlVmdra6jtIc1/DiuIYXxzW8OK7hZbiO\nq5lDgSuBSRFxaESMAU4ElvbpsxQ4pf504B8Az2Tm44VrlSRJamsDzlhl5paImA/cAnQAV2fmfREx\np75+IbAMmAqsAZ4D/nLwSpYkSWpPzRwKJDOXUYWnxraFDZcT+FDZ0oZcWx6iLMBxDS+Oa3hxXMOL\n4xpehuW4Bjx5XZIkSc3xJ20kSZIKMVhJkiQVMmKDVUQcEhGrW11Hu4iIT0XEe/ppn1J/D5l2UkRs\n2MnbrYiIX/so8c6ur+H2b4uIqTtxuyF5zUTEF/v5WSy1icZ9QkT8RkT8S0TcFREzWl1bfyLi7Ij4\nyHau39brbLu3a0cR8cZ6W9wZEb/d6nqGWkScGhG/1eo6BtLUyeu7m4gYnZlbtrU8HGXmJ1pdw66I\niKA6J7Cn1bW0s4gYDbwN6KTPB04G+36bfY1s/bJhDQuHA2Tm21pdyO4iIjoy87+2cfUJwNcz89wh\nrKed/v6dCqwGftniOrZrxM5Y1Toi4qqIuC8ivh0Re0bE6RGxMiLujogbImIvgIhYFBELI+LfgAvr\ndzPXRcQPgOsi4raI6N25RMT3I+KtrRpYXcPeEXFTPZbVETEjIj5Rj291RFxZB5Kt45teXz42In4a\nEXcA01o5hu2pZ1AejIhrqV5MsyLi3npsFzT02xARn64fhx9HxKtbVG9ExGfq+u5tfIcfER+t2+6O\niPP73G5UvX3ObWj7tfFERFdE/Fv9bvVfGtpf9lwFPgXM2MlZhv5eM78dETdHxO0R8a8R8cb6fvt7\nzXyp7vOLiJgWERfW4745Ivaob9c7gxARJ+3INo2I8fXrdmX97507OL6dEhHnR8SHGpbPjoiPRMTf\n1nXcExH/eyhqaUb92vlpvY0eiojFEfGeiPhBRPwsIo6o//2ofj79MCLe0GcdBwJfBt5RP5d+OyLe\nXfe/NyKujmpG648j4hsNt3tvRNw4yOP7eD2u71P9isfWmdof19vixojYv+Ems+oxrI6IIxra31o/\nBj+LiNPr9VwbESc03NfiiOj7+7g7W/fW7bI4Ih6IiK9HxF4R8fOIuCCqffJf9DeWqGah/xqYGxHL\n6/WdHBE/qcd2RUR01P8WxUv7of9Z9/1wRNxfr/P6uu2VEfGNuu3HEfGWur3vPmXQRMTfR7Wf/35E\nfKV+XfU3/ulUbxgX1+PdczDr2iWZOSL/AYcAW4C31ctfA04GXtXQ5
1zgzPryIuCbQEe9fDZwO7Bn\nvTwb+Gx9+fXAqjYY458DVzUs7wu8smH5OqCrYXzTgVdQ/a7jJCDqx+WbrR7LdrZhD/AHwG8B/w6M\np5pp/R5wQt0vG8Z5IfB3Q1znhobt8R2q73t7dV3vQcBxwA+Bvep+r6z/X1GP7SvAxxvW1+94gP15\n6ZO8HwD+YRvP1VOBS3fy8e7vNfNdYFLd9vvA9xqeU31fM98H9gDeSvWddsfV193YsL1WUO0gd3ib\nAv8XOKq+/BrggSHaxocDtzYs30+1T7iyfh2Nqh+LP2r166bPtjysru124Oq61uOBbwDjgNF1//cA\nN9SXp1DvE/pc3rrveH29fC3VH/oAfgqMb9hGXYM4tt8D7gX2qsewBvgIcA/wrrrPp3hpf72Cej8J\n/BGwuuH5ejewJ9VPpzxaPyffBXyj7rMv8MjWx6nQdkngnfXy1XXtPwfOaui3rbGcDXykvvwm4P8B\ne9TLlwOn1I/PdxrWtV/9/y+B3+jTdgnwyfryHwN3NdxP7z5lELflO4C76ufWWOBnTWzLzla/vgb6\nN9JnrB7JzLvqy7dTPanfXL+jvheYCfxuQ/9/ypdPwS7NzE1brwP+tH7XfRrVH5VWuxd4b/1O5+jM\nfAY4JqpZjXupXii/2+c2b6R6XH6W1TP1y0Nc8476RWb+mOoFuCIz12U1Lb2YaicJ8CLVHzV4aTu3\nwlHAVzLzvzLzCeBWqrrfA1yTmc8BZOZ/NtzmCqod/acb2rY1ngnALfW2/Vtevm0bn6u7or/XzB8C\n/xQRd9X1HtTQv+9r5luZuZnqudkB3Fy338uvb5ed2abvAS6ta1kKjIuIfXZuqM3LzDuBAyPit6Ka\nqX6KKrT8N+BO4A6q19akwa5lBzySmfdmdfj8PuC79Wt+67bYl2q7rgb+D7++r+jrDfU6H6qXv0QV\nJJPqTdzJEbEfcCTwreKjecnRwI2Z+Vxmrqd6HuxNFRZubayt4TZfAcjM26ieM/vV7f+cmZsy80lg\nOXBEvY5JETEeOIkqcJY8FPZoZv6gvvxlqv0GwFcBImLfAcay1bupQtTK+vXwbuB1wMPA6yLikog4\nFlhf97+HarbnZKrQTX3f1wFk5veAV0XEuPq6UvuU7Xkn1TZ4PjOfpQqKA23LtjfSz7F6oeHyf1G9\nM1lE9a747og4leod2VYb+9y+dzkzn4uI71C92/vvVE/olsrMhyLi7VTfen9uVD9+/SGqRP9oRJxN\n9U5gOOu7Tfqzud65Q7Wdh9Pz+odUYfgfMvP5um1b47kEuDgzl0bEFKp3lVs18zg1o+9r5tXA07nt\nc2z63u8LAJnZExGN4+hhx7bLth6DUcAfNDxWQ+mfqGZ9f5Pqj+BrgfMy84oW1NKMxm3Z07C8dVuc\nAyzPzD+LiEOoZgN21jVUfxSfpwrb7XJOzlZ9v7AxB2i/lmq29kTK/5LItu5zR1/DAXwpMz/2a1dU\n4f9PgDlUf69OA95HFVC6gI9HxGEDrL/UPmW3M9JnrPozFni8nnmauYO3/SLweWBlZj5VvLIdFNWn\nI57LzC8DnwHeXl/1ZP0ufno/N/spcEi89ImSkwa/0iJ+ArwrIg6IiA6qum8d4DZD7V+pzm3qqN/t\n/hFV3d8B/jJeOp/vlQ23+Ueqk8y/FtWJ59uzLy/9uPns7fR7lup5XsJ64JGI+AvoPY+s1LmFO7NN\nvw2cuXUhGs57HAJfpfpDO50qZN0CnLZ1xiwiDo7qvKThovH5dGoT/R+k2nf8Tr08i3p7ZeYvqQ41\n/R1VyBpMtwEnRHX+31iqoLAReCoiju5bW20GQEQcRfVbts/U7cdHxCsi4lVUb7JX1u2LqA5zkpn3\nF67/NRFxZH35/VSHz3vVtW1vLFt9F5i+9TlXny/12og4ABiVmTdQbY+3R8QoYGJmLgc+SrXt96Ha\nZ82sbz8FeLKeBRwqPwC66m2wD
/CnbH9blty3DZrh9M6+lL8H/g1YV//f9EbKzNsjYj2Dv+No1mHA\nZyKiB9gMzKX61Mhq4D94aSfRKzOfj4gzgJsi4jmqF1bbP1Ez8/GIWEA1XR/ATZn5zy0uq68bqQ6D\n3E31LvSszPwP4OY6AKyKiBepgtT/2nqjzLy4nv6/LiK2F/bPpjp08xTV+UiHbqPfcmBBfXjgvMz8\n6i6OaybwhYj4O6rzp66nGuMu2clt+mHgsoi4h2r/dRvVu/JBl9VvpI4FHsvqR+Yfj4g3AT+K6jMi\nG6hmOdan8P6QAAABWklEQVQORT0FXAh8qd6uNw3Uud53/CXVc3A01f5lYUOXxVTnWT0wKNW+VMcd\nEfFVqufgWl7az80GFtZvYB7m5TNNz0fEnVTP39Ma2u+hev4dAJxTB0Qy84mIeIDqXLTSHgQ+FBFX\nU52r9wUa3iw0MRbqGu+vt9236+C0meqIxSbgmroN4GNUh+W/XO9nAvh8Zj5dH9W4un49Pcf237AV\nl5krI2Ip1XZ4guow9TNse/yL6vZNwJFDcKhyp/iTNjugniFaAbwx/di/JPWKiEuBOzPzH1tdy66q\n/6DfC7y9YXarxHoPofowwJtLrXO4i4h9MnND/ZjfBpyRmXe0uq5dsTseCtwpEXEK1QzXxw1VkvSS\niLgdeAvt/2GYAUX1RcoPAJeUDFXapivr2fU7qD4oMKxDFThjJUmSVIwzVpIkSYUYrCRJkgoxWEmS\nJBVisJIkSSrEYCVJklTI/wc82zb24gFmPwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 3 is:\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+UVeV97/H3l0GaqOCPiKlxsGhqTG1MYjqa2iQt1iZX\nSVkaSqMJIiQ3GCCY5rq8hqx6U2viEo3x1oqK4kIj4QatkJYbaEwa0aStqWAwiiRai1LHWJFcESFG\nIfO9f5zNeJgMzAGfmTMzvF9rsdg/ntnn+5yzzzmf8+x9zo7MRJIkSa/fkGYXIEmSNFgYrCRJkgox\nWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFDG3WDR922GE5evToZt28\nJElSwx588MGNmTmyp3ZNC1ajR49m1apVzbp5SZKkhkXE+kbaeShQkiSpEIOVJElSIQYrSZKkQgxW\nkiRJhTTt5PVm2Lp1KzNmzGDYsGGMGTOGiRMnAvDss89yxRVXkJmcc845vO997wNg2rRpHHjggVx9\n9dXNLFuSJA0Q+9SI1ZIlS5gwYQLz5s1j6dKlncuvvvpqhg8fzpAhQ2htbQXgrrvu4qSTTmpWqZIk\naQDap4JVe3s7o0aNAqClpaVz+aOPPsp5553HpZdeype+9CWee+45Vq9ezWmnndasUiVJ0gC0TwWr\n1tZW2tvbAejo6Nhp+SGHHMKBBx7IL3/5S+677z42bNjAZZddxooVK3j88cebVbIkSRpAIjObcsNt\nbW3Z1z8QunXrVmbOnMkb3vAG3v/+9/Ptb3+bBQsWsHbtWq666ioigk996lOd51g99dRTzJkzx3Os\nJEnax0XEg5nZ1mO7fSlYSZIk7Y1Gg9U+dShQkiSpNw36YLW9o/yIXG9sU5IkDXyD/neshg4JZq/e\nVnSbs07cr+j2JEnS4DDoR6wkSZL6isFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJ
EmSCjFYSZIk\nFWKwkiRJKsRgJUmSVEhDwSoiTo+IxyLiiYiYtYs2YyLioYh4NCLuK1umJElS/9fjJW0iogW4Hvgg\n0A6sjIilmbm2rs3BwA3A6Zn5nxFxeG8VLEmS1F81MmJ1MvBEZq7LzFeBRcCZXdp8HFiSmf8JkJkb\nypYpSZLU/zUSrI4Enq6bb6+W1XsbcEhE3BsRD0bEeaUKlCRJGih6PBS4B9v5PeA04I3A/RHxw8x8\nvL5RRJwPnA9w1FFHFbppSZKk/qGREatngFF1863VsnrtwN2ZuTUzNwLfB97VdUOZeXNmtmVm28iR\nI/e2ZkmSpH6pkWC1Ejg2Io6OiGHAOcDSLm3+AXh/RAyNiP2B9wI/KVuqJElS/9bjocDM3B4RM4G7\ngRZgfmY+GhHTqvVzM/MnEfFt4GGgA7glM9f0ZuGSJEn9TUPnWGXmcmB5l2Vzu8x/BfhKudIkSZIG\nFn95XZIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQ\ng5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYr\nSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIk\nSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklRIQ8EqIk6PiMci4omImNXN+jER8WJEPFT9+2L5UiVJ\nkvq3oT01iIgW4Hrgg0A7sDIilmbm2i5Nf5CZf9oLNUqSJA0IjYxYnQw8kZnrMvNVYBFwZu+WJUmS\nNPA0EqyOBJ6um2+vlnX1BxHxcET8Y0T8bncbiojzI2JVRKx6/vnn96JcSZKk/qvUyes/Ao7KzHcC\n1wF/312jzLw5M9sys23kyJGFblqSJKl/aCRYPQOMqptvrZZ1yszNmbmlml4O7BcRhxWrUpIkaQBo\nJFitBI6NiKMjYhhwDrC0vkFE/GZERDV9crXdn5cuVpIkqT/r8VuBmbk9ImYCdwMtwPzMfDQiplXr\n5wITgOkRsR14GTgnM7MX65YkSep3egxW0Hl4b3mXZXPrpucAc8qWJkmSNLD4y+uSJEmFGKwkSZIK\nMVhJkiQVYrCSJEkqxGAlSZJUiMFqENi6dSuTJ09m6tSpLFy4cKd1jzzyCIcffjhbtmwBYP78+Vxw\nwQVccsklzShVkqRBzWA1CCxZsoQJEyYwb948li597bdbt23bxi233MIZZ5wBwIYNG7jjjjs44IAD\nOOKII5pVriRJg5bBahBob29n1KjaVYdaWlo6l1999dV89rOfpfpRfNatW8ehhx7K7NmzWb9+PevW\nrWtKvZIkDVYGq0GgtbWV9vZ2ADo6OjqXP/TQQ1x33XU88MAD3HTTTRx55JEceuihABx88MG89NJL\nTalXkqTBqqFfXlf/Nn78eGbOnMmyZcsYN24ckyZNYsGCBdxxxx0ATJkyhU9/+tMceOCBHHrooVx4\n4YVs27aNd73rXU2uXJKkwSWadUm/tra2XLVqVZ/c1uzV24pub9aJ+xXdniRJ6t8i4sHMbOupnYcC\nJUmSCjFYDVDbO8qPNPbGNiVJ2pd4jtUANXRIeIhTkqR+xhErSZKkQgxWkiRJhRisJEmSCjFYSZIk\nFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrE\nYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFNBSsIuL0iHgsIp6IiFm7aXdSRGyPiAnlSpQk\nSRoYegxWEdECXA+cARwPfCwijt9FuyuB75QuUpIkaSBoZMTqZOCJzFyXma8Ci4Azu2l3AbAY2FCw\nPkmSpAGjkWB1JPB03Xx7taxTRBwJfAS4cXcbi
ojzI2JVRKx6/vnn97RWSZKkfq3Uyet/A3w+Mzt2\n1ygzb87MtsxsGzlyZKGbliRJ6h+GNtDmGWBU3XxrtaxeG7AoIgAOA8ZGxPbM/PsiVUqSJA0AjQSr\nlcCxEXE0tUB1DvDx+gaZefSO6Yi4DfiWoUqSJO1regxWmbk9ImYCdwMtwPzMfDQiplXr5/ZyjZIk\nSQNCIyNWZOZyYHmXZd0Gqsyc8vrLkiRJGnj85XVJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkq\nxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjB\nSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5Uk\nSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpkIaCVUScHhGPRcQT\nETGrm/VnRsTDEfFQRKyKiPeXL1WSJKl/G9pTg4hoAa4HPgi0AysjYmlmrq1r9j1gaWZmRLwTuBN4\ne28ULEmS1F81MmJ1MvBEZq7LzFeBRcCZ9Q0yc0tmZjV7AJBIkiTtYxoJVkcCT9fNt1fLdhIRH4mI\nnwLLgE+WKU+SJGngKHbyemZ+MzPfDpwFfKm7NhFxfnUO1qrnn3++1E1LkiT1C40Eq2eAUXXzrdWy\nbmXm94FjIuKwbtbdnJltmdk2cuTIPS5WkiSpP2skWK0Ejo2IoyNiGHAOsLS+QUT8dkRENf0e4DeA\nn5cuVpIkqT/r8VuBmbk9ImYCdwMtwPzMfDQiplXr5wJ/BpwXEduAl4Gz605mlyRJ2if0GKwAMnM5\nsLzLsrl101cCV5YtTZIkaWDxl9clSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYr\nSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVEhDF2GWmmHr1q3MmDGDYcOGMWbMGCZO\nnAjA7NmzefLJJ9m4cSPXXnstra2tAEybNo0DDzyQq6++upllS5L2YY5Yqd9asmQJEyZMYN68eSxd\nurRz+axZs7jppps499xzWbFiBQB33XUXJ510UrNKlSQJMFipH2tvb2fUqFEAtLS07LRuy5Yt3Hnn\nnZx11lk899xzrF69mtNOO60ZZUqS1MlgpX6rtbWV9vZ2ADo6OjqXb968menTp3PVVVcxfPhw7rvv\nPjZs2MBll13GihUrePzxx5tVsiRpH2ewUr81fvx4Fi9ezPTp0xk3bhyTJk0CYMqUKWzatInLL7+c\ne+65h49+9KPMmzePL37xi5x66qm87W1va3LlkqR9VWRmU264ra0tV61a1Se3NXv1tqLbm3XifkW3\nt7cGa78kSepvIuLBzGzrqZ0jVpIkSYUYrNRvbO/ondHT3tquJEld+TtW6jeGDonihzfBQ5ySpL7j\niJUkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxJ9bkJpk69atzJgxg2HDhjFmzBgm\nTpwIwOzZs3nyySfZuHEj1157La2trU2uVJLUKEespCZZsmQJEyZMYN68eSxdurRz+axZs7jppps4\n99xzWbFiRRMrlCTtKYOV1CTt7e2MGjUKgJaWlp3WbdmyhTvvvJOzzjqrGaVJkvaSwUpqktbWVtrb\n2wHo6OjoXL5582amT5/OVVddxfDhw5tVniRpLxispCYZP348ixcvZvr06YwbN45JkyYBMGXKFDZt\n2sTll1/OPffc0+QqJUl7oqGT1yPidOBaoAW4JTNnd1k/Efg8EMBLwPTM/HHhWqVB5YADDuDWW2/t\nnN9x8vqSJUuaVZIk6XXqccQqIlqA64EzgOOBj0XE8V2aPQn8UWaeAHwJuLl0oZIkSf1dI4cCTwae\nyMx1mfkqs
Ag4s75BZv5rZr5Qzf4Q8PvhUmV7Rw6o7UqS9l4jhwKPBJ6um28H3rub9v8d+MfuVkTE\n+cD5AEcddVSDJUoD29AhwezV24pvd9aJ+xXfpiTp9Sl68npEnEotWH2+u/WZeXNmtmVm28iRI0ve\ntCRJUtM1MmL1DDCqbr61WraTiHgncAtwRmb+vEx5kiRJA0cjI1YrgWMj4uiIGAacAyytbxARRwFL\ngEmZ+Xj5MiVJkvq/HkesMnN7RMwE7qb2cwvzM/PRiJhWrZ8LfBF4E3BDRABsz8y23itbkiSp/2no\nd6wyczmwvMuyuXXTnwI+VbY0SZKkgcVfXpckSSrEYCWpiK1btzJ58mSmTp3KwoULO5cvX76csWPH\nMmfOHACee+45pk2bxrRp0zjqqKPYvHlzs0qWpOIaOhQoST1ZsmQJEyZMYNy4cZx99tmdl+gZO3Ys\n+++/P2vWrAHgzW9+M3PnzmXDhg28/PLLjBgxopllS1JRjlhJKqK9vZ1Ro2q/zNLS0tJj+9tuu43J\nkyf3dlmS1KcMVpKKaG1tpb29HYCOjo7dts1MVqxYwamnntoXpUlSnzFYSSpi/PjxLF68mOnTpzNu\n3DgmTZoEwP33388111zDHXfcweLFiwG49957+cAHPkD18yySNGh4jpWkIg444ABuvfXWzvkd51id\ncsopLF26028Kc+qppzpaJWlQcsRKkiSpEIOVJElSIQYrSXtle0cOqO1KUl/wHCtJe2XokGD26m3F\ntzvrxP2Kb1OS+oojVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5Uk7cbWrVuZ\nPHkyU6dOZeHChZ3Lly9fztixY5kzZw5Qu/D0pz/9ac477zymTZvWrHIlNZnBSpJ2Y8mSJUyYMIF5\n8+btdM3DsWPHcvHFF3fODxkyhJtuuonbb7+dLVu20NHR0YxyJTWZwUqSdqO9vZ1Ro0YB0NLSstu2\na9euZfLkyRx88MEMGeLLq7Qv8pkvSbvR2tpKe3s7QI+jUMcffzxf+9rX6OjoYP369X1R3l5r9BDn\nY489xic/+Uk+8YlPcOWVVzarXGnA8JI2krQb48ePZ+bMmSxbtoxx48YxadIkFixYwP33388111zD\nCy+8wBFHHMEpp5zCFVdcQUdHB0OHDu0c5eqvdhziHDduHGeffTYTJ04Eaoc4999/f9asWQPAcccd\nx/z584HafSFp9wxWkrQbBxxwALfeemvn/I4Acsopp+x0zhXAdddd16e1vR7t7e2ccMIJQM+HOAEW\nLVrEhz70od4uSxrwPBQoSfugPTnEuWjRItavX++3HaUGGKwkaR80fvx4Fi9ezPTp0zsPcQKdhzjv\nuOMOFi9ezOrVq7nooot48sknufDCC5tctdT/eShQkups70iGDokBs929tSeHOHeMbEnqmcFKkuoM\nHRLMXr2t+HZnnbhf8W1K6n88FChJ+4DtHTmgtisNVI5YSdI+wJE4qW84YiVJklSIwUqSJKkQg5Uk\nSVIhDQWriDg9Ih6LiCciYlY3698eEfdHxCsRcVH5MiVJkvq/Hk9ej4gW4Hrgg0A7sDIilmbm2rpm\n/w/4LHBWr1QpSVIDtm7dyowZMxg2bBhjxozp/H2uNWvWcMUVVwDwhS98gWOOOYbzzz+fESNG8Ja3\nvIVLLrmkmWVrEGlkxOpk4InMXJeZrwKLgDPrG2TmhsxcCZT/yokkSQ3acXHpefPm7fRDp9deey3X\nX389N9xwA9dddx0/+clPOO6447jhhht49tlnefrpp5tYdc+2bt3K5MmTmTp1KgsXLuxcvmbNGiZO\nnMjEiRNZs2YNv/rVrzj33HOZOnUqkydP7vFyRSqvkWB1JFC/x7VXy/ZYRJwfEasiYtXzzz+/N5uQ\nJGmX2tvbGTVqFLDzxaVffPFFDj74YA466CBeeuklTjzxRF555RUuvPBCfva
zn/HMM880q+SGNBoY\nX3rpJYYPH868efPYf//92bRpUxOr3jf16cnrmXlzZrZlZtvIkSP78qYlSfuAXV1c+qCDDuLFF19k\n8+bNDB8+nCFDhvDlL3+Za665hkMOOYRjjjmmWSU3pNHAOGLECF555RU+/OEPs23bNg499NBmlbzP\naiRYPQOMqptvrZZJktSv7Ori0n/xF3/BBRdcwMyZM7ngggsAmDFjBlOnTqWtrY3DDz+8mWX3qNHA\n+KMf/YjRo0ezbNkyjj76aB566KFmldyQwXiIs5FfXl8JHBsRR1MLVOcAH+/VqiRJ2gu7urj0O97x\nDm6//fad2t5www19WtvrMX78eGbOnMmyZcs6A+OCBQs6AyPAxRdfzDHHHMNXv/pVZsyYwcaNG/nc\n5z7X5Mp3b8chznHjxnH22Wd3Pl47DnFGBBdffDFXXnklw4cP58Ybb2T69Ols2rSp347G9RisMnN7\nRMwE7gZagPmZ+WhETKvWz42I3wRWASOAjoj4HHB8Zm7uxdolSdon7Elg/MY3vtGntb0e7e3tnHDC\nCUD3hziBXzvEecQRR/TbUAUNnmOVmcsz822Z+dbMvLxaNjcz51bT/5WZrZk5IjMPrqYNVZKkXuXF\npQe2wXiI04swS5IGLC8uPbANxkOcBitJkvqZ7R3J0CExYLa7twbjIU6DlSRJ/YwjcQOXF2GWJEl9\nYl84J84RK0mS1Cf2hZE4R6wkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKk\nQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUY\nrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJ\nkiQV0lCwiojTI+KxiHgiImZ1sz4i4m+r9Q9HxHvKlypJktS/9RisIqIFuB44Azge+FhEHN+l2RnA\nsdW/84EbC9cpSZLU7zUyYnUy8ERmrsvMV4FFwJld2pwJ3J41PwQOjogjCtcqSZLUrzUSrI4Enq6b\nb6+W7WkbSZKkQS0yc/cNIiYAp2fmp6r5ScB7M3NmXZtvAbMz85+r+e8Bn8/MVV22dT61Q4UAxwGP\nlepIAYcBG5tdRC+wXwOL/RpY7NfAYr8Glv7Wr9/KzJE9NRrawIaeAUbVzbdWy/a0DZl5M3BzA7fZ\n5yJiVWa2NbuO0uzXwGK/Bhb7NbDYr4FloParkUOBK4FjI+LoiBgGnAMs7dJmKXBe9e3A3wdezMxn\nC9cqSZLUr/U4YpWZ2yNiJnA30ALMz8xHI2JatX4usBwYCzwB/AL4RO+VLEmS1D81ciiQzFxOLTzV\nL5tbN53AZ8qW1uf65SHKAuzXwGK/Bhb7NbDYr4FlQParx5PXJUmS1BgvaSNJklSIwUqSJKmQQRus\nImJ0RKxpdh39RURcFhF/0s3yMdXvkKlBfbVvRcQt3Vw+qt+IiC2v42+nRcR5JevpTRHx9oh4KCJW\nR8Rbm13PntrVPhsR90bEHn+dPSKmRMScMtUJevd1JSL+dRfLb6t+q1IFNXTy+r4mIoZm5vZdzQ9E\nmfnFZtfwekREUDsnsKPZtbwee7Iv7fhR3sGo/ssv/UVEtGTmr3ax+izgrsz8ch/WM+Bfd9RcO/ah\nzPyDZteyLxm0I1aVloiYFxGPRsR3IuKNETE1IlZGxI8jYnFE7A+dyX1uRPwbcFVEXBoRCyLiX4AF\nEfH9iHj3jg1HxD9HxLua1bGqhgMiYlnVlzURcXZEfLHq35qIuLkKJDt9MomI0yPipxHxI2B8M/uw\nO9UnuMci4nZgDTApIh6p+nZlXbstEXF5dT/8MCLe3AfldbdvvTUivh0RD0bEDyLi7VV93e1bX6va\nrI+I8RFxVdW3b0fEftXfdY4mRMTH9qT
vETGy2r9XVv/e11t3RNdRz4iYExFTqumn6vr2QET8drX8\n0oi4qK6fV1brH4+ID/RCjaOrfX5hRPwkIu6KiP2r+q6sngt/HhHvru7HhyPimxFxSESMBT4HTI+I\nFdX2zq3qfSgiboqIlurfbdVj9EhE/I+q7WcjYm21zUXVskMj4u+rZT+MiHfW3S+drzuF74ahXfvf\n5T66MSJWVfv0X9ctPyki/rXaxx6IiOFd/u7DEXF/RBxWuN69FhGzI+IzdfOXRsRFEfE/q+fDw/V9\n7Ef25D3rrdW+80hEfDmqEeTq+fiDiFgKrK2W7VgX1fPzsYj4J+DwvuhUdP9e9VRE/HVE/Kjqw47X\ny5Or/Wl1td8dVy2fEhH/UL1e/HtE/FXd9n/t+dgX/dqlzByU/4DRwHbg3dX8ncC5wJvq2nwZuKCa\nvg34FtBSzV8KPAi8sZqfDPxNNf02YFU/6OOfAfPq5g8CDq2bXwCMq+vfBOAN1K7reCwQ1f3yrWb3\nZTePYQfw+8BbgP8ERlIbab0HOKtql3X9vAq4pEn71veAY6tl7wXu2c2+9c/AfsC7qP322xnVum/W\n9eteoG1v+g78H+D91fRRwE964X7YUv0/pn4fAuYAU6rpp4C/rKbP29Guug8uquvnV6vpscA/9dJj\nlsD7qvn5wEVVfRfXtXsY+KNq+jJee87X1/s7wP8F9qvmb6j69nvAd+u2dXD1/8+A3+iy7Drgr6rp\nPwYeqrudztedPuj/vUBbtezQ6v+Wavk7gWHAOuCkat2Iah+cUj3OHwF+ABzSm8+5vejvicB9dfNr\nqb2G30ztdW8ItefkHza71i6P0Z68Z30L+Fg1PY2dn49bgaPr/m7HuvHAd6vH+C3AJmBCH/Stu/eq\np+r6MgO4pX4fq6b/BFhcTU8BngXeBLyR2ofttl09H5v5WA72EasnM/OhavpBajvuO6o0/wgwEfjd\nuvZ/lzsfCliamS/vWAf8adRGEz5J7c2y2R4BPlh94v5AZr4InBoR/1b174/ZuX8Ab6d2v/x71vbC\nr/dxzXtqfWb+EDgJuDczn8/a4ZGFwB9WbV6l9iIDrz3Ova27fesPgL+LiIeAm4Aj6tp33bf+MTO3\nUXsMW4BvV8sf4dfr35u+/wkwp6plKTAiIg7cu66+bt+o+/+UXbRZUv3fm4/f05n5L9X014H3V9N3\nAETEQdSCz33V8q/x2v1c7zRqIWpldf+eBhxDLYAcExHXRcTpwOaq/cPAwog4l9obJ9VtLwDIzHuA\nN0XEiGpd/etOSbvq/w4frUbuVlN73Tie2jVdn83MlVWtm/O1w5N/DHwe+HBmvtAL9e61zFwNHB4R\nb4nakYUXgBOAD1Hr34+ovRYe27wqu7Un71mnUHtfgtoHqXoPZOaT3Wz/D4FvZOavMvNn1D6k9YXu\n3qug++f9QdReR9cA/5ud38O+m5k/r54fS6jtw7t6PjbNYD/H6pW66V9RS7m3Ufu0/+OoHa4YU9dm\na5e/75zPzF9ExHeBM4GPUnsgmyozH4+I91D7lP/lqF38+jPUPoE+HRGXUhuhGsi6Pibd2VaFRKg9\nzn2xX3fdt94MbMrMd++ifdd+vAKQmR0RUV9/B3tW/676PgT4/cz85R5sa29tZ+fTCrruc7mL6Xo7\n7s/efPy63vaO+Ub2sXoBfC0zv/BrK2pv4v+N2gjCR6l9CPswtTe0ccBfRsQJPWx/T+tp1K76T0Qc\nTW0E66TMfCEibqPn147/oPYG9jZgVcE6S/k7aqP0v0ktPP8WcEVm3tTUqnZvT9+zdqW39qG9sov3\nKuj+ef8lYEVmfiQiRlMbPe3cVNdNs5vnY7MM9hGr7gwHnq1Gnibu4d/eAvwtsLI/fEKLiLcAv8jM\nrwNfAd5TrdpYjU50922PnwKj47VvNn2s9yst4gHgjyLisOr4+ceA+3r4m760GXgyIv4cOs9lKHUO\n3t7
0/TvABTtmou78wF6wHjg+In4jIg6m9omx3tl1/9/fi3X05KiI2DFi9nFqh2M7VZ+iX4jXzvGa\nRPf38/eACRFxOHSeL/Vb1TlGQzJzMXAJ8J6IGAKMyswV1EZ3DgIOpHb4bGL192OAjZm5+ddvqqjd\n9X8EtTfjF6N2nt4Z1fLHgCMi4qSq1uERseMNcD21Qzy3R0TXkfH+4A5q17adQC1k3Q18csfIbUQc\nueMx7Od29Z71Q2r3P9T62YjvA2dH7XzAI4BTy5W5a7t5r+rOQcAz1fSULus+WD3f3kjtCyX/wi6e\njyXr31OY9gI/AAAB80lEQVSDfcSqO/8L+Dfg+er/4btv/prMfDAiNgO39lJte+oE4CsR0QFsA6ZT\n29nWAP9F7QLaO8nMX0bE+cCyiPgFtRf4hu+DZsnMZyNiFrCC2ieUZZn5D00uq6uJwI0RcQm186cW\nAT9+vRvdy75/Frg+Ih6m9jz/PrVRlOKq0dE7qe13T1I71FLvkKqOV2hukH8M+ExEzKd2zs2N1IXP\nymRgbtROEF5HN9c9zcy11WP8nSo4baM2UvwycGu1DOAL1A7zfr06zBjA32bmpmo0eX51v/yiut3e\n1l3/x1V9+nFErKb2wetpam9YZOarEXE2cF31ZvYytcPMVOt/GhETqR26GZeZ/9EH/WhI1q5pOxx4\nJjOfpRZOfge4P2rf6dlC7RymDU0ssxG7es/6HLV96y+pnUrwYvd/vpNvUjuEu5baeZt99UGnu/eq\nu3bR9irga9VzbFmXdQ8Ai4FW4OuZuQpgF8/H9cV70SAvabMHqtR9L/D2HOBf+5f6QkQ8Re3Q9MYm\n1zGa2onz72hmHVIpVfh/OTMzIs6hdiL7mc2uq7dUh0HbMnNms2vpyb44YrVXovZjhpcDFxqqJElN\n9nvUvqAS1L7d98km16OKI1aSJEmF7Isnr0uSJPUKg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIK+f+8\nXRSh/iwqeQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 4 is:\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH21JREFUeJzt3X+cXXV95/HXJxNSJST8KMEqkzTQIiwVC3bEUrENfRQX\nQrPQmC5oiEFqYpKGysNHF9NiXXYl8kNhSw0QCA0slPLDJtisRKgPBUWLbQJBSEBsCloGqAa3EAga\niPPZP+6Z4WZ2JnOTfGfunZnX8/HII/d873fO/Xznnjn3fb/n3HMjM5EkSdLeG9PsAiRJkkYKg5Uk\nSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpkLHNeuCDDz44p06d\n2qyHlyRJathDDz30QmZOGqhf04LV1KlTWb9+fbMeXpIkqWER8cNG+nkoUJIkqRCDlSRJUiEGK0mS\npEKado5VM23bto1FixYxbtw4pk2bxuzZswF4/vnnueSSS8hMzjrrLN773vc2uVJJkjScjMoZq9Wr\nVzNr1ixWrFjBmjVreto///nPM2HCBMaMGUN7e3sTK5QkScPRqAxWnZ2dTJ48GYC2trae9k2bNvHh\nD3+Yiy66iM985jPNKk+SJA1TozJYtbe309nZCUBXV9dO7QceeCD77bcfP/vZz5pVniRJGqYiM5vy\nwB0dHdms61ht27aNxYsX86Y3vYkTTzyRe+65h1tuuYXHH3+cyy+/nIjgox/9qOdYSZIkACLioczs\nGLDfaAxWkiRJu6PRYNXQocCIOCUinoyIzRGxpJ8+0yLikYjYFBHf2N2CJUmShrsBL7cQEW3A1cDJ\nQCewLiLWZObjdX0OAK4BTsnMf4uIQwarYEmSpFbVyIzV8cDmzHwqM18DbgdO79XnQ8DqzPw3gMz8\ncdky99yOrvKHOgdjnZIkafhr5AKhhwLP1C13Au/p1eftwD4RcT8wAbgqM2/uvaKImA/MB5gyZcqe\n1Lvbxo4JLt3wetF1Ljlun6LrkyRJI0Opyy2MBX4DOA34z8BfRMTbe3fKzOszsyMzOyZNmlTooSVJ\nklpDIzNWzwKT65bbq7Z6ncBPMnMbsC0ivgn8OvD9IlVKkiQNA43MWK0DjoiIwyJiHHAWsKZXn78H\nToyIsRGxL7VDhU+ULVWSJKm1DThjlZk7ImIxcC/QBqzMzE0RsaC6f3lmPhER9wCPAl3ADZm5cTAL\nlyRJajWNHAokM9cCa3u1Le+1/Dngc+VKkyRJGl5G5XcFSpIkDQaDlSRJUiEGK0mSpEIMVpIkSYUY\nrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJ\nkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJ\nKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMViPAtm3bmDt3LvPmzePWW2/tab/ooos488wzWbBg\nAc899xwAH/vYxzjmmGOaVaokSSOawWoEWL16NbNmzWLFihWsWbOmp33s2LGMGzeOffbZhwMOOACA\n6667jiOPPLJZpUqSNKIZrEaAzs5OJk+eDEBbW1tP+5//+Z9zyy23cPLJJ/PXf/3XzSpPkqRRw2A1\nArS3t9PZ2QlAV1dXT/uYMbWn95BDDuHll19uSm2SJI0mBqsRYObMmaxatYqFCxcyY8YM5syZA8Bn\nP/tZFi5cyBVXXMFHPvIRAC688EI2bNjAggUL2L59ezPLliRpxInMbMoDd3R05Pr164fksS7d8HrR\n9S05bp+i65MkSa0tIh7KzI6B+jljJUmSVEhDwSoiTomIJyNic0Qs6eP+aRHxUkQ8Uv37dPlSJUmS\nWtvYgTpERBtwNXAy0Amsi4g1mfl4r64PZObvD0KN6sOOrmTsm
Gj5dUqSNJoMGKyA44HNmfkUQETc\nDpwO9A5WGkJjx4TnjkmS1GIaORR4KPBM3XJn1dbbb0XEoxHxlYj4tb5WFBHzI2J9RKzfsmXLHpQr\nSZLUukqdvP4wMCUz3wl8AfhSX50y8/rM7MjMjkmTJhV6aEmSpNbQSLB6Fphct9xetfXIzK2Z+Up1\ney2wT0QcXKxKSZKkYaCRYLUOOCIiDouIccBZwJr6DhHxSxER1e3jq/X+pHSxkiRJrWzAk9czc0dE\nLAbuBdqAlZm5KSIWVPcvB2YBCyNiB/BT4Kxs1pVHJUmSmqSRTwV2H95b26tted3tZcCysqVJkiQN\nL155XZIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQ\ng5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYr\nSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIk\nSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhDQWriDglIp6MiM0RsWQX/d4d\nETsiYla5EiVJkoaHAYNVRLQBVwOnAkcDH4yIo/vpdxnwD6WLlCRJGg4ambE6HticmU9l5mvA7cDp\nffQ7D1gF/LhgfZIkScNGI8HqUOCZuuXOqq1HRBwK/AFw7a5WFBHzI2J9RKzfsmXL7tYqSZLU0kqd\nvP6XwCczs2tXnTLz+szsyMyOSZMmFXpoSZKk1jC2gT7PApPrlturtnodwO0RAXAwMD0idmTml4pU\nKUmSNAw0EqzWAUdExGHUAtVZwIfqO2TmYd23I+Im4MuGKkmSNNoMGKwyc0dELAbuBdqAlZm5KSIW\nVPcvH+QaJUmShoVGZqzIzLXA2l5tfQaqzDxn78uSJEkafrzyuiRJUiEGK0mSpEIMVpIkSYUYrCRJ\nkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQV\nYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRg\nJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqS\nJKkQg5UkSVIhBitJkqRCGgpWEXFKRDwZEZsjYkkf958eEY9GxCMRsT4iTixfqiRJUmsbO1CHiGgD\nrgZOBjqBdRGxJjMfr+v2NWBNZmZEvBO4EzhqMAqWJElqVY3MWB0PbM7MpzLzNeB24PT6Dpn5SmZm\ntTgeSCRJkkaZRoLVocAzdcudVdtOIuIPIuJ7wN3AuX2tKCLmV4cK12/ZsmVP6pUkSWpZxU5ez8y7\nMvMo4AzgM/30uT4zOzKzY9KkSaUeWpIkqSU0EqyeBSbXLbdXbX3KzG8Ch0fEwXtZmyRJ0rDSSLBa\nBxwREYdFxDjgLGBNfYeI+NWIiOr2u4BfAH5SulhJkqRWNuCnAjNzR0QsBu4F2oCVmbkpIhZU9y8H\nPgB8OCJeB34KnFl3MrskSdKoMGCwAsjMtcDaXm3L625fBlxWtjRJkqThxSuvS5IkFWKwkiRJKsRg\nJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqS\nJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElS\nIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiFjm12ANJBt\n27axaNEixo0bx7Rp05g9e
zYAl156KU8//TQvvPACV111Fe3t7U2uVJI02jljpZa3evVqZs2axYoV\nK1izZk1P+5IlS7juuus4++yzue+++5pYoSRJNQYrtbzOzk4mT54MQFtb2073vfLKK9x5552cccYZ\nzShNkqSdGKzU8trb2+ns7ASgq6urp33r1q0sXLiQyy+/nAkTJjSrPEmSehis1PJmzpzJqlWrWLhw\nITNmzGDOnDkAnHPOObz44ossXbqUr3/9602uUpKkBk9ej4hTgKuANuCGzLy01/2zgU8CAbwMLMzM\n7xauVaPU+PHjufHGG3uWu09eX716dbNKkiSpTwPOWEVEG3A1cCpwNPDBiDi6V7engd/JzGOAzwDX\nly5UkiSp1TVyKPB4YHNmPpWZrwG3A6fXd8jMf8zM/6gWvwP4uXdJkjTqNBKsDgWeqVvurNr680fA\nV/q6IyLmR8T6iFi/ZcuWxqvUqLCjK4fVeiVJ6q3oBUIj4iRqwerEvu7PzOupDhN2dHT4aqedjB0T\nXLrh9eLrXXLcPsXXKUlSXxoJVs8Ck+uW26u2nUTEO4EbgFMz8ydlypMkSRo+GjkUuA44IiIOi4hx\nwFnAmvoOETEFWA3Myczvly9TkiSp9Q04Y5WZOyJiMXAvtcstrMzMTRGxoLp/OfBp4BeBayICYEdm\ndgxe2ZIkSa2noXOsMnMtsLZX2/K62x8FPlq2NEmSpOHFK69LkiQVYrCShti2bduYO3cu8+bN49Zb\nb+1pX7t2LdOnT2fZsmUA/OhHP2LBggUsWLCAKVOmsHXr1maVLElqUNHLLUga2OrVq5k1axYzZszg\nzDPP7PmKnunTp7PvvvuyceNGAN7ylrewfPlyfvzjH/PTn/6UiRMnNrNsSVIDnLGShlhnZyeTJ9eu\nYNLW1jZg/5tuuom5c+cOdlmSpAIMVtIQa29vp7OzE4Curq5d9s1M7rvvPk466aShKE2StJcMVtIQ\nmzlzJqtWrWLhwoXMmDGDOXPmAPDggw9y5ZVXcscdd7Bq1SoA7r//ft73vvdRXcZEktTiPMdKGmLj\nx4/nxhtv7FnuPsfqhBNOYM2ana69y0knneRslSQNI85YSZIkFWKwkgbZjq7B+b7xwVqvJGnPeShQ\nGmRjxwSXbni9+HqXHLdP8XVKkvaOM1aSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSp\nEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlaQitm3b\nxty5c5k3bx633nprT/vatWuZPn06y5Yt62n72Mc+xjHHHNOMMiVpUBmsJBWxevVqZs2axYoVK1iz\nZk1P+/Tp07ngggt26nvddddx5JFHDnWJkjToDFaSiujs7GTy5MkAtLW1NbkaSWoOg5WkItrb2+ns\n7ASgq6urydVIUnMYrCQVMXPmTFatWsXChQuZMWMGc+bMAeDBBx/kyiuv5I477mDVqlUAXHjhhWzY\nsIEFCxawffv2ZpYtSUWNbXYBkkaG8ePHc+ONN/Ysz549G4ATTjhhp3OuAJYuXcrSpUuHtL69tW3b\nNhYtWsS4ceOYNm1az/jWrl3LsmXLmD59OosXL25ylZKazRkrSWrA7pycL2n0MlhJ2iM7unJYrXdv\neXK+pEZ4KFDSHhk7Jrh0w+vF17vkuH2Kr7OE7pPzjz322BFxcn5/hzY3btzIJZdcAsCf/dmfcfjh\nhzN//nwmTpzI2972Nj71qU81s2yp5TljJUkN2J2T84eD/g5tXnXVVVx99dVcc801fOELX+CJJ57g\nyCOP5JprruH555/nmWeeaWLVUutraMYqIk4BrgLagBsy89Je9x8F3Ai8C7gwMz9fulBJaqbdOTl/\nOOjs7Oy5+n39oc2XXnqJAw44AICXX36Z4447jrvuuotPfOITPPfcczz77LM9h0RbkTNxarY
BZ6wi\nog24GjgVOBr4YEQc3avb/wX+BDBQSdIw0N91x/bff39eeukltm7dyoQJExgzZgwXX3wxV155JQce\neCCHH354s0puyEidievvK6M2btzI7NmzmT17Nhs3buTVV1/l7LPPZtGiRVx88cVNrHj0auRQ4PHA\n5sx8KjNfA24HTq/vkJk/zsx1QPkTLiRJxfV3aPPjH/845513HosXL+a8884DYNGiRcybN4+Ojg4O\nOeSQZpY9oP4+ZNA9E7f//vv3zMRt3759p5m4VjbaA+PPf/5zzj77bObNm8fcuXNb+jzHRg4FHgrU\nPzOdwHv25MEiYj4wH2DKlCl7sgpJGlQ7upKxY2LYrHdP9Xdo8x3veAc333zzTn2vueaaIa1tb/T3\nIYPumbiI2GkmDuDcc89t+Zm4kXrotjswzpgxgzPPPLNnO+wOjBHBBRdcwGWXXcaECRO49tprWbhw\nIS+++CIHHXRQk6vv25B+KjAzrweuB+jo6GjNz1RLGtVG26cdR5qZM2eyePFi7r777p6ZuFtuuaVn\nJg7oue7YokWLeP3114fFTNxoD4wTJ05k+/btnHbaabz1rW9t2VAFjQWrZ4H6uNtetUmS1FJG6kzc\naA+MDz/8MFOnTmXlypUsXbqURx55hGOPPbaJlfevkWC1DjgiIg6jFqjOAj40qFVJkooaLYc4R6rR\nHhgPP/xwrrjiChYtWsQLL7zA+eef3+TK+zdgsMrMHRGxGLiX2uUWVmbmpohYUN2/PCJ+CVgPTAS6\nIuJ84OjM3DqItUuSGjRSD3EaGIe33QmMt91225DWtqcaOscqM9cCa3u1La+7/e/UDhFKkjRkDIyt\nsV69wa+0kSSpxYzUwDga+JU2kiRpSIyGL293xkqSJA2J0TAT54yVJElSIQYrSZKkQgxWkiRJhRis\nJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmS\nJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkq\nxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKqShYBURp0TE\nkxGxOSKW9HF/RMRfVfc/GhHvKl+qJElSaxswWEVEG3A1cCpwNPDBiDi6V7dTgSOqf/OBawvXKUmS\n1PIambE6HticmU9l5mvA7cDpvfqcDtycNd8BDoiItxauVZIkqaU1EqwOBZ6pW+6s2na3jyRJ0ogW\nmbnrDhGzgFMy86PV8hzgPZm5uK7Pl4FLM/Nb1fLXgE9m5vpe65pP7VAhwJHAk6UGUsDBwAvNLmIQ\nOK7hxXENL45reHFcw0urjeuXM3PSQJ3GNrCiZ4HJdcvtVdvu9iEzrweub+Axh1xErM/MjmbXUZrj\nGl4c1/DiuIYXxzW8DNdxNXIocB1wREQcFhHjgLOANb36rAE+XH068DeBlzLz+cK1SpIktbQBZ6wy\nc0dELAbuBdqAlZm5KSIWVPcvB9YC04HNwKvARwavZEmSpNbUyKFAMnMttfBU37a87nYCf1y2tCHX\nkocoC3Bcw4vjGl4c1/DiuIaXYTmuAU9elyRJUmP8ShtJkqRCDFaSJEmFjNhgFRFTI2Jjs+toFRHx\nPyPi9/pon1Zdh0wNGqptKyJu6OProwbjcS6KiD8tsJ77I+L/+2h0RJwTEct2c10/iIiD97am0Swi\nXtnDn9urfUJ/28FwMVTbnq9Rb2yjEfG2iPi76vZu7y9aTUMnr482ETE2M3f0tzwcZeanm13D3oiI\noHZOYFeza9kbu7MtdV+UV7vWitvGSNhnSEMlM58DZjW7jlJG7IxVpS0iVkTEpoj4h4h4c0TMi4h1\nEfHdiFgVEfsCRMRNEbE8Iv4JuLx6F39LRHwbuCUivhk
Rx3avOCK+FRG/3qyBVTWMj4i7q7FsjIgz\nI+LT1fg2RsT11YtO9/hmVbdPiYjvRcTDwMxmjmFXqnd0T0bEzcBGYE5EPFaN7bK6fq9ExNLq9/Cd\niHjLEJTX17b1KxFxT0Q8FBEPRMRRVX19bVv/u+rzw4iYGRGXV2O7JyL2qX6u551/RHxwd8YeEZOq\n7Xtd9e+99cVHxIUR8f2I+Ba1b0Ho/XgHR8QPqtvnRMSXIuKr1bv5xRHxiYjYUD3mQXWrnhMRj1R1\nHt/7l9ZfXRHxi9XvcVNE3ABE3c98olrfxog4v2rrvW1Mjoj3R8SDEfFwRHwxIvbbq2d457qnVn8z\nN1W/t1sj4vci4tsR8S8RcXzvfUapx95bUfO56vf3WEScuav2Xj/77up5/pWo7W9WRsQ/V22nV33e\nHBG3R8QTEXEX8OZBHEsjz8NB1fb6aLV9vrP62f7a+9z2ojbLf37dYy+NiI8XHtLYagxPRMTfRcS+\n0f8+/N1V7Y90P29Ve0N/n7Hr176/ioh/jIinonqdGErRz+xdRJxW/U0f3N++oyVl5oj8B0wFdgDH\nVst3AmcDv1jX52LgvOr2TcCXgbZq+SLgIeDN1fJc4C+r228H1rfAGD8ArKhb3h84qG75FmBG3fhm\nAW+i9r2OR1DbgdwJfLnZY9nFc9gF/CbwNuDfgEnUZlq/DpxR9cu6cV4OfKpJ29bXgCOqtvcAX9/F\ntvUtYB/g16ld++3U6r676sZ1P9CxJ2MH/hY4sbo9BXiirv7fAB4D9gUmUrv+3J92P17V52DgB9Xt\nc6o+E6oaXgIWVPf9L+D8unpXVLd/G9hY9/PLdlUX8FfAp6vbp1XjOriu1vHAfsAm4Lj6baOu3m8C\n46vlT3avr/Bzfgy1N6QPASup/Q2dDnyJXvuMZv8DXqn+/wDwVWrXIXxLtS29dRft06htr79VjWdK\ntZ7PAmdXtw8Avl89L5+gdn1DgHdWv6eOQf7b29Xz8AXgv1f9fxd4pLrdX3t/295U4OGqfQzwr9S9\nfhQaSwLvrZZXUvs77G8fvhE4obp9KTv/fTXy97mr174vVmM8GtjchG10aq/xLAP+AHgAOLBq73ef\n1mr/RvqhwKcz85Hq9kPUnrx3RMTF1HYM+1G78Gm3L2bmz+uW12TmT7vvA/4iIv4bcC61jbHZHgOu\niNoMxpcz84GI+EBEXEDtRfMgai9E/6fuZ46i9nv5F4CI+Bve+P7GVvTDzPxO9e74/szcAhARt1J7\n8f4S8Bq1FwKoPc8nD0FdfW1bvwV8sXqDCfALdf17b1tfyczXI+Ixai9s91Ttj1Xrqvdudn/svwcc\nXVfLxIjYLzNfAd4H3JWZr1br6/1NCn25LzNfBl6OiJd4Y5t6jNqLabfbADLzmxExMSIO6LWePuuq\nxjOz+tm7I+I/qvtPrGrdVtW6uqp/DdW2UfX7TWovCt+u1j0OeLCBce2OpzPzsaqOTcDXMjOr53Aq\n8Ag77zNaxYnAbdX296OI+Aa1baq/9q3Af6J2DaH3Z+0wDcD7gf8Sb5yP9yZqL3C/TS2ckJmPRsSj\ngzyegZ6HX6YWGsnMr1czUhOr8fbV3ue2l5k/iIifRMRx1ILnhsz8SeGxPJOZ365u/w3wJ8DTvffh\nEfEAMCEzu7fpvwV+v249jfx97uq170tZO5T+eAzNjP9Afpfam8r3Z+bWqm1X+7SWMtKD1fa62z+n\nNkV9E7V3+9+NiHOovTvrtq3Xz/csZ+arEfFVau+K/iu1d9JNlZnfj4h3Ubvq/cVR+/LrP6b2bvGZ\niLiI2s5vOOv9nPTl9azexlB7nodiu+69bb0FeDEzj+2nf+9xbAfIzK6IqK+/i92rv7+xj6E2m/Oz\n3VjXDt44PaD3dlM/3q665d719r4wXu/lPuuq21nujvrfaQBfzcwP7smKGtTI76CR7XU4eJ7aNnAc\n0B2sAvhAZj5Z33E
Pn7u9MdDz8HrBx7qB2gzKL1GbUSqtr7+Xa9j9fXgj2+ZN9P/aV//zQ/6E9uFf\ngcOpjg5VbXuyT2uKkX6OVV8mAM9H7TyW2bv5szdQe2e2LjP/Y6DOgy0i3ga8mpl/A3wOeFd11wvV\nLEBfx8q/B0yNiF+plgfzhaikfwZ+pzrW3kat7m80uaZ6W6m90/xD6Dl/pdQ5eHsy9n8AzuteiLrz\nA6kdMjsjaufGTABmVO0/4I03DHt6nkX3+TsnUvvO0JcarOubwIeqtlOBA6v2B6pa942I8bxxeKC3\n7wDvjYhfrdYxPiLevodjGGkeAM6MiLaImERthuafd9EO8CK1w2KXRMS0qu1e4Ly6c36Oq9rrn7t3\nsPMMZjM8QLVvr2p/oZr16K+9v20PaofmT6E2k1c/w1PKlIg4obr9IWqnCECvfXhmvkhtNuo91f1n\n7cFj7c1r31D7IbXZxZsj4teqtl3t01rKSJ+x6stfAP8EbKn+n9DoD2bmQxGxFbhxkGrbXccAn4uI\nLmrv0hYCZ1A7Fv/v1L5AeyeZ+bOImA/cHRGvUtvZNPw7aJbMfD4ilgD3UXtHdXdm/n2Ty+ptNnBt\nRHyK2vlTtwPf3duV7uHY/wS4ujosM5bai0f393s+HBF3VLX9mDe2k88Dd3ZvH3tY7s8iYgO18Z+7\nG3X9D+C26tDOP1I736e71pt44wX/hszcEBFT61eamVuqd+G3RUT3IdhPUTsPaLS7CziB2vOdwAWZ\n+e9RO9G8r/ajADLzRxHx+8BXIuJc4DPAXwKPRsQY4Glqh6OuBW6MiCeAJ6gdkm6mi4CV1Tb2KrXz\nY3fV3ue2B5CZr0XEfdRmo+sP5ZfyJPDHEbESeJza7/JA+t6H/xGwotrff4PauVS7Y49f+5ohM78X\nEbOpnV4xg13s01qNX2mzG6oZovuBo7KFPtotSSqvCpAPA3/YfV5qE2vpOZ+oeqP11sws/SlFFTAa\nDwXukYj4MLWUf6GhSpJGtqhdnHcztZPjmxqqKqdFdSkTah/guLjZBalvzlhJkiQV4oyVJElSIQYr\nSZKkQgxWkiRJhRisJEmSCjFYSZIkFfL/AHZfOs1UIKi9AAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 5 is:\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHYVJREFUeJzt3X2YXGWZ5/HvnYSoRN4iQZEEAwyCCBK1wcGREVZxMJoB\nM1HAGKM4YAjxjUVkdWVcBC9QF1c3YCQKKLIGNXEma6Lo5YIgoiYQXgIYJxtk06gQWIUYNC/k3j/q\ndKj0drqrk6e7qrq/n+vKlTrnPHXqPl1Vp371nKfOicxEkiRJu25EswuQJEkaKgxWkiRJhRisJEmS\nCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEJGNeuB991335w4cWKzHl6SJKlh\nd9555+OZOa6vdk0LVhMnTmT58uXNenhJkqSGRcTDjbTzUKAkSVIhBitJkqRCDFaSJEmFNG2MVTNs\n2LCB2bNnM3r0aE444QSmT58OwKc+9SkefPBB9tlnHy666CJe/OIXc80117BixQr22msvLrnkkiZX\nLkmS2sGw6rFatGgR06ZNY/78+SxevHjb/FGjRjF69Gh222039t57bx577DFuvPFGxowZw/7779/E\niiVJUjsZVsGqs7OTCRMmADBy5Mht8z/+8Y9z/fXXc9JJJ/G1r32NNWvWMHbsWC677DIefvhh1qxZ\n06ySJUlSGxlWwWr8+PF0dnYCsHXr1m3zR4yo/Rn2228/1q9fzwEHHMDYsWMB2HvvvVm/fv3gFytJ\nktrOsBpjNXXqVObMmcOSJUuYMmUKM2bM4Prrr+czn/kMa9eu5fHHH+dLX/oS+++/P2PHjuW8885j\n8+bNHH300c0uXZIktYHIzKY8cEdHR3qCUEmS1A4i4s7M7Oir3bA6FChJkjSQDFaSJEmFDPlgtWVr\n+UOdA7FOSZLU/ob84PVRI4LLVmwuus4LX7lb0fVJkqShYcj3WEmSJA2WId9jNRz051I9ALNmzeL5\nz38+n//855tZtiRJQ449VkNAo5fqAfjud7/LMccc06xSJUka0gxWQ0Cjl+p59NFHWbFiBW94wxua\nVaokSUOawWoIaPRSPT/96U957LHHuPjii7n55pv5zW9+05R6JUkaqhxjNQT051I973jHO/jtb3/L\n3LlzeelLX9rs0iVJGlKGxSVtPN2CJEnaFV7SRpIkaZAZrNqUZ5SXJKn1OMaqTXlGeUmSWo89VpIk\nSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIK\nMVhJkiQVYrCSJEkqxGAlSZJUSEPBKiJOjohVEbE6Ii7sYfkJEfFkRNxd/buofKmSJEmtbVRfDSJi\nJHAlcBLQCSyLiMWZ+UC3prdl5lsHoEZJkqS20EiP1bHA6sxck5mbgAXAKQNbliRJUvtpJFgdAKyt\nm+6s5nX32oi4NyJ+EBEv72lFEXF2RCyPiOXr1q3biXIlSZJaV6nB63cBB2bmK4D/DvxrT40y8+rM\n7MjMjnHjxhV6aEmSpNbQSLB6BJhQNz2+mrdNZj6VmX+ubi8FdouIfYtVKUmS1AYaCVbLgEMj4qCI\nGA2cDiyubxARL4qIqG4fW633idLFSpIktbI+fxWYmVsiYg5wEzASuCYz74+IWdXyecA04JyI2AL8\nBTg9M3MA65YkSWo5fQYr2HZ4b2m3efPqbs8F5pYtTZIkqb145nVJkqRCDFaSJEmFGKwkSZIKMVhJ\nkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJ\nKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSI\nwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVY
rCSJEkqxGAlSZJUiMFKkiSpEIOV\nJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKqShYBURJ0fEqohYHREX9tLumIjYEhHT\nypUoSZLUHvoMVhExErgSeDNwBHBGRByxg3aXAz8qXaQkSVI7aKTH6lhgdWauycxNwALglB7afQBY\nCDxWsD5JkqS20UiwOgBYWzfdWc3bJiIOAN4GfLm3FUXE2RGxPCKWr1u3rr+1SpIktbRSg9f/G/Cx\nzNzaW6PMvDozOzKzY9y4cYUeWpIkqTWMaqDNI8CEuunx1bx6HcCCiADYF5gcEVsy81+LVClJktQG\nGglWy4BDI+IgaoHqdOCd9Q0y86Cu2xFxHfB9Q5UkSRpu+gxWmbklIuYANwEjgWsy8/6ImFUtnzfA\nNUqSJLWFRnqsyMylwNJu83oMVJn5nl0vS5Ikqf145nVJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCS\nJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmS\nVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQ\ng5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYr\nSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKqShYBURJ0fEqohYHREX9rD8lIi4NyLujojlEfG6\n8qVKkiS1tlF9NYiIkcCVwElAJ7AsIhZn5gN1zX4CLM7MjIhXAN8GDh+IgiVJklpVIz1WxwKrM3NN\nZm4CFgCn1DfIzD9nZlaTY4BEkiRpmGkkWB0ArK2b7qzmbSci3hYRvwaWAGf2tKKIOLs6VLh83bp1\nO1OvJElSyyo2eD0zv5eZhwOnAp/eQZurM7MjMzvGjRtX6qElSZJaQiPB6hFgQt30+GpejzLzVuDg\niNh3F2uTJElqK40Eq2XAoRFxUESMBk4HFtc3iIi/iYiobr8KeA7wROliJUmSWlmfvwrMzC0RMQe4\nCRgJXJOZ90fErGr5POCfgHdHxGbgL8BpdYPZJUmShoU+gxVAZi4FlnabN6/u9uXA5WVLkyRJai+e\neV2SJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOV\nJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mS\npEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmF\nGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCmko\nWEXEyRGxKiJWR8SFPSyfHhH3RsR9EfHziDi6fKmSJEmtrc9gFREjgSuBNwNHAGdExBHdmj0EvD4z\njwI+DVxdulBJkqRW10iP1bHA6sxck5mbgAXAKfUNMvPnmfnHavIXwPiyZUqSJLW+RoLVAcDauunO\nat6OvA/4QU8LIuLsiFgeEcvXrVvXeJWSJEltoOjg9Yg4kVqw+lhPyzPz6szsyMyOcePGlXxoSZKk\nphvVQJtHgAl10+OreduJiFcAXwXenJlPlClPkiSpfTTSY7UMODQiDoqI0cDpwOL6BhFxILAImJGZ\nvylfpiRJUuvrs8cqM7dExBzgJmAkcE1m3h8Rs6rl84CLgBcAV0UEwJbM7Bi4sjUcbNiwgdmzZzN6\n9GhOOOEEpk+fDsDSpUuZO3cukydPZs6cOTzzzDPMnDmT5z3veWzatIlrr72WESM8RZskafA19OmT\nmUsz86WZeUhmXlrNm1eFK
jLznzNzn8ycVP0zVGmXLVq0iGnTpjF//nwWL362k3Ty5MlccMEF26bX\nr1/PHnvswfz589l9993505/+1IxyJUnyzOtqXZ2dnUyYUBveN3LkyB2223PPPdm4cSNvectb2Lx5\nM2PHjh2sEnfKhg0bmDlzJmeddRY33HDDtvlLly5l8uTJzJ07F4BHH32UWbNmMWvWLA488ECeeuqp\nZpUsSWqQwUota/z48XR2dgKwdevWHba76667mDhxIkuWLOGggw7i7rvvHqwSd0qjPXEvfOELmTdv\nHhdffDEnnngie+65ZzPKlST1g8FKLWvq1KksXLiQc845hylTpjBjxgwA7rjjDq644gpuvPFGFi5c\nyBFHHMGDDz7I7Nmzueeeezj00EObXHnvGu2J63Ldddcxc+bMgS5LklRAI6dbkJpizJgxXHvttdum\nuwavH3fccdv19AB861vfGtTadkVXT9ykSZN67YkDyExuvvlmPvrRjw5SdZKkXWGPlTTIGu2JA7jl\nlls4/vjjqX5tK0lqcZGZTXngjo6OXL58+aA81mUrNhdd34Wv3K3o+nbWUN0uSZJaTUTc2chZD+yx\nUsvYsnVgQv5ArVeSpO4cY6WWMWpEFO+FA3viJEmDxx4raYDZEydJw4c9VtIAsydOkoYPe6wkSZIK\nMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKw\nkiRJKsRgJUmSVIjBSpIkqRCDlaQiNmzYwMyZMznrrLO44YYbts1funQpkydPZu7cuQCsWrWKM888\nk/e+971cfvnlzSpXkgbEqGYXIGloWLRoEdOmTWPKlCmcdtppTJ8+HYDJkyez++67s3LlSgAOO+ww\nrrnmGgCmTp3atHolaSDYYyWpiM7OTiZMmADAyJEj+2y/YMEC3vSmNw10WZI0qAxWkooYP348nZ2d\nAGzdurXXtgsWLODhhx9m1qxZg1GaJA0ag5WkIqZOncrChQs555xzmDJlCjNmzADgjjvu4IorruDG\nG29k4cKFrFixgvPPP5+HHnqI8847r8lVS1JZjrGSVMSYMWO49tprt013jbE67rjjWLx48XZtu3q2\nJGmoscdKknrR6K8dAd7//vdz1FFHNaNMSS3CYCVJvej6teP8+fO363mbPHkyF1xwwXZtv/KVr3DY\nYYcNdomSWojBStJO2bI122q9O6u/v3aUNLw5xkrSThk1Irhsxebi673wlbsVX+eu6Pq146RJk/r8\ntaMk2WMlSb1o9NeOAJ/4xCdYsWIFs2bNYuPGjc0su2H9GUMmqW/2WElSL/rza8dLL72USy+9dFDr\n21WNnjFfUmPssZKkYWyojiGzJ07NYrCSpGGsP2fMbyf9+TWnVFJDwSoiTo6IVRGxOiIu7GH54RFx\nR0RsjIjzy5cpSYNjuPzasUt/xpC1k6HWE7ejHriVK1cyffp0pk+fzsqVK3n66ad517vexezZs7nk\nkkuaWPHw1ecYq4gYCVwJnAR0AssiYnFmPlDX7P8CHwROHZAqJWmQDJdfO3bpzxiydjLUfs25o7Fw\nX/ziF7nyyiuJCC644ALOPvtsDjvsMD75yU9y7rnnsnbt2m0BU4OjkR6rY4HVmbkmMzcBC4BT6htk\n5mOZuQwovzeSJKmfhlpP3I564J588kn23ntv9tprL9avX88rX/lKNm7cyHnnncfvfvc7HnnkkWaV\n3JCh2BPXyK8CDwDW1k13Aq8ZmHIkSQNhy9Zk1Ihom/XuqqHWE7ejHri99tqLJ598kohgjz32YMSI\nEduCx5lnnsnBBx/crJIbMhR74gb1dAsRcTZwNsCBBx44mA8tScPacDvEOdRMnTqVOXPmsGTJkm09\ncNdffz0f+tCH+MAHPgCwbVD+7Nmz2bx5Mx0dHey3337NLLtPnZ2d266v2VNPHLCtJ+573/v
edj1x\n7RysHgHqqx9fzeu3zLwauBqgo6OjNUdySpLUYnbUA3fkkUfyjW98Y7u2V1111aDWtiuGYk9cI8Fq\nGXBoRBxELVCdDrxzQKuSJKkBw+0Q51AzFHvi+gxWmbklIuYANwEjgWsy8/6ImFUtnxcRLwKWA3sC\nWyPiw8ARmfnUANYuSRrmhuohzuESGIdiT1xDY6wycymwtNu8eXW3/0DtEKEkSdpFQzUwDgeeeV2S\nJKkQg5UkSRoUw+HKBoN6ugVJkjR8DYdDnPZYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEG\nK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaS\nJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmS\nCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVi\nsJIkSSqkoWAVESdHxKqIWB0RF/awPCLiS9XyeyPiVeVLlSRJam19BquIGAlcCbwZOAI4IyKO6Nbs\nzcCh1b+zgS8XrlOSJKnlNdJjdSywOjPXZOYmYAFwSrc2pwDfyJpfAHtHxP6Fa5UkSWppjQSrA4C1\nddOd1bz+tpEkSRrSIjN7bxAxDTg5M/+5mp4BvCYz59S1+T5wWWb+rJr+CfCxzFzebV1nUztUCHAY\nsKrUhhSwL/B4s4sYAG5Xe3G72ovb1V7crvbSatv1kswc11ejUQ2s6BFgQt30+Gpef9uQmVcDVzfw\nmIMuIpZnZkez6yjN7Wovbld7cbvai9vVXtp1uxo5FLgMODQiDoqI0cDpwOJubRYD765+Hfi3wJOZ\n+fvCtUqSJLW0PnusMnNLRMwBbgJGAtdk5v0RMataPg9YCkwGVgNPA+8duJIlSZJaUyOHAsnMpdTC\nU/28eXW3Ezi3bGmDriUPURbgdrUXt6u9uF3txe1qL225XX0OXpckSVJjvKSNJElSIQYrSRoAETEx\nIt7Z7Dp2VrvXLzWLwaqbiLg4It7Yw/wTqvN1NVW1s1vZ7DoaMVi1RsRXe7jMkgZRf57rVn+P1YuI\nT0XE+Ttxv65Lgd1Zbddry1e36yLi59X/24Wo+vqbVdvO6G1f0O77ia7nqp/3ObWVtrmdPr92RUOD\n14eTzLyo2TUMlIgYlZlbdjTdSvpTW9fJa9tRRAS1sY5bm13LYIiIkUP5PVbnb4DLM3NVRJwB/Bno\n9wfjQMvMrsA3EXgn8D+q6W31N6OunbWjfUH1umvb/QRs91z1x6nA94EHCpejXgyLHquIGBMRSyLi\nnohYGRGnRcRFEbGsmr66+oAjIq6rzjZPRJwcEb+OiLuAqU3diO2NjIj5EXF/RPwoIp4XEWdV23NP\nRCyMiN1h2/bMi4hfAp+tvoFfHxG3A9dHxK0RMalrxRHxs4g4eoBrPSQifhgRd0bEbRFxeC+1fr1q\n83BETI2Iz0bEfdX9d6vud0tEdFS3z6iWr4yIy+u2688RcWn19/lFRLywmj+u+nstq/79XcFt71H1\nrW1VRHwDWAnM6E/NA1zXyrrp86vn4JaI+EJELI+IByPimIhYFBH/HhGX1K1iVETcULX5bt1r8LcR\ncXn1Pnp7I++x6nn5cfW6+Wr1/O9bLXtXRPwqIu6OiK9ErXel5N/hExHxm4j4GbUrRBARk6rn4N6I\n+F5E7FPNv6Xatl9V9zm+Ws3+wAURMRGYBXykqvf4Hh6yaSLiz9XNy4Djqxo/QlV/1eb11fy7I2JF\nROzRrHrrRc/79fp9wZ8j4r9GxD3Acd2X1a1nWkRcV91+e7WueyLi1mZs145U27Ndr25EzI2I91S3\nL4uIB6rX6Oej1kv6j8DnqufukCaV3qOIOLh6PX202p/
8sNqnfLauzf+3P6+eoyuq2x+KiDV167u9\nOVuzvWERrICTgd9l5tGZeSTwQ2BuZh5TTT8PeGv9HSLiucB8YArwauBFg1xzbw4FrszMlwN/Av4J\nWFRtz9HAg8D76tqPB16bmedV00cAb8zMM4CvAe8BiIiXAs/NzHsGuNargQ9k5quB84Greqn1EOA/\nUNtBfBO4OTOPAv4CvKX+gSLixcDlVftJwDERcWq1eAzwi+rvcytwVjX/i8AXMvOYqravltrwPhxK\nbbtPAj7dz5qbYVN1BuR5wL9RO73KkcB7IuIFVZvDgKsy82XAU8Dsuvs/kZmvyswFXTP6eI/9C/C/\nqtfNd4EDq/u8DDgN+LvMnAQ8A0wvtZER8WpqJ0GeRO3cfMdUi75B7TJdrwDuq+rrMiozjwU+3G0+\nmflban+zL2TmpMy8rVSthV0I3FbV+IVuy84Hzq3+3sdTe++1gp726/XGAL+slv+swXVeBPxD9Z77\nx4K1DqjqPfg24OXVa/SSzPw5tZN3f7R6Xv93U4usExGHAQupffaso/Z+Ow04CjgtIib0sj+/jdrr\nkOr/JyLigOp2S4Th4RKs7gNOqr5ZHp+ZTwInRsQvI+I+ak/cy7vd53Dgocz89+o8Xd8c5Jp781Bm\n3l3dvpNaN/6RUevZuY/aB0399nwnM5+pm16cmV07x+8Ab41a78+ZwHWDUOtrge9ExN3AV6h9O95R\nrT/IzM3UnsORPLvzvK9aV71jgFsyc111GPEG4O+rZZuodYnX1wHwRmBuVctiYM+IeP7ObWq/PJyZ\nv9jJmpuh62oL9wH3Z+bvM3MjsIZnL2e1NjO7vjF+E3hd3f1v7GGdvb3HXgcsAMjMHwJ/rOa/gVoI\nW1Y9Z28ADt6lLdve8cD3MvPpzHyK2naPAfbOzJ9Wbb7Os88RwKLq/2Y/RwPlduCKiPggtb9Dqwwf\n6Gm/Xu8Zah/e/XE7cF1EnEVtf9MungT+CnwtIqZSO1F3qxpH7cvZ9Lov8T/JzCcz86/UDlu+hB3s\nGzPzD8Dzq57TCdQOX/89tfduS3xxGRZjrDLzNxHxKmrfQC+J2kWizwU6MnNtRHwKeG4za+ynjXW3\nn6HW43YdcGpm3lN1DZ9Q12ZDt/tvm87MpyPix8ApwDuofWgNZK0vBP5UffvtSfdaN1Z1bo2Izfns\nide20r/Xb/19n6m77wjgb6s39GDqvp092VHNA2UL23/Zqn9PdD2PW9n+Oa1/HrqfFK9+upHtbUQA\nX8/M/1RofSV0/T0G4zkadJl5WUQsobb/vD0i/iEzf90CdfW0X6/3125f0ra7e93tba/zzJwVEa+h\n1ht+Z0S8OjOfKFr4runxPVpdIeVYal80pgFzqHUYtKIngf9D7ctT19iv7p8Tfb2Pfk7tCi+rqIWp\nM4HjgP9YtNKdNCx6rKouxacz85vA54BXVYser3onpvVwt18DE+uOS58x8JXukj2A31c9T/09NPJV\n4EvAssz8Y1+Nd9FTwEMR8XaoDd6OcmO6fgW8PiL2jdq4mzOAn/Zxnx8BH+iaiLrxZoNkZ2oeKI8C\n+0XECyLiOXQ7PN6AAyPiuOr2O4G+Dr/09h67nVrQJyLeBOxTzf8JMC0i9quWjY2Il/Szzt7cCpwa\ntbGAe1A7TLkB+GPd+KgZ9O85Wk/t/dnKdlhjRBySmfdl5uXUrh17+KBWtgO97Ncb8WhEvCwiRlA7\nhNa1zkMy85fVDyzW8WxvbKt4GDgiIp4TEXtTC1JUn2N7VVdJ+QjQtU9txdfeJmp/83dH76fz6G3f\neBu1Q9S3AiuAE4GNPfRaNsWwCFbUjtv+qjp08C/AJdTGdqykdg3EZd3vUPVgnA0sidrA2scGr9yd\n8kngl9Q+kPr1bTIz76QWeK4dgLp6Mh14XzWo9H5qvWW7LGsX/r4QuBm4B7gzM/+tj7t9EOioBnw+\nQG2g8aDZyZoHqpb
NwMXUdmg/pp+vI2rfHs+NiAepBaEv9/F4vb3H/gvwpqgNpn878AdgfWY+APxn\n4EcRcW9V5/4Ukpl3UTtseQ/wA57dN8ykNgj4XmrjPS7ux2r/J/C2aMHB63XuBZ6pBm1/pNuyD1eD\nh+8FNlP7u7SCnvbrjbqQ2mH2nwO/r5v/ua7B0tWykuNNd1Vm5lrg29Q+u75NLVRALTx9v3qOfgZ0\njVFdAHy0GiTeMoPXM3MDtS9uHwH23EGb3vaNt1ELvbdWvZJr6fuL3KDxkjbq+uZ3C3B4DpOf/au1\nVT1mz1SHOI4DvtzL4WNpSKsGp9+VmSV7ZzVAhtx4APVPRLwbuBQ4z1ClFnIg8O3qUM0mmvuLSKlp\n6r74fr7JpahB9lhJkiQVMlzGWEmSJA04g5UkSVIhBitJkqRCDFaSJEmFGKwkSZIK+X9gbc+jPana\ncQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 6 is:\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+cFfV97/HXhwUSJSAYMVXBognRUG3Ubk1MtcWbkoeS\nUM1KrhokmCYSoGvTy02R/LjGtpqoTbhNghQlopXaqC20oUI0uYnW/NAEFNRVYsrDaFnijZiriGgQ\nwvf+MbPrYd1lD8t39+xZXs/Hgwfz47szn+/OnNn3mZkzJ1JKSJIkaf8NqnUBkiRJA4XBSpIkKROD\nlSRJUiYGK0mSpEwMVpIkSZkYrCRJkjIxWEmSJGVisJIkScrEYCVJkpTJ4Fqt+LDDDkvjxo2r1eol\nSZKq9uCDDz6XUhrdXbuaBatx48axdu3aWq1ekiSpahHxdDXtvBQoSZKUicFKkiQpE4OVJElSJjW7\nx6oWtm/fzpw5cxg6dCgTJ05k2rRpAFxxxRVs2LCBUaNGcfnllzNy5EhmzpzJiBEjOPLII/nc5z5X\n48olSVI9qOqMVUScFRFPRMTGiJjfRZuJEbE+Ih6LiP/IW2YeK1asYOrUqSxZsoSVK1e2Tx88eDBD\nhw5lyJAhjBw5kg0bNnDcccexaNEinnnmGTZt2lTDqiVJUr3oNlhFRANwHXA2MAG4MCImdGgzElgE\n/ElK6XeAD/VCrfuttbWVsWPHAtDQ0NA+/TOf+QzLli1j0qRJ3HjjjZx88sns2LGDuXPn8otf/ILN\nmzfXqmRJklRHqjljdSqwMaX0ZErpVeA24JwObT4MrEgp/RdASunZvGXmMWbMGFpbWwHYvXt3+/RB\ng4pfw+GHH862bdsYNGgQV155JQsWLGDUqFEce+yxNalXkiTVl2rusToKqLwW1gq8q0ObtwNDIuJe\nYDjwlZTSLR0XFBEzgZkARx99dE/q3S9NTU00NzezatUqpkyZwvTp01m2bBlf+MIX2LRpE8899xxf\n/epXAZgzZw47d+6ksbGRww8/vM9rlSRJ9SdSSntvEDEVOCul9PFyfDrwrpRSc0WbhUAj8F7gIOB+\n4P0ppZ91tdzGxsbkA0IlSVI9iIgHU0qN3bWr5ozVZmBsxfiYclqlVuBXKaXtwPaIuA94J9BlsJIk\nSRpoqrnHag0wPiKOiYihwAXAyg5tvgmcHhGDI+JgikuFG/KWKkmS1L91e8YqpbQrIpqBu4EGYGlK\n6bGImFXOX5xS2hARdwGPALuBr6eUWnqz8Grt2p0YPCj6/TIlSVL96/Yeq97Sl/dYXb1uZ9blzT95\nSNblSZKk/
q3ae6z8ShtJkqRMDFaSJEmZGKwkSZIyMVhJkiRlYrCSJEnKxGAlSZKUicFKkiQpE4OV\nJElSJgYrSZKkTAxWkiRJmRisJEmSMjFYSZIkZWKwkiRJysRgJUmSlInBSpIkKRODlSRJUiYGK0mS\npEwMVpIkSZkYrCRJkjIxWEmSJGVisJIkScpkcK0L0P7bvn07c+bMYejQoUycOJFp06YBcMUVV7Bh\nwwZGjRrF5ZdfzrZt27jmmmtIKXH88cdz2WWX1bhySZIGFoPVALBixQqmTp3KlClTOP/889uD1eDB\ngxk6dChDhgxh5MiRHHnkkSxduhSApqamWpYsSdKA5KXAAaC1tZWxY8cC0NDQ0D79M5/5DMuWLWPS\npEnceOON7dNvu+023ve+9/V5nZIkDXQGqwFgzJgxtLa2ArB79+726YMGFZv38MMPZ9u2bUARqp5+\n+mlmzZrV94VKkjTAGawGgKamJpYvX87s2bOZMmUK06dPB+ALX/gCs2fP5stf/jIf/ehHWbduHZ/6\n1Kf4+c9/zty5c2tctSRJA0+klGqy4sbGxrR27do+WdfV63ZmXd78k4dkXZ4kSerfIuLBlFJjd+08\nYyVJkpSJwapO7dqd/0xjbyxTkqQDiY9bqFODB4WXOCVJ6mc8YyVJkpSJwUqSJCkTg5UkSVImBitJ\nkqRMDFaSJEmZVBWsIuKsiHgiIjZGxPxO5k+MiK0Rsb78d3n+UiVJkvq3bh+3EBENwHXAJKAVWBMR\nK1NKj3do+v2U0gd6oUZJkqS6UM0Zq1OBjSmlJ1NKrwK3Aef0blmSJEn1p5pgdRSwqWK8tZzW0Xsi\n4pGI+FZE/E5nC4qImRGxNiLWbtmypQflSpIk9V+5bl5/CDg6pfS7wNeAf+usUUrphpRSY0qpcfTo\n0ZlWLUmS1D9UE6w2A2MrxseU09qllF5MKb1UDq8GhkTEYdmqlCRJqgPVBKs1wPiIOCYihgIXACsr\nG0TEb0VElMOnlsv9Ve5iJUmS+rNuPxWYUtoVEc3A3UADsDSl9FhEzCrnLwamArMjYhfwCnBBSin1\nYt2SJEn9TrfBCtov763uMG1xxfBCYGHe0iRJkuqLT16XJEnKxGAlSZKUicFKkiQpE4OVJElSJgYr\nSZKkTAxWkiRJmRisJEmSMjFYSZIkZWKwkiRJysRgJUmSlInBSpIkKRODlSRJUiYGK0mSpEwMVpIk\nSZkYrCRJkjIxWEmSJGVisJIkScrEYCVJkpSJwUqSJCkTg5UkSVImBitJkqRMDFaSJEmZGKwkSZIy\nMVhJkiRlYrCSJEnKxGAlSZKUicFKkiQpE4OVJElSJgYrSZKkTAxWkiRJmRisJEmSMjFYSZIkZWKw\nkiRJysRgJUmSlElVwSoizoqIJyJiY0TM30u734+IXRExNV+JkiRJ9aHbYBURDcB1wNnABODCiJjQ\nRbtrgG/nLlKSJKkeVHPG6lRgY0rpyZTSq8BtwDmdtLsUWA48m7E+SZKkulFNsDoK2FQx3lpOaxcR\nRwEfBP4+X2mSJEn1JdfN638HXJZS2r23RhExMyLWRsTaLVu2ZFq1JElS/zC4ijabgbEV42PKaZUa\ngdsiAuAwYHJE7Eop/Vtlo5TSDcANAI2NjamnRUuSJPVH1QSrNcD4iDiGIlBdAHy4skFK6Zi24Yi4\nGbizY6iSJEka6LoNVimlXRHRDNwNNABLU0qPRcSscv7iXq5RkiSpLlRzxoqU0mpgdYdpnQaqlNLF\n+1+WJElS/fHJ65IkSZkYrCRJkjIxWEmSJGVisJIkScrEYCVJkpSJwUqSJCkTg5UkSVImBitJkqRM\nDFaSJEmZGKwkSZIyMVhJkiRlYrCSJEnKxGAlSZKUicFKkiQpE4OVJElSJgYrSZKkTAxWkiRJmRis\nJEmSMjFYSZIkZWKwkiRJysRgJUmSlInBSpIkKRODlSRJUiYGK0mSpEwMVpI
kSZkYrCRJkjIxWEmS\nJGVisJIkScrEYCVJkpSJwUqSJCkTg5UkSVImBitJkqRMDFaSJEmZGKwkSZIyMVhJkiRlUlWwioiz\nIuKJiNgYEfM7mX9ORDwSEesjYm1EnJ6/VEmSpP5tcHcNIqIBuA6YBLQCayJiZUrp8Ypm3wVWppRS\nRPwucAdwfG8ULEmS1F9Vc8bqVGBjSunJlNKrwG3AOZUNUkovpZRSOToMSEiSJB1gqglWRwGbKsZb\ny2l7iIgPRsRPgVXAn+YpT5IkqX5ku3k9pfSvKaXjgXOBv+msTUTMLO/BWrtly5Zcq5YkSeoXqglW\nm4GxFeNjymmdSindBxwbEYd1Mu+GlFJjSqlx9OjR+1ysJElSf1ZNsFoDjI+IYyJiKHABsLKyQUS8\nLSKiHD4FeAPwq9zFSpIk9WfdfiowpbQrIpqBu4EGYGlK6bGImFXOXwycB3wkInYCrwDnV9zMLkmS\ndEDoNlgBpJRWA6s7TFtcMXwNcE3e0iRJkuqLT16XJEnKxGAlSZKUicFKkiQpE4OVJElSJgYrSZKk\nTAxWkiRJmRisJEmSMjFYSZIkZWKwkiRJysRgJUmSlInBSpIkKRODlSRJUiYGK0mSpEwMVpIkSZkY\nrCRJkjIxWEmSJGVisJIkScrEYCVJkpSJwUqSJCkTg5UkSVImBitJkqRMDFaSJEmZGKwkSZIyMVhJ\nkiRlYrBSv7V9+3ZmzJjBJZdcwq233to+/eqrr+YTn/gE5513Hq2trQB84hOf4MQTT6xVqZIkAQYr\n9WMrVqxg6tSpLFmyhJUrV7ZPnz9/Ptdffz0XXXQR99xzDwDXX389xx13XK1KlSQJMFipH2ttbWXs\n2LEANDQ07DHvpZde4o477uDcc8+tRWmSJHXKYKV+a8yYMe2X+nbv3t0+/cUXX2T27Nlce+21DB8+\nvFblSZL0OgYr9VtNTU0sX76c2bNnM2XKFKZPnw7AxRdfzAsvvMBVV13F9773PQA++9nPsm7dOmbN\nmsWOHTtqWbYk6QAWKaWarLixsTGtXbu2T9Z19bqdWZc3/+QhWZfXUwO1X5Ik9TcR8WBKqbG7dp6x\nkiRJysRgJUmSlInBSv3Grt29c1m6t5YrSVJHg2tdgNRm8KDIft8YeO+YJKnveMZKkiQpk6qCVUSc\nFRFPRMTGiJjfyfxpEfFIRDwaET+KiHfmL1WSJKl/6zZYRUQDcB1wNjABuDAiJnRo9nPgj1JKJwJ/\nA9yQu1BJkqT+rpozVqcCG1NKT6aUXgVuA86pbJBS+lFK6fly9AFgTN4ypYGjqy+XXr16NZMnT2bh\nwoUAvPzyy1x00UXMmTOHK6+8slblSpL2QTXB6ihgU8V4azmtKx8DvtXZjIiYGRFrI2Ltli1bqq9S\nGkC6+nLpyZMnM2/evPbxDRs2cNxxx7Fo0SKeeeYZNm3a1NniJEn9SNab1yPiTIpgdVln81NKN6SU\nGlNKjaNHj865aqlu7O3LpSudfPLJ7Nixg7lz5/KLX/yCzZs391WJkqQeqiZYbQbGVoyPKaftISJ+\nF/g6cE5K6Vd5ypMGnq6+XLqjQYMGceWVV7JgwQJGjRrFscce21clSpJ6qJrnWK0BxkfEMRSB6gLg\nw5UNIuJoYAUwPaX0s+xVSgNIU1MTzc3NrFq1qv3LpZctW8b999/PggULeP755zniiCM477zzmDNn\nDjt37qSxsZHDDz+81qVLkrrRbbBKKe2KiGbgbqABWJpSeiwiZpXzFwOXA28GFkUEwK5qvqhQOhAN\nGzaMm266qX182rRpAJx22ml73HMFsGjRoj6tTZK0f6p68npKaTWwusO0xRXDHwc+nrc0SZKk+uKT\n16Ve5ncgStKBw+8KlHrZgfIdiNu3b2fOnDkMHTqUiRMntl/iXL16NQsXLmTy5Mk0Nzfzy1/+ks9/\n/vPt81paWhgxYkQtS5ekbAxWkrJoez7
XlClTOP/889uD1eTJkzn44INpaWkB4C1veQuLFy/m2Wef\n5ZVXXjFUSRpQvBQoKYtqn8/V5uabb2bGjBm9XZYk9SmDlaQsqn0+F0BKiXvuuYczzzyzL0qTpD5j\nsJKURVNTE8uXL2f27Nntz+cC2p/Pdfvtt7N8+XIA7r33Xs444wzKx7NI0oDhPVaSstiX53OdeeaZ\nnq2SNCB5xkqSJCkTg5UkSVImBitJPeKDTyXp9bzHSlKP+ODTPR98CvDWt76VSZMmccoppzBz5sxa\nli2pRjxjJUl70fbg0yVLluxxE/7kyZOZN2/eHm3f9KY38corr7Q/z0vSgcczVpK0F62trZx44olA\n9w8+XbduHSklPvCBD3D22Wf3RXmS+hnPWEnSXuzLg08HDRpEQ0MDb3jDG7ptK2lg8oyVJO1FU1MT\nzc3NrFq1qv3Bp8uWLWt/8Onzzz/PEUccwQknnMA111wDwMSJExk0yPet0oHIYCVJe7EvDz5dunRp\nn9Ymqf/xLZUkSVImBitJkqRMDFaSVMEHn0raH95jJUkVDpQHn0rqHZ6xkiRJysRgJUmSlInBSpIk\nKRODlSRJUiYGK0mSpEwMVpIkSZkYrCRJkjIxWEmSJGVisJIkScrEYCVJkpSJwUqSJCkTg5UkSVIm\nBitJOoBt376dGTNmcMkll3Drrbe2T1+9ejWTJ09m4cKFNaxOqj8GK0k6gK1YsYKpU6eyZMkSVq5c\n2T598uTJzJs3r4aVSfXJYCVJB7DW1lbGjh0LQENDQ42rkepfVcEqIs6KiCciYmNEzO9k/vERcX9E\n7IiIT+UvU5LUG8aMGUNraysAu3fvrnE1Uv3rNlhFRANwHXA2MAG4MCImdGj2/4A/B76UvUJJUq9p\nampi+fLlzJ49mylTpjB9+nQA7r//fhYsWMDtt9/O8uXLa1ylVD8GV9HmVGBjSulJgIi4DTgHeLyt\nQUrpWeDZiHh/r1QpSeoVw4YN46abbmofnzZtGgCnnXbaHvdcSapONcHqKGBTxXgr8K6erCwiZgIz\nAY4++uieLEKSpC5t376dOXPmMHToUCZOnNgeFFtaWvjiF78IwKc//Wne8Y53MGPGDA466CBeffVV\nbrrpJgYN8rZj7b8+3YtSSjeklBpTSo2jR4/uy1VLkg4AXX3K8Stf+QrXXXcdixYt4mtf+xrbtm1j\n+PDhLFmyhIMPPpgXXnihhlV3r6vHYrS0tDBt2jSmTZtGS0sLv/nNb7jooou45JJLmDFjRr+/b24g\n9quaYLUZGFsxPqacJkmqE7t2p7pabk919SnHrVu3MnLkSA455BC2bdvGiBEj2LFjB+9///vZuXMn\nhx56aK1KrspADYwDsV/VXApcA4yPiGMoAtUFwId7tSpJUlaDBwVXr9uZfbnzTx6SfZn7o+1Tjied\ndNIeZzUOOeQQtm7dSkQwfPhwHnroIcaNG8fSpUu56qqrWL9+PSeddFINK9+71tZWTjzxRKDzwAi8\nLjAeccQR/T4wDsR+dXvGKqW0C2gG7gY2AHeklB6LiFkRMQsgIn4rIlqBucDnIqI1Ikb0ZuGSJHXU\n1accP/nJT3LppZfS3NzMpZdeyoQJE9iwYQNz5szh4YcfZvz48TWufO+6eixGW2B88cUX9wiMq1at\n4phjjmH9+vW1KrkqA7FfkVJtTuM2NjamtWvX9sm6cr9L6y/v0AZivwbqO2r7VT371XsGar8OBNu3\nb6e5uZk3vvGNnH766dx1110sW7aMlpYWrr32WgDmzZvHsccey8c+9jFGjRrFc889x0033cSwYcNq\nXH3X6qlfEfFgSqmxu3bVXAqUJEk11NVjMU444QRuueWWPdp+4xvf6NPa9sdA7JefLZUkScrEYCVJ\nqlsD9dOOA7VfBwIvBUqS6tZA/bTjQO3Xrt2JwYOibpbbEwYrSZLUJwZqYKzkpUBJkqRMDFaSJEmZ\nGKw
kSZIyMVhJkiRlYrCSJEnKxGAlSZKUicFKkiQpE4OVJElSJgYrSZKkTAxWkiRJmRisJEmSMjFY\nSZIkZWKwkiRJysRgJUmSlInBSpIkKRODlSRJUiYGK0mSpEwMVpIkSZkYrCRJkjIxWEmSJGVisJIk\nScrEYCVJkpSJwUqSJCkTg5UkSVImBitJkqRMDFaSJEmZGKwkSZIyMVhJkiRlYrCSJEnKpKpgFRFn\nRcQTEbExIuZ3Mj8i4qvl/Eci4pT8pUqSJPVv3QariGgArgPOBiYAF0bEhA7NzgbGl/9mAn+fuU5J\nkqR+r5ozVqcCG1NKT6aUXgVuA87p0OYc4JZUeAAYGRFHZK5VkiSpX6smWB0FbKoYby2n7WsbSZKk\nAS1SSntvEDEVOCul9PFyfDrwrpRSc0WbO4GrU0o/KMe/C1yWUlrbYVkzKS4VAhwHPJGrIxkcBjxX\n6yJ6gf2qL/arvtiv+mK/6kt/69dvp5RGd9docBUL2gyMrRgfU07b1zaklG4AbqhinX0uItamlBpr\nXUdu9qu+2K/6Yr/qi/2qL/Xar2ouBa4BxkfEMRExFLgAWNmhzUrgI+WnA98NbE0pPZO5VkmSpH6t\n2zNWKaVdEdEM3A00AEtTSo9FxKxy/mJgNTAZ2Ai8DHy090qWJEnqn6q5FEhKaTVFeKqctrhiOAF/\nlre0PtcvL1FmYL/qi/2qL/arvtiv+lKX/er25nVJkiRVx6+0kSRJysRgJUmSlMmADVYRMS4iWmpd\nR38REX8dEX/cyfSJ5XPIcq7rioj4VIbl3BsRr/uobURcHBEL93FZT0XEYftb04Gqq21R5c9m38f2\ncf19ciyIiK938nVfdaPtdduXx4r9dSAd5yPiRz34mXPreZ+sV1XdvH6giYjBKaVdXY3Xo5TS5bWu\noV5ERFDcf7h7IK2rXkVEQ0rpNzVad9Wv/baHKNc7jxX9U0rpPT34sXOBO4HHM5ejvRiwZ6xKDRGx\nJCIei4hvR8RBEXFJRKyJiIcjYnlEHAwQETdHxOKI+DFwbfnubVlE/BBYFhH3RcRJbQuOiB9ExDtr\n1bGyhmERsarsS0tEnB8Rl5f9a4mIG8o/3G39m1oOnxURP42Ih4CmTLV8NiJ+FhE/oHiq/h5nOSLi\nsIh4qhy+OCL+LSK+U55Jao6IuRGxLiIeiIhDKxY9PSLWl/05tZP1ji6345ry3x+U099cbvPHIuLr\nQFT8zNxyeS0R8RfltHER8URE3AK0AGMj4n0RcX9EPBQR/xwRb8r0u+q4rukR8WhZzzUV7V6KiKvK\n7ftARLwlx/qrqO91+1WH+S9VDE+NiJvL4beWdT4aEVdWtgPeFBH/Uu53t1bsl+8tt/ujEbE0It5Q\nTn8qIq4p99EPlfvSNRHxk3I/O2Mfu9XZseCtEXFXRDwYEd+PiOPLdXd2LPiHss3TEdEUEdeWNd8V\nEUPKn6vc3y/cl23a1X7c27p43fbqsaK3RMSx5b70lxGxotw2/xkR11a0ed12iYgPRcSCcviTEfFk\nxfJ+WJvevF657+xx1jAiFkbExeXw1RHxeEQ8EhFfioj3AH8C/G0Ux9C31qj0rv5WPRURfxXF8fXR\nitffqVEcd9dFxI8iom2/vDgivlm+zv4zIj5fsfyLymPD+oi4PiIaatVXAFJKA/IfMA7YBZxUjt8B\nXAS8uaLNlcCl5fDNFMm+oRy/AngQOKgcnwH8XTn8dmBtP+jjecCSivFDgEMrxpcBUyr6NxV4I8X3\nOo6nCBt3AHfuZx2/BzwKHAyMoHie2aeAe4HGss1hwFPl8MVlm+HAaGArMKuc97+BvyiH723rH/CH\nQEvFzy8sh/8JOL0cPhrYUA5/Fbi8HH4/kMoa2modBrwJeAw4udxfdgPvrqj3PmBYOX5Z2/Iy7Zu7\ngXcDRwL/Vf4eBgPfA84t26WK7Xct8Lka7leV2/KlinlTgZvL4TuBC
8vhWW3tgInlNh5D8WbufuD0\nin3x7WW7Wyq2/VPAvIr13At8uRyeDPyfffx9d3Ys+C4wvpz2LuB7Fa+VjseCHwBDgHdSPKvv7HLe\nv1Zsr3uBxp5sU7rYj3t5O3f1ur2ZXjpW9EIfxlG8OTkOWFdun4uBJ8v99o3A0xTfDNLpdgF+C1hT\nLu9fKB6KfRTFMf+Lte5jRV9fKl9Ld1ZMW1j2980UXxHX9kn/kRX78tR+UHtnx5SneO3v7xzg6+Xw\nCGBwOfzHwPJy+GLgmbKvB5XbvRF4B/DvwJCy3SLgI7Xs70A/Y/XzlNL6cvhBihfhCeU7z0eBacDv\nVLT/57TnJYeVKaVX2uYBHyjfnf4pxQ5ba48Ck8p38meklLYCZ0bEj8v+/Tf27B/A8RS/l/9MxV74\njxnqOAP415TSyymlF3n9k/k7c09KaVtKaQvFH91/r+jTuIp23wBIKd0HjIiIkR2W88fAwohYX653\nRBRnlv6Qsm8ppVXA82X708tat6eUXgJWlPUDPJ1SeqAcfjcwAfhhuewZwG9X0a9qta3r94F7U0pb\nUnHJ6daydoBXKf7Aw2v7b1/obL+qxmkUrxMogkKln6SUWlNxyXM9RV+Oo9gXf1a2+Qde6zvA7R2W\nsaL8vye/i86OBe8B/rncvtcDR1S073gs+FZKaSfF76YBuKuc3nF/hZ5t0672497U3eu2N44VvWE0\n8E1gWkrp4XLad1NKW1NKv6a4DPbbdLFdUkr/l+KM6nCKAPZPFNvrDOD7fdyXntoK/Bq4MSKaKMJ/\nf9LVMaWz1/QhFK/LFoo32pV/w76TUvpV+Xd5BcXx/L0UbxLWlK+f9wLH9mpvujHQ77HaUTH8G4qU\nezPFu8eHy1OoEyvabO/w8+3jKaWXI+I7wDnAf6fYkDWVUvpZRJxC8Q7+yii+/PrPKM4sbIqIKyje\nsdXKLl673Nyxjspts7tifDd77pcdH7TWcXwQxVmmX1dOjAh6oHL7B8WL+MKeLGgf19WVneUfNCj2\n3z55vXaxX+3RpGK42v2r42uxmr50/B21LaMnv4uO638L8EJK6aQu2ne67pTS7oio3C4d99fudLVN\nO92PVZWtFGeiTue1e4n2dX/7EcU3hjxBEab+lOKNwv/MWun+qzymQvn6S8U3pJxKESqmAs0Ub6z7\nhb0cUzp7Tf8NxRvvD0bEOIozwe2L6rhoimP1P6SUPt0LpffIQD9j1ZnhwDPlmadp+/izX6e4xLQm\npfR8d417W0QcCbycUvpH4G+BU8pZz5Xvdqd28mM/BcZVXG/PERzuA86N4r6V4cCUcvpTvBZAO6ul\nGucDRMTpFN9B2fHsybeBS9tG4rX74O4DPlxOOxsYVU7/flnrwRExDPggnb8rfQD4g4h4W7mMYRHx\n9h72YW9+AvxRFPegNVBsj//ohfVUbS/7VZtfRsQ7ImIQxe+vzQMUp/yh+E7R7jxBsS++rRyfTt/1\n/UXg5xHxISg+RBD57pnsyTbtaj/uTV29btv0xrGiN7xKsR9+JCI+vJd2e9su36e4DHofxSXFM4Ed\n+3C2tq88DUyIiDeUZ+/fC1Ae7w9Jxbek/A+KS6IA2yj+5tVUFceUSocAm8vhizvMmxQRh0bEQRSX\ncX9IcUl/akQcXq7r0IjIeXVhnx2Iwep/AT+m2CA/3ZcfTCk9SHFAvqkX6uqJE4GflKc/P09xz9gS\nimvPd1PcK7CH8h3xTGBVFDekPru/RaSUHqK4bPMw8K2K9X4JmB0R6yjuWeqJX5c/vxj4WCfz/xxo\nLG/YfJzi3h6AvwL+MCIeo7jp9r8qar2Z4iD7Y4rr+us66dMWihf1NyLiEYr7go7vYR+6lIovK58P\n3EPx+3swpfTN3OvZR53tV5XmU1zO+hHFPQ9t/gKYW/6+3kZxJqFL5b74UYrT/o9SnP1ZvLefyWwa\n8LGIeJgO6JJ8AAABC0lEQVTiX
rtzciy0h9u0q/241+zldds2P/uxoreklLYDH6AIFSO6aLO37fJ9\nisuA95WXgDdR3FfXn6SU0iaKe91ayv/bjl3DgTvL194PgLnl9NuAvyxvBK/Zzet0f0ypdC3wxfK4\n3/FM40+A5cAjFPderU0pPQ58Dvh22f/vsOdl/T7nV9rsgzJ13wscn/x4vLSHKD5h+0pKKUXEBRQ3\nsmcJK9KBLCLeDDyUUqrpmZhaKm/daUwpNde6lu4M9HussomIjwBXAXMNVVKnfo/iBuwAXqC4T0XS\nfqh4Q/+lGpeiKnnGSpIkKZMD8R4rSZKkXmGwkiRJysRgJUmSlInBSpIkKRODlSRJUib/Hzjqq2KO\nNM6aAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "tfidf for article 7 is:\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAEyCAYAAAA4KJ7OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH05JREFUeJzt3X+UXWV97/H3NxNQw0/bhFZIUqCFcKmo2AFri236Qy5E\nsqAhFTQEkEpIYtBerhfThVorcvkhi0oNmhBWQFLaoBLaXIlSlwVBQUgwCAkamgtEBq2g14IJGBLn\ne//Ye8aTcSZzMjwzZ2byfq01K3vv85x9vvvsffZ85tlP9onMRJIkSa/cmFYXIEmSNFoYrCRJkgox\nWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFjG3VC48fPz4PPfTQVr28\nJElS0x566KEfZ+aE/tq1LFgdeuihrF27tlUvL0mS1LSI2NxMOy8FSpIkFWKwkiRJKsRgJUmSVIjB\nSpIkqZCWDV5vha1btzJ//nz23ntvpk6dyqxZswD44Q9/yOWXX05mcuaZZzJ+/HiuvPJKMpOjjjqK\nD33oQy2uXJIkjQR7VI/VypUrmTlzJkuXLmXVqlXdy6+++mr2228/xowZw8SJE5kyZQrLli3jxhtv\n5IEHHmhhxZIkaSTZo4JVR0cHkyZNAqCtra17+YYNGzj77LP52Mc+xqWXXtq9fMWKFZx44olDXqck\nSRqZ9qhgNXHiRDo6OgDo7OzcaflrX/ta9t13X37+858DVajavHkzc+fObUmtkiRp5InMbMkLt7e3\n51DfIHTr1q0sWLCAV7/61Zxwwgl85StfYfny5Tz22GNcddVVRATvfe97GTduHNOnT+eUU05h3Lhx\nXHPNNUNapyRJGl4i4qHMbO+33Z4UrCRJkgai2WC1R10KlCRJGkyjPljt6CzfIzcY65QkSSPfqL+P\n1dgxwRXrthdd58Jj9yq6PkmSNDqM+h4rSZKkoWKwkiRJKsRgJUmSVIjBSpIkqZCmglVEnBQRGyNi\nU0Qs7KPN1Ih4OCI2RMTXy5YpSZI0/PX7vwIjog24Dng70AGsiYhVmflYQ5sDgc8AJ2Xm9yPioMEq\nWJIkabhqpsfqeGBTZj6RmS8DK4BTe7R5N7AyM78PkJnPli1TkiRp+GsmWB0CPN0w31Eva3Qk8NqI\nuDsiHoqIs3tbUUTMiYi1EbH2ueeeG1jFkiRJw1Spwetjgd8D3gH8d+AjEXFkz0aZeX1mtmdm+4QJ\nEwq9tCRJ0vDQzJ3XnwEmNcxPrJc16gB+kplbga0RcQ/wRuDxIlVKkiSNAM30WK0BjoiIwyJib+BM\nYFWPNv8KnBARYyNiHPAW4LtlS5UkSRre+u2xyswdEbEAuBNoA5Zl5oaImFs/vjgzvxsRXwEeATqB\nGzJz/WAWLkmSNNw09SX
MmbkaWN1j2eIe858EPlmuNEmSpJHFO69LkiQVYrCSJEkqxGAlSZJUiMFK\nkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJ\nUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRC\nDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRis\nJEmSCmkqWEXESRGxMSI2RcTCXh6fGhHPR8TD9c9Hy5cqSZI0vI3tr0FEtAHXAW8HOoA1EbEqMx/r\n0fTezDxlEGqUJEkaEZrpsToe2JSZT2Tmy8AK4NTBLUuSJGnkaSZYHQI83TDfUS/r6Q8i4pGI+HJE\n/G6R6iRJkkaQfi8FNunbwOTM3BIR04B/AY7o2Sgi5gBzACZPnlzopSVJkoaHZnqsngEmNcxPrJd1\ny8wXMnNLPb0a2CsixvdcUWZen5ntmdk+YcKEV1C2JEnS8NNMsFoDHBERh0XE3sCZwKrGBhHxmxER\n9fTx9Xp/UrpYSZKk4azfS4GZuSMiFgB3Am3AsszcEBFz68cXAzOBeRGxA3gJODMzcxDrliRJGnaa\nGmNVX95b3WPZ4obpRcCisqVJkiSNLN55XZIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmS\nJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkq\nxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjB\nSpIkqRCDlSRJUiEGq1Fg69atnHPOOZx//vnccsstOz326KOPctBBB7FlyxYALrjgAo455phWlClJ\n0qhnsBoFVq5cycyZM1m6dCmrVq3qXr59+3ZuuOEGTj755O5lS5YsYcqUKa0oU5KkUc9gNQp0dHQw\nadIkANra2rqXX3311bz//e8nIlpVmiRJexSD1SgwceJEOjo6AOjs7Oxe/vDDD/PpT3+aBx98kCVL\nlrSqPEmS9hgGq1FgxowZ3HbbbcybN4/p06cze/ZsAG699VY+9alPcfzxx3PBBRcAcMkll7Bu3Trm\nzp3Ltm3bWlm2JEmjTmRmS164vb09165dOySvdcW67UXXt/DYvYquT5IkDW8R8VBmtvfXzh4rSZKk\nQpoKVhFxUkRsjIhNEbFwF+2Oi4gdETGzXInqzY7O8j2Ng7FOSZL2JGP7axARbcB1wNuBDmBNRKzK\nzMd6aXcl8G+DUah2NnZMeIlTkqRhppkeq+OBTZn5RGa+DKwATu2l3YXAbcCzBeuTJEkaMZoJVocA\nTzfMd9TLukXEIcBfAJ/d1YoiYk5ErI2Itc8999zu1ipJkjSslRq8/ingQ5nZuatGmXl9ZrZnZvuE\nCRMKvbQkSdLw0O8YK+AZYFLD/MR6WaN2YEV9h+/xwLSI2JGZ/1KkSkmSpBGgmWC1BjgiIg6jClRn\nAu9ubJCZh3VNR8RNwJcMVZIkaU/Tb7DKzB0RsQC4E2gDlmXmhoiYWz++eJBrlCRJGhGa6bEiM1cD\nq3ss6zVQZea5r7wsSZKkkcc7r0uSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaS\nJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmS\nCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVi\nsJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQV0lSwi
oiTImJj\nRGyKiIW9PH5qRDwSEQ9HxNqIOKF8qZIkScPb2P4aREQbcB3wdqADWBMRqzLzsYZmXwNWZWZGxBuA\nzwNHDUbBkiRJw1UzPVbHA5sy84nMfBlYAZza2CAzt2Rm1rP7AIkkSdIepplgdQjwdMN8R71sJxHx\nFxHxPeAO4LzeVhQRc+pLhWufe+65gdQrSZI0bBUbvJ6Zt2fmUcBpwKV9tLk+M9szs33ChAmlXlqS\nJGlYaCZYPQNMapifWC/rVWbeAxweEeNfYW2SJEkjSjPBag1wREQcFhF7A2cCqxobRMTvRETU028G\nXgX8pHSxkiRJw1m//yswM3dExALgTqANWJaZGyJibv34YuB04OyI2A68BJzRMJhdkiRpj9BvsALI\nzNXA6h7LFjdMXwlcWbY0SZKkkcU7r0uSJBVisJIkSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRC\nDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpkKa+hFlqha1btzJ//nz23ntvpk6d\nyqxZswC44oorePLJJ/nxj3/Mtddey8SJEwGYO3cu++67L1dffXUry5Yk7cHssdKwtXLlSmbOnMnS\npUtZtWpV9/KFCxeyZMkSzjrrLO666y4AvvjFL3Lccce1qlRJkgCDlYaxjo4OJk2aBEBbW9tOj23Z\nsoXPf/7znHbaafzoRz9i3bp1/Nmf/VkrypQkqZvBSsPWxIkT6ejoAKCzs7N7+QsvvMC8efO46qqr\n2G+//fj617/Os88+y8c//nHuuusuHn/88VaVLEnawznGSsPWjBkzWLBgAXfccQfTp09n9uzZLF++\nnHPPPZft27dz2WWX8c53vrP756mnnmLRokUceeSRrS59l5odO3bwwQczb948XnrpJcaNG8fixYtb\nXLkkqT+RmS154fb29ly7du2QvNYV67YXXd/CY/cqur6BGq3bNdotX76cAw88kOnTp3PGGWdw6623\n7vT47bffzpYtW5g9e3b3srPOOoubb76ZMWPsZJakVoiIhzKzvb92nqWlIdbs2DGAxx57jHPOOYcD\nDzzQUCVJI4Bnag0bOzoHp/d0sNY7UM2OHQM4+uij+dznPkdnZyebN29uSb2SpOY5xkrDxtgxUfzy\nJgy/S5zNjh076qijuPzyy+ns7GTs2LHdvVySpOHLYCUNsX322Ycbb7yxe75r8PrKlSt/pe2nP/3p\nIatLkvTKeSlQkiSpEIOVNMj2lLFjkiQvBUqDbk8ZOyZJssdKkiSpGIOVJElSIQYrSZKkQgxWkiRJ\nhRisJEmSCjFYSZIkFWKwkiRJKqSpYBURJ0XExojYFBELe3l8VkQ8EhGPRsR9EfHG8qVKkiQNb/0G\nq4hoA64DTgaOBt4VEUf3aPYk8MeZeQxwKXB96UIlSZKGu2Z6rI4HNmXmE5n5MrACOLWxQWbel5k/\nrWe/BUwsW6YkSdLw10ywOgR4umG+o17Wl78CvvxKipIkSRqJin5XYET8CVWwOqGPx+cAcwAmT55c\n8qUlSZJarpkeq2eASQ3zE+tlO4mINwA3AKdm5k96W1FmXp+Z7ZnZPmHChIHUK0mSNGw1E6zWAEdE\nxGERsTdwJrCqsUFETAZWArMz8/HyZUqSJA1//V4KzMwdEbEAuBNoA5Zl5oaImFs/vhj4KPDrwGci\nAmBHZrYPXtmSJEnDT1NjrDJzNbC6x7LFDdPvBd5btjRJkqSRxTuvS5IkFWKwkiRJKsRgJUmSVIjB\nSpIkqRCDlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKkQg5Uk\nSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJUiMFKkiSpEIOVpCK2bt3KOeec\nw/nnn88tt9zSvXz16tVMmzaNRYsWAbBx40bOO+883
vOe93DllVe2qlxJGhRjW12ApNFh5cqVzJw5\nk+nTp3PGGWcwa9YsAKZNm8a4ceNYv349AFOmTGHZsmUAzJgxo2X1StJgsMdKUhEdHR1MmjQJgLa2\ntn7br1ixghNPPHGwy5KkIWWwklTExIkT6ejoAKCzs3OXbVesWMHmzZuZO3fuUJQmSUPGYCWpiBkz\nZnDbbbcxb948pk+fzuzZswG4//77ueaaa7j11lu57bbbWLduHR/84Ad58sknueiii1pcdf+aHTsG\ncMEFF3DMMce0okxJw4RjrCQVsc8++3DjjTd2z3eNsXrrW9/KqlWrdmrb1bM1EjQ7dgxgyZIlzJw5\ns1Wl7patW7cyf/589t57b6ZOndq9XatXr2bRokVMmzaNBQsWAFVgvO+++3j00UdbWbI0IthjJUm7\nsLtjx0aKrsC4dOnSnYLvtGnTuPjii3dqu2TJEqZMmTLUJQ6IPYxqNYOVJO3C7owdG0kMjCMrMGrk\nMFhJGpAdnTmi1jtQzY4dA7jkkktYt24dc+fOZdu2ba0su18GRmlwOMZK0oCMHRNcsW578fUuPHav\n4ut8JXZn7Nhll13GZZddNqT1DdSMGTNYsGABd9xxR3dgXL58eXdg/OlPf8rrXvc6Tj/99J0C47XX\nXsurXvWqVpffp67A+KY3vWlUBUaNHAYrSdoDGRhHVmDUyGGwkqQGOzqTsWNixKxXOxutgbGv/8W5\nfv16Lr/8cgD+5m/+hsMPP5w5c+aw//77c/DBB/PhD3+4lWXvkQxWktRgT7nEqZGlr9t+XHvttVx3\n3XVEBBdffDFz5sxhypQpfOQjH+F973sfTz/9dPeYMw2NpgavR8RJEbExIjZFxMJeHj8qIu6PiG0R\n8cHyZUqSXonR+p8NRut29dTXoPznn3+eAw88kAMOOICf/exnHHvssWzbto2LLrqIH/zgBzzzzDOt\nKrkpfd0eY/369cyaNYtZs2axfv16XnzxRc466yzmz5/PJz7xiRZW3L9+e6wiog24Dng70AGsiYhV\nmflYQ7P/B7wfOG1QqpQkvSKjtSdutG5XT30Nyj/ggAN4/vnniQj2228/xowZ0x08zjvvPA4//PBW\nldyU0dgT10yP1fHApsx8IjNfBlYApzY2yMxnM3MNUP7oliRpD9fXbT8+8IEPcOGFF7JgwQIuvPBC\nAObPn8/5559Pe3s7Bx10UCvL7tdo7IlrZozVIcDTDfMdwFsG8mIRMQeYAzB58uSBrEKSpD1OX4Py\nX//613PzzTfv1PYzn/nMkNb2SozGnrghHbyemdcD1wO0t7cPrwvYkiRpSPV1e4yunjig+4758+fP\nZ/v27cO+J66ZYPUM0Hghc2K9TJIkDYI95bYfo7EnrplgtQY4IiIOowpUZwLvHtSqJEnag+0pg/JH\no36DVWbuiIgFwJ1AG7AsMzdExNz68cUR8ZvAWmB/oDMi/ho4OjNfGMTaJUmShpWmxlhl5mpgdY9l\nixum/5PqEqEkSVKv9oRLnN55XZIkDYk94RJnU3delyRJUv8MVpIkSYUYrCRJkgoxWEmSJBVisJIk\nSSrEYCVJklSIwUqSJKkQg5UkSVIhBitJkqRCDFaSJEmFGKwkSZIKMVhJkiQVYrCSJEkqxGAlSZJU\niMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhRisJEmSCjFYSZIkFWKwkiRJKsRgJUmSVIjBSpIkqRCD\nlSRJUiEGK0mSpEIMVpIkSYUYrCRJkgoxWEmSJBVisJIkSSrEYCVJklSIwUqSJKmQpoJVRJwUERsj\nYlNELOzl8YiIf6gffyQi3ly+VEmSpOGt32AVEW3AdcDJwNHAuyLi6B7NTgaOqH/mAJ8tXKckSdKw\n10yP1fHApsx8IjNfBlYAp/Zocypwc1a+BRwYEa8rXKskSdKw1kywOgR4umG+o162u20kSZJGtcjM\nXTeImAmclJnvr
ednA2/JzAUNbb4EXJGZ36jnvwZ8KDPX9ljXHKpLhQBTgI2lNqSA8cCPW13EIHC7\nRha3a2Rxu0YWt2tkGW7b9VuZOaG/RmObWNEzwKSG+Yn1st1tQ2ZeD1zfxGsOuYhYm5ntra6jNLdr\nZHG7Rha3a2Rxu0aWkbpdzVwKXAMcERGHRcTewJnAqh5tVgFn1/878PeB5zPzh4VrlSRJGtb67bHK\nzB0RsQC4E2gDlmXmhoiYWz++GFgNTAM2AS8C7xm8kiVJkoanZi4FkpmrqcJT47LFDdMJvK9saUNu\nWF6iLMDtGlncrpHF7RpZ3K6RZURuV7+D1yVJktQcv9JGkiSpEIOVJElSIaM2WEXEoRGxvtV1DBcR\n8fGI+PNelk+t70M2GK85JPsgIm7o5WuWNEJFxFMRMb5Frz3szxsR8bGI+GCB9dwdEb/yX9kj4tyI\nWLSb62rZPutP1/vVinNgKRGxpf734Ij4Yj292/tpqHXVPYDnvaJ90texPVSaGry+p4mIsZm5o6/5\nkSgzP9rqGnbH7rznXTevHYkiIqjGOna2uhZpKLTqmB9p58DeZOYPgJmtrkO7Nmp7rGptEbE0IjZE\nxL9FxGsi4vyIWBMR34mI2yJiHEBE3BQRiyPiAeCq+q+c5RHxTWB5RNwTEW/qWnFEfCMi3tiqDatr\n2Cci7qi3ZX1EnBERH623b31EXF+fxLq2b2Y9fVJEfC8ivg3MGOQye9sHvx0RX4mIhyLi3og4qqHG\nnvvgc3WbzRExIyKuiohH6+fvVT+v+6+TiHhX/fj6iLiy4b3aEhGX1e/VtyLiN+rlE+rjYE3984eD\n/H509YpsjIibgfXA7N2pudUi4n9FxPvr6b+PiH+vp/80Im6JiM9GxNp6n/9dw/Oeioi/i4hv19vb\ntd9/vT42NkTEDUC0ZMN6iIjDI2Jdvb0r62PuPyLiqoY2v3K8RcRfRsQ19fQHIuKJhvV9c4C1XBIR\nj0fEN6i+taLncT8+Ip6qp8+NiH+JiK/W7/mCiLio3pZvRcSvNax6dkQ8XNd/fC+v2+vnY1f7rH6t\n9fXPX9fLeh7zkyLixIi4vz4evhAR+w7kvdmN96tV58Bioo8e1Yh4R/1ejm/FOa0/UflkfUw8GhFn\n7Gp5j+ceVx+7vx3V77xlEfFgvezUus1rImJFRHw3Im4HXjPEm7izzByVP8ChwA7gTfX854GzgF9v\naPMJ4MJ6+ibgS0BbPf8x4CHgNfX8OcCn6ukjgbXDYBtPB5Y2zB8A/FrD/HJgesP2zQReTfW9jkdQ\nnQw/D3xpiPfB14Aj6mVvAf59F/vgG8BewBup7pF2cv3Y7cBp9fTdQDtwMPB9YAJVb+y/N7TJhvfi\nKuDD9fQ/ASfU05OB7w7RsdkJ/P5Aam71T133F+rpe4EH6330t8AFXccg1X3v7gbeUM8/1fB5mw/c\nUE//A/DRevod9XaPb9G2HUr1i38KsK4+7s4Fnqg/X68GNlN900Sv+w74TWBNvb4vUt1k+RCqc8jl\nA6jp94BHgXHA/lT3C/xg13FftxkPPFVPn1u32a+u7Xlgbv3Y3wN/3fC5WVpP/xGwvuH5i3b1+ehr\nnzXUug+wL7ABOLbxmG+o9x5gn3r+Q13rK7AP+3q/bmKIz4EFj8stjcdn434C/oLqc/jaXe2zFtd9\nOvBVqnPCb9Sfm9ftYvlUqt8Ff0D1e3hyvZ7/DZxVTx8IPF4faxdR3WMT4A1Uv3faW7Xdo73H6snM\nfLiefojqoHx9VD0gjwKzgN9taP+FzPxFw/yqzHyp6zHglKh6Sc6j+pC22qPA2yPiyoh4W2Y+D/xJ\nRDxQb9+fsvP2ARxF9b78R1ZH4T8Oco297YM/AL4QEQ8DS6g+SF167oMvZ+Z2qm1tA75SL3+0Xlej\n44C7M/O5rC4j3kL1CwPgZaoPamMdAH8OLKprWQXsX/Iv513YnJnfGmDNrfYQ8Hs
RsT+wDbifKti+\njeoE/866J2Ad1fHXOP5tZcM6Dq2n/4j6OMzMO4CfDnL9/ZkA/CswKzO/Uy/7WmY+n5k/Bx4Dfos+\n9l1m/iewb0TsRxXA/olqG7ven931NuD2zHwxM1/gV7/5ojd3ZebPMvM5qmD1f+rlPT83/wyQmfdQ\nHfsH9lhPX5+PvvbZCXWtWzNzC9X+flv9WNcxD1U4Pxr4Zr3uc6je0xL6e7+G+hw4mP6UKpS+IzO7\n9kGrzmm7cgLwz5n5i8z8EfB1qs9PX8sB/hvVfaymZ+b362UnAgvrbbubKiRPZufj8RHgkSHZqj6M\n9jFW2xqmf0HVPXgTVY/AdyLiXKpk3GVrj+d3z2fmixHxVeBU4J1UfxW1VGY+HhFvprrr/Sei+vLr\n91El9acj4mNUB14r9dwHvwH8V2a+qY/2PffBNoDM7IyI7fWJEKq/fnfn+G187i8anjuG6q/on+/G\nukrouZ296avmlsrM7RHxJNVfzPdRncT+BPgd4CWq3oHjMvOnEXETOx+DXcfDsNmeXjxP9ZfzCVQh\nCn71OO6v9vuovoFiI1WYOg94K/A/C9a5g18O5+j5OW+st7NhvufnpueNDHvO9/r5iBjQ1drGYz6A\nr2bmuwayInX7v8Dh1FdR6mWtOqeV9kOq4/pY4Af1sgBOz8yNjQ0HeDwOmtHeY9Wb/YAf1j1Ps3bz\nuTdQdYGvafjroGUi4mDgxcz8R+CTwJvrh35c/4XS2yDH7wGHRsRv1/NDfWJ7AXgyIv4Suq+xlxqr\n9iDwx/U4gzaqbft6P8/5N+DCrploGEc3RAZS83BwL1WAuqeenkvVQ7U/1S/Q56MaE3ZyE+u6B3g3\nQEScDLx2MAreDS9TXV45OyLevYt2u9p3je/POqrgua3uVd5d9wCn1eNI9gOm18uf4pd/4A10QHPX\nWJcTqL7jtWd9fX0++tpn99a1jouIffjlZaqevgX8YUT8Tr2OfSLiyAFuQ099vV9dWn0OLGkz1eW0\nmyOi6+pEq89pvbkXOCMi2iJiAlUP04O7WA7wX1SXmS+PiKn1sjuBCyO6xw4fWy9vPB5fT3U5sGX2\nxGD1EeAB4JtUH7CmZeZDVMHgxkGoayCOAR6su0X/lmrM2FKqMSJ3Uo3t2En9V8wc4I76cs2zQ1du\nt1nAX0XEd6jGYJxaYqVZffH3QuAu4DvAQ5n5r/087f1Ae0Q8EhGPUQWEITPAmoeDe6ku4d5fd+H/\nHLi3vnS2juqz9U9Un7P+/B3wRxGxgWog8ff7aT/oMnMrcArwP6jCYm9tdrXv7qW6DHhPfWn7aarx\nggOp5dvArfVrfJlffq6vBuZFxDqqMUsD8fP6+YuBv+rl8b4+H73us7rWm6h+OT5ANY5uXS/b9BxV\nj+c/R8QjVJeTjxrgNvRcd1/vV9fjw+EcWExmfo/qnPqFOiy29JzWh9upera/QzUW8eL6knlfywGo\nzy2nANdFxFuAS6nGcz5SH3uX1k0/S3X5/bvAx6mGGrSMX2mzG+oeoruBo9L/Hi9JknrYE3usBiQi\nzqb6C+wSQ5UkSeqNPVaSJEmF2GMlSZJUiMFKkiSpEIOVJElSIQYrSZKkQgxWkiRJhfx/vCBwLLEb\nXYUAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "feature_names = vectorizer.get_feature_names()\n", "li = [] #li is a list of tfidf (top 10 words) in the book.\n", "tfidf = []\n", "for l in tfidfArray:\n", "\n", " # sort tf-idf /How to explain this : ???\n", " #print 
[[feature_names[x],l[x]] for x in (l*-1).argsort()][:5]\n", " tfidf.append([(feature_names[x],l[x]) for x in (l*-1).argsort()][:10])\n", "\n", "\n", "# plot the result\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "\n", "def plotTfidf (li): # parameter li is a list of (word, tfidf) pairs for one book (the caller passes the top 10 words, highest tfidf first). \n", " #e.g. [(u'pens', 0.43280168447929529), (u'the', 0.29211105475772814), (u'bit', 0.21640084223964764), (u'of', 0.2133540325973512), (u'am', 0.18396048627999845)]\n", " \n", " # set figure size\n", " fig = plt.figure(figsize=[10, 5])\n", " # plot tfidf of a given doc\n", " labels,Y = zip(*li) # unzip the pairs into a tuple of words (labels) and a tuple of tfidf scores (Y)\n", " X = np.arange(len(labels))\n", " plt.bar(X,Y,width = 0.4,facecolor = 'lightskyblue',edgecolor = 'white') \n", " plt.xticks(X, labels)# put the word labels on the x axis\n", " for x,y in zip(X,Y):\n", " plt.text(x,y+0.005,round(y,2), ha='center', va= 'bottom',fontsize=7) # round(y,2): print the score with two decimals just above each bar\n", " plt.show() \n", "\n", "i = 0\n", "for doc in tfidf:\n", " print \"tfidf for article \" + str(i+1) +\" is:\"\n", " plotTfidf(doc)\n", " i +=1 \n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "From the result, we can see that the top-ranked words include common ones like 'harry', 'said', 'ron'...\n", "This is because the number of documents is too small. So we need to set the max_df parameter to filter out words that have a very high document frequency (df). 
Please see the example in the companion notebook linked below (note: this is a localhost link, so it only opens on a machine running that Jupyter server):\n", "http://localhost:8888/notebooks/Desktop/temp/UofA%20/Learn%20python/machine%20learning/Feature%20selection_finding%20distinctive%20words(sklearn-CountVectorizer%2B%20TfidfTransformer)%20.ipynb\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 }