{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Yelp text data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "ename": "OSError", "evalue": "[E050] Can't find model 'en_core_web_md'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mlaptop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'/media/seapea/Blade HDD/_Storage/Data/yelp_dataset/'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mtower\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'/run/media/seapea/HDD1TB_1/SharedSpace/_Large_datasets/Yelp/'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mnlp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspacy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'en_core_web_md'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdisable\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tagger'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ner'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0mread\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mReadability\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnlp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mlast\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/spacy/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(name, **overrides)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdepr_path\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mdeprecation_warning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mW001\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdepr_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/spacy/util.py\u001b[0m in \u001b[0;36mload_model\u001b[0;34m(name, **overrides)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exists\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Path or Path-like to model data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m 
\u001b[0;32mreturn\u001b[0m \u001b[0mload_model_from_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moverrides\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mErrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mE050\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'en_core_web_md'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory." ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import re\n", "import os\n", "import seaborn as sns\n", "import dask.dataframe as dd\n", "import spacy\n", "from spacy_readability import Readability\n", "from dask import delayed\n", "from dask_ml.model_selection import train_test_split\n", "from gensim.models import Word2Vec, Doc2Vec\n", "from gensim.models.doc2vec import TaggedDocument\n", "from gensim.corpora import Dictionary\n", "from gensim.similarities import Similarity\n", "from gensim.models import LsiModel\n", "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", "from sklearn.manifold import TSNE\n", "# from sklearn.model_selection import train_test_split\n", "# from gensim.test.utils import common_dictionary, common_corpus\n", "# from tqdm import tqdm\n", "\n", "sns.set_style('darkgrid')\n", "laptop = '/media/seapea/Blade HDD/_Storage/Data/yelp_dataset/'\n", "tower = 
'/run/media/seapea/HDD1TB_1/SharedSpace/_Large_datasets/Yelp/'\n", "nlp = spacy.load('en_core_web_md', disable=['tagger', 'ner'])\n", "read = Readability(nlp)\n", "nlp.add_pipe(read, last=True)\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read in a sample of the larger dataset for necessary trial and error\n", "df0 = pd.read_parquet(laptop + 'parquet/part.0.parquet')#.set_index('review_id')\n", "df1 = pd.read_parquet(laptop + 'parquet/part.1.parquet')#.set_index('review_id')\n", "df2 = pd.read_parquet(laptop + 'parquet/part.2.parquet')#.set_index('review_id')\n", "df3 = pd.read_parquet(laptop + 'parquet/part.3.parquet')#.set_index('review_id')\n", "df4 = pd.read_parquet(laptop + 'parquet/part.4.parquet')#.set_index('review_id')\n", "\n", "# Let's concat and convert to Dask\n", "df = pd.concat([df0, df1, df2, df3, df4], axis=0)\n", "del(df0, df1, df2, df3, df4)\n", "ddf = dd.from_pandas(df, chunksize=2**12)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
review_idstarsusefultext
0Q1sbwvVQXV2734tPgoKj4Q16Total bill for this horrible service? Over $8G...
1GJXCdrto3ASJOqKeVWPi6Q50I *adore* Travis at the Hard Rock's new Kelly ...
22TzJjDVDEuAW6MR5Vuc1ug53I have to say that this office really has it t...
3yi0R0Ugj_xUx_Nek0-_Qig50Went in for a lunch. Steak sandwich was delici...
411a8sVPMUFtaC7_ABRkmtw17Today was my second out of three sessions I ha...
\n", "
" ], "text/plain": [ " review_id stars useful \\\n", "0 Q1sbwvVQXV2734tPgoKj4Q 1 6 \n", "1 GJXCdrto3ASJOqKeVWPi6Q 5 0 \n", "2 2TzJjDVDEuAW6MR5Vuc1ug 5 3 \n", "3 yi0R0Ugj_xUx_Nek0-_Qig 5 0 \n", "4 11a8sVPMUFtaC7_ABRkmtw 1 7 \n", "\n", " text \n", "0 Total bill for this horrible service? Over $8G... \n", "1 I *adore* Travis at the Hard Rock's new Kelly ... \n", "2 I have to say that this office really has it t... \n", "3 Went in for a lunch. Steak sandwich was delici... \n", "4 Today was my second out of three sessions I ha... " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# usr = dd.read_json('G:/SharedSpace/_Large_datasets/Yelp/user.json', lines=True, blocksize=2**28)\n", "# biz = dd.read_json('E:/_Large_datasets/Yelp/business.json', lines=True)#, blocksize=2**28)\n", "\n", "rev = dd.read_json(laptop + 'review.json', \n", " lines=True, blocksize=2**22) # lower blocksize (i.e. 2*22) made this work in Linux\n", "rev = rev.drop(['funny', 'cool', 'date', 'user_id', 'business_id'], axis=1)\n", "rev = rev.drop_duplicates(subset='text')\n", "# rev = rev.set_index('review_id')\n", "# rev.to_parquet(laptop + 'parquet/')\n", "rev.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dask challenges\n", "Throughout this project, I have found it to be very painful working with Dask. It is not covered in the course anywhere, despite being a more realistic work environment where too much data exists to analyze things in memory. 
Arithmetic computations and other analyses across the Dask dataframe chunks pose challenges we haven't seen when operating only in memory.\n", "\n", "Some basic issues encountered with Dask include:\n", "\n", "- Boolean indexing\n", "- .loc slicing (returning many values)\n", "- spaCy pipelines (memory allocation)\n", "- Word2Vec conversions (memory allocation)\n", "- Basic computations (very long turn around)\n", "\n", "Because of these limitations, some of the work and discovery efforts behind the scenes might not be displayed in the notebook.\n", "\n", "### Distribution of review ratings\n", "Let's try some basic Dask computations, verifying integrity and observing the distribution of review ratings." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5gAAAIWCAYAAAAyBSKhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdeZSk2Vkf6N8XEZmVlVXVVdXV1S16kbolRDQMGCEYGwYDRkIWDB7bZ5AH4e0M2DBnPEaMZR0Wz9hmG7AxB4x9NGOwjGwDBmMWs9gGa2mxSiAkC0moOxpJ3eq9u7r2qtwjvvkjlozI9YtcKiOrnuccncrKjPji3iqg+Ol973uLsiwDAAAAu1U76AUAAABwcxAwAQAA2BMCJgAAAHtCwAQAAGBPCJgAAADsCQETAACAPdHYj4d2Op2y3d7b60/q9SJ7/cyDYi+TyV4m082yl5tlH8mN38vUVP3FJGdv2AfepPbq3+ab6X+Wq7iV9nsr7TW5tfZrrzevg9rvVv8270vAbLfLXLo0t6fPPHVqds+feVDsZTLZy2S6WfZys+wjufF7OXv2xKdu2IfdxPbq3+ab6X+Wq7iV9nsr7TW5tfZrrzevg9rvVv82a5EFAABgTwiYAAAA7AkBEwAAgD0hYAIAALAnBEwAAAD2hIAJAADAnhAwAQAA2BMCJgAAAHtCwAQAAGBPCJgAAADsCQETAACAPSFgAgAAsCcETAAAAPaEgAkAAMCeEDABAADYEwImAAAAe0LABAAAYE8ImAAAAOwJARMAAIA9IWACAACwJwTMMf3f/+nh/MbHzx/0MgAAACaOgDmmd//xi/nDpy8f9DIAAAAmjoA5prJMOuVBrwIAAGDyCJhjKssynVLCBAAAWEvAHFOnjIAJAACwAQFzTGW6bbIAAACMEjDHUPaSZVvCBAAAWEfAHEM/VsqXAAAA6wmYY+hPj3UGEwAAYD0Bcxy9YClgAgAArCdgjmG1gnmw6wAAAJhEAuYY+pXLUgUTAABgHQFzB9ryJQAAwDoC5hj6rbEqmAAAAOsJmGMo0x/y
c8ALAQAAmEAC5hhKFUwAAIBNCZhj6A/5cQYTAABgPQFzDCqYAAAAmxMwx1C6BxMAAGBTAuYYVof8SJgAAABrNbZ7QbPZbCb590PfenmSf9Bqtf7pvq1qQnUGFUwBEwAAYK1tA2ar1WoleVWSNJvNepKnk/ziPq9rIvXPXmqRBQAAWG/cFtnXJvlEq9X61H4sZtL1c2VHwgQAAFhn2wrmGm9M8tPbvaheL3Lq1OzOVrTpM2t7/sxxzRfdPF5v7G4tk7CXvWIvk8leJs/Nso/k5toLALC3KgfMZrM5neTPJ/mO7V7bbpe5dGluN+ta59Sp2T1/5rguX1lIkiwtt3e1lknYy16xl8lkL5PnZtlHcuP3cvbsiRv2WQDA7oxTwfyqJB9stVrP79diJp0WWQBuZd/zPf8gFy++mM/5nM/L13/9Nx70cgCYQOMEzK9LhfbYm1nHkB8AbmEf/3grc3NzOXHi5EEvBYAJVWnIT7PZnE3yuiS/sL/LmWzl4JqSg10HAADAJKpUwWy1WnNJzuzzWiZeP2CWkTABAADWGveakltaP1a2lTABAADWETDH0D+DWcqXAAAA6wiYY1g9gylhAgAArCVgjqF/9lKHLAAAwHoC5hg6KpgAAACbEjDH0Z8iK18CAACsI2COoV+5bEuYAAAA6wiYYxjcgylgAgAArCNgjsGQHwAAgM0JmGMw5AcAAGBzAuYY+rFSBRMAAGA9AXMM/bOXzmACAACsJ2COYbVF9mDXAQAAMIkEzDH0K5fOYAIAAKwnYI6hVMEEAADYlIA5hn6udAYTAABgPQFzDP3W2LaACQAAsI6AOYZ+rpQvAQAA1hMwx1DGkB8AAIDNCJhjcE0JAADA5gTMcQwCpoQJAACwloA5hs6gRfaAFwIAADCBBMwxDO7BlDABAADWETDHMJgiG3dhAgAArCVgjmH47KV4CQAAMErAHMNwqNQlCwAAMErAHMNwW6xzmAAAAKMEzDEMH7t0VQkAAMAoAXMMnaGvxUsAAIBRAuYYhltk21pkAQAARgiYYxjuitUhCwAAMErAHMPwuUtnMAEAAEYJmDskYAIAAIwSMMfQKTf+GgAAAAFzLMNVy1IFEwAAYISAuUNt+RIAAGCEgDkGFUwAAIDNCZhjKJ3BBAAA2JSAOYbRgClhAgAADBMwxzB6D+YBLgQAAGACCZhjGM6UKpgAAACjBMwxlCNDfg5wIQAAABNIwBzDcKZsS5gAAAAjBMwxDJ+7dE0JAADAKAFzDKUhPwAAAJsSMMfgmhIAAIDNCZhj6Ax/LV8CAACMEDDHMDpFVsIEAAAYJmCOYThTtpUwAQAARgiYY+i4BxMAAGBTAuYOdSJhAgAADBMwxzB6D+bBrQMAAGASCZhjGB7s4wwmAADAKAFzDMORUgUTAABgVKPKi5rN5qkkb0vy2enmrG9otVrv3c+FTaLhIT/OYAIAAIyqWsH8kSS/1mq1HkzyuUke3r8lTa7hqmWnc3DrAAAAmETbVjCbzeZtSb40yf+aJK1WaynJ0v4uazKNBEw9sgAAACOqtMi+PMm5JG9vNpufm+QDSb6l1Wpd39eVTaByqC3WjB8AAIBRVQJmI8mrk3xzq9X6vWaz+SNJvj3J39/sDfV6kVOnZvdoif1n1vb8meOaPjI1+Pro7PSO1zMJe9kr9jKZ7GXy3Cz7SG6uvQAAe6tKwHwqyVOtVuv3er//uXQD5qba7TKXLs3tdm0jTp2a3fNnjmt+YXnw9bVrCztezyTsZa/Yy2Syl8lzs+wjufF7OXv2xA37LABgd7Yd8tNqtZ5L8mSz2Wz2vvXaJB/b11VNqOF7MLXIAgAAjKp0TUmSb07yU81mczrJJ5N8/f4taXJ1DPkBAADYVKWA2Wq1PpTkC/Z5LRNvdIrswa0DAABgElW9B5OsbZGVMAEAAIYJmGMYjpTyJQAAwCgBcwzDVcu2hAkAADBCwBzDcKYsBUwAAIARAuYYhiOlIT8AAACjBMwxGPIDAACw
OQFzDK4pAQAA2JyAOYbhqqUzmAAAAKMEzDGUSeq1IknSli8BAABGCJhjKMuk0QuYKpgAAACjBMwxdMoyvXzpDCYAAMAaAuYYhltkOxImAADACAFzDGVZpl70AqYWWQAAgBEC5hg65WoFU7wEAAAYJWCOoczqkB8VTAAAgFEC5hjKshw6g3nAiwEAAJgwAuYYyjKpOYMJAACwIQFzDP1rSmpFooAJAAAwSsAcQ1kmRVGkKArXlAAAAKwhYI6hTFIkqRfdibIAAACsEjDHUJZliqJbxSydwQQAABghYI6hTDdc1oqkLWACAACMEDDH0CnTG/JTRL4EAAAYJWCOoSzLFClSKwrXlAAAAKwhYI6hO0W2d02JfAkAADBCwBxDpyxTpHsOUwUTAABglIA5hjLd85e1Is5gAgAArCFgjmG1RVYFEwAAYC0BcwydshxcUyJgAgAAjBIwx9Btke1XMA96NQAAAJNFwBxD2Rvy0z2DKWECAAAMEzDH0D2DWaQoirTlSwBuMSsrK0mSS5cu5jd+490HvBoAJpGAOYZOutXLeq1QwQTgljMcMN/97ncc8GoAmEQC5hj6LbJF4gwmAADAGgLmGPotsq4pAQAAWE/AHENZlt17MGsqmAAAAGsJmGMos1rBdAYTAABglIA5hk7Z/QMrkrSVMAEAAEYImGPonsFMt4J50IsBAACYMALmGMqU3RbZmiE/AAAAawmYY+iU3fbYWmHIDwAAwFoC5hjKskytKFKkSEfCBAAAGCFgjqF/BrNeSzoHvRgAAIAJI2COoUy3RbZwTQkAAMA6AuYYOr0W2VoRLbIAAABrCJhjGL6mRL4EAAAYJWCOYXBNiSmyAAAA6wiYY+iU3StKisI9mAAAAGsJmOPo3YNZFzABAADWETDH0Cm7LbJF0T2PCQAAwCoBcwz9a0pqRZG2hAkAADBCwBxDWZa9KbIqmAAAAGsJmGPoDvkpeteUSJgAAADDBMwxlIkKJgAAwCYEzDGUgyE/zmACAACs1ajyomaz+XiSq0naSVZardYX7OOaJlan7A/56YZNAAAAVlUKmD1f3mq1Xty3lRwCZVmmVqR3BvOgVwMAADBZtMiOoXsGs0itiCE/AAAAa1StYJZJ/muz2SyT/Gir1fqxrV5crxc5dWp214sbfWZtz585rqIoMnOkkZVOmRQ73+Mk7GWv2MtkspfJc7PsI7m59gIA7K2qAfOLW63WM81m884k72g2m4+0Wq3f3OzF7XaZS5fm9maFPadOze75M8e10u5keamddqdMu93Z8XomYS97xV4mk71MnptlH8mN38vZsydu2GcBALtTqUW21Wo90/v1hSS/mORP7ueiJlXZLVymKJK2DlkAAIAR2wbMZrN5rNlsnuh/neTPJvnofi9sEpXpTpGt1wpTZAEAANao0iJ7V5JfbDab/df/u1ar9Wv7uqoJ1Z0iW6RTlqbIAgAArLFtwGy1Wp9M8rk3YC0TrztFNqmlMEUWAABgjXHuwbzldcoyRVEkKQVMAACANQTMMZRl9wxmrVZEvgQAABhVaYosXYMpslpkAQAA1lHBHEOnN+SnSAz5AQAAWEMFcwz9a0pqNRVMAACAtQTMMZS9IT+1QgUTAABgLQFzDJ3+GcyiSEfCBAAAGCFgjqFMUiuSetH9utQmCwAAMCBgjqHfItu9C7MbMgEAAOgSMMcwuAezmy+1yQIAAAwRMCsqy3LQIlvrVTDlSwAAgFUCZkX9LFmkGAqYEiYAAECfgFlRP0sWxVCLrHwJAAAwIGBW1J8Y27+mJFHBBAAAGCZgVtSPkrWiGFQw5UsAAIBVAmZF/XbY7hRZFUwAAIC1BMyKVltki6EzmAImAABAn4BZ0WqLrGtKAAAANiJgVjRcrVw9gylhAgAA9AmYFfWzZK0oBlNk2/IlAADAgIBZ0fA9mPVewFTBBAAAWCVgVlRmdchPMRjyc4ALAgAAmDACZkX9MFmLa0oAAAA2ImBWtHpNSVLr/ampYAIA
AKwSMCvqZ8miKFKLCiYAAMBaAmZFgxbZIkNnMAVMAACAPgGzqn6LbIbPYB7gegAAACaMgFlRZ3BNSZFazTUlAAAAawmYFXWGK5iD7x3YcgAAACaOgDmmWlGkcE0JAADAOgJmRastskndNSUAAADrCJgVlVm9B7NfwXQGEwAAYJWAWVE5uKakSG/GT9pKmAAAAAMCZkXDWbI2qGAe0GIAAAAmkIBZUb8ddriC2YmECQAA0CdgVrTaIrtawex0DnBBAAAAE0bArGi4VllzTQkAAMA6AmZFnQ1aZOVLAACAVQJmReXQPZj9a0raEiYAAMCAgFnR6j2YKpgAAAAbETAr6l9TUoszmAAAABsRMKsaapEdXFMiXwIAAAwImBV1hlpkCxVMAACAdQTMivrVyiJJvRcwSwETAABgQMCsauiakkKLLAAAwDoCZkWdkTOYWmQBAADWEjAr6kdJQ34AAAA2JmBW1D9vWUuhggkAALABAbOiQbWyyNAZTAETAACgT8CsqEx/yE9Sr/UrmAe5IgAAgMkiYFbUL1bWiiLF4HsSJgAAQJ+AWdFwluyfwWzLlwAAAAMCZkWdoXsw+1NkVTABAABWCZgVDc34Sc0ZTAAAgHUaVV/YbDbrSf4gydOtVuvP7d+SJlO/WlkU3atKhr8HAADAeBXMb0ny8H4tZNJ1hof89FpkncEEAABYVSlgNpvNe5N8dZK37e9yJtegRXbomhIVTAAAgFVVW2T/aZJvTXKiyovr9SKnTs3ueFEbP7O2588cx+zstSTJbSeODtZxZGZqR2s66L3sJXuZTPYyeW6WfSQ3114AgL21bcBsNpt/LskLrVbrA81m889UeWi7XebSpbndrm3EqVOze/7McVy7ttj7dSFXj3QLv9evL+1oTQe9l71kL5PJXibPzbKP5Mbv5ezZSv/dJgAwAaq0yH5xkj/fbDYfT/IzSV7TbDZ/cj8XNYlWz2Cu3oPZ0SILAAAwsG0Fs9VqfUeS70iSXgXzLa1W66/u87omzuoU2aF7MA9wPQAAAJPGPZgVDd+DWfQqmG0XYQIAAAxUvgczSVqt1nuSvGdfVjLh+hXMWlGk3q9gypcA3ILm5ubysY99NH/pL/1PB72UA1Or1VOWnTQaU3nJS16SRmMqZVlmaWkxzzzzdIqiyB133Jlz557P9PSR3HHH2bz44rmUZSfLy8tJMvL9pExR1PK93/sDOXnyZL7v+74rzz77TO644468+OKLufPOu9Jo1NNoTOWbvulv5a1v/ZE8++wzOXPmjly4cD7f8z3/OPff/0AuXryQH/7hH8gb3vDG/MAP/D9JMnjmP/kn35ekzDd+49/O29/+o/k7f+fbcvr06STJxYsX8t3f/X/lr//1v5Ef//Efy9d//f+Wt7/9R/MN3/BN+fEf/7GR167V/8y1z1v7vY3e11/TG9/4V/ODP/iP8pa3/L383M/99OB9w8+5dOli/uE//I5893f/o9x//wPb/h1VWcO4NnvmVp+1H+sYZ207XceNWndVk7aeqiZp3TdqLfXv/M7v3POHdjrldy4sLO/pM2dmprLXzxzHJ8/P5Z2Pvpg3vOrunDk2nX/53k/l8+49mS946amxn3XQe9lL9jKZ7GXy3Cz7SG78Xo4dO/Jskh+7YR94k9qLf5t/7ud+xhVdPf0/h06nnStXLufSpYu5dOlirl69OnjN3Nz1JEm73c7Vq1fSbrfT6XQGPx/+fvc/K3nkkT/KuXMv5AMfeH/a7ZXez1cGn3Hhwvk8/PDH8sQTjw9+vrLSfd/rX//V+cmffHt+//ffmz/4g9/L/Pz8yDPf//735cKF83nkkT/KJz/5iSwtLebVr/7vkyQ/+ZNvz/ve97v52Mf+KI899onBa/q/H37tWv3PXPu8td/b6H39NX3gA+/P3Nz1fOADv59nn31m8L7h5/zqr/7Hwfpf//qv3vbvaLs17OT/lm32zK0+q8qfxV7Y6nNmZqbytrf92FjruFHrrqrqeibt
39v9/nMcZ797uZat/m3WIlvR8JCf/q+G/AAAe+nJJ5/IO97x61u+5qmnntjwfR/+8Ify0EPvSlmWuX79+sjP3v3ud4z8vizLPPTQO3Px4sVcvHhh8L6nnur+rP+a/u/7r11r+L0bPW/r971z8Pvr168Nfu2/77HHHhs8593vfkeeeurJwfoff/yxLf+MqqxhXJs9c6vP2o91jLO2vvPnz4+1jhu17qombT1VTdK6b+RaBMyKBi2y6SbMoijiCCYAsNfKsrP9izbwQz/0jzd978rKyrrvdTqd/PzP/0yvMr31Z/Zfu9bwezd63lbv22hNw5/3z//5Dw6e028r7vtn/+wHt1xvlTWMa7NnbvVZ+7GOcdbW91M/9RNjreNGrbuqSVtPVZO07hu5FgGzonJ4yk+SehEBEwCYGNevX9sytK21srKS3/zNh/Jbv/Webd/Xf+1aw+/d6HlbvW+rduuVlZU8+eQTm67rySfXV3G3W9dubfbMrT5rP9Yxztr63vWud421jhu17qombT1VTdK6b+RaBMyKOlkd8pN0K5jOoQAAk+LYseNpNKrPb2w0GvnSL/3yfMmX/Jlt39d/7VrD793oeVu9rz+Vf7PPu+++l266rvvue+mW662yhnFt9sytPms/1jHO2vpe+9rXjrWOG7XuqiZtPVVN0rpv5FoEzIr6WbL/fwprRdIWMAGAPVYUO/t/z9785m/b9L0bBbVarZav+Zo35g1veOO2n9l/7VrD793oeVu9b6tQW6vV8s3f/JbBc6ampkZ+/qY3vWXL9VZZw7g2e+ZWn7Uf6xhnbX1/5a/8tbHWcaPWXdWkraeqSVr3jVyLgFnRIGAOhvwUrikBAPbUffe9NK973eu3fM29966v3t1330vzJ/7Eq/LlX/7aFEWRY8eOjfzsNa953cjvi6LIl3/5V+T06dM5ffr2wfvuvbf7s/5r+r/vv3at4fdu9Lyt3/cVg98fO3Z88Gv/fQ888MDgOa95zety7733Dda/3TUlVdYwrs2eudVn7cc6xllb35kzZ8Zax41ad1WTtp6qJmndN3ItAmZF/Ymx/RbZWlGYIgsAt6harZ6iKDI1NZ377ntpHnjgFbn//pfn7rvvSdI9SnP27F1Juvdd3n33vZmePjJSiRv+/vT0dI4cmcmb3vSWvOENb8z99788R47M5J577s2RIzO5776X5YEHXp5XvrKZb/mWvzv4+d1335uZmaODit4b3vDGPPjgZ+XNb/72HDkyM/LMV76ymVe+8jPyzd/8ljz44Getq7R99md/Tr7lW/5uHnzwswav6f9+q2pH/zPXPq/K+/prevObvzVHj87mzW/+9pH3DT/nTW96S44end22ejnOGsa12TO3+qz9WMc4a9vpOm7UuquatPVUNUnrvlFrKfbjHOHycru8dGluT5956tRs9vqZ4/jljz6X7/n1R/PL3/gn82m3zeQr3vq7+bMP3plvfe2nj/2sg97LXrKXyWQvk+dm2Udy4/dy9uyJDyT5ghv2gTepvfi3+Wu/9i+k0+lkdnY299//8nzXd33/Hq1uct1M/7u7nVtpr8mttV97vXkd1H63+rdZBbOifhBfPYOpggkAADCs+qixW9Bimcwtt5Mk11e698ZcWenkyFI7KZIV+RIAAGBAwNzC3HI7Dz3yQpLk4WevJkne+/HzOTHTyPJKJ0srO7sIGQAA4GakRbaiQYtsr0e2KIrB3ZgAAAAImGPrn8EsirimBAAAYIiAWVE/Sw4qmEk6HQkTAACgT8CsaLVa2U2Y3RZZAAAA+gTMitZVMIvVc5kAAAAImJWtDZNFEh2yAAAAqwTMMa0O+SnSUcEEAAAYEDAr6mfJotcjWzNFFgAAYISAOaZi6FcVTAAAgFUCZkVlb8zP6pAfLbIAAADDBMyK1mbJYoPvAQAA3MoEzIo2uqbEFFkAAIBVAmZFgyE/vVOYWmQBAABGCZhjMuQHAABgYwJmRWW5dsiPM5gAAADDBMyK1mbJ
IlpkAQAAhgmYFa0O+emfwTTkBwAAYJiAWVFZrp6/TJJasdo2CwAAgIA5hjUJM4UzmAAAAEMEzIrWVjCLImlLmAAAAAMC5hiKoYhZM0UWAABghIBZUZnVK0oSU2QBAADWEjArKstyXYusKbIAAACrBMyKymTkEGZhiiwAAMAIAbOi7pCf1YTZbZE9wAUBAABMGAFzDMWaCqYzmAAAAKsEzIrWXVMSLbIAAADDBMyKypRrzmBqkQUAABgmYFZUJhtMkZUwAQAA+gTMitYP+el+DwAAgC4BcxxDJcxaUahgAgAADBEwKyrLcoMhPwe1GgAAgMkjYFZUxjUlAAAAWxEwq1p7BtMUWQAAgBECZkXrKphxDyYAAMAwAbOiMqNhstsie0CLAQAAmEACZkXda0pWFabIAgAAjBAwK+q2yLoHEwAAYDMCZlVrwmRRJG0JEwAAYEDArKhMOTLkp1YUKpgAAABDBMyK1p3BjCmyAAAAwwTMMYxcU6JFFgAAYERjuxc0m82ZJL+Z5Ejv9T/XarX+4X4vbNJ0K5ijNUz5EgAAYFWVCuZikte0Wq3PTfKqJF/ZbDa/cH+XNXnWZsla0f2eNlkAAICubSuYrVarTHKt99up3n9uuVS1dshPMfj+6NlMAACAW9W2ATNJms1mPckHknx6kre2Wq3f29dVTaK1Q356abPTKVOri5gAAACVAmar1WoneVWz2TyV5BebzeZnt1qtj272+nq9yKlTs3u1xt4za3v+zO3MX17I7NHpJEmtXkutVhv8fnq6niQ5cXI2RxrjzUo6iL3sF3uZTPYyeW6WfSQ3114AgL1VKWD2tVqtS81m8z1JvjLJpgGz3S5z6dLcLpc26tSp2T1/5nYWltqZm19KkqysdJKyXP39cjtJcvHi9cxM1cd67kHsZb/Yy2Syl8lzs+wjufF7OXv2xA37LABgd7YtvTWbzbO9ymWazebRJF+R5JH9XtikKdccOx20yN5yp1EBAAA2VqWC+WlJ/k3vHGYtyc+2Wq1f3d9lTZ6yHL0Hs9b7umOKLAAAQJJqU2Q/nOTzbsBaDiX5EgAAoGu86TS3sDKrbbEZ+rotYQIAACQRMCsry3LkmpJ+i2wpYAIAACQRMMcyfAaz/6UhPwAAAF0CZkVlmZEK5uoUWQkTAAAgETAr68bIoTOYvV9VMAEAALoEzIrWXlNSOIMJAAAwQsCsrFwTMPstsge0HAAAgAkjYFZUZs0ZzN6vzmACAAB0CZgVdXPk8D2Y3V9VMAEAALoEzIrKrL2mxBRZAACAYQJmRWVZrrmmpPurgAkAANAlYI5huIJZ0yILAAAwQsCsaG2hst8i65oSAACALgGzou4U2Q2G/HQOZj0AAACTRsAcw+g9mN1fO1HBBAAASATMytYN+RlMkT2Y9QAAAEwaAbOiddeUDFpkJUwAAIBEwKysO8tn6Axm71fXlAAAAHQJmBWtr2D2p8gezHoAAAAmjYBZ2ZozmIb8AAAAjBAwK1p/D2aXa0oAAAC6BMyK1rbI1or+FFkVTAAAgETArKwsV68mSVbDpnwJAADQJWBWVo5UMPvaEiYAAEASAbOytTmyZoosAADACAGzonXXlPR+dQYTAACgS8Acw0ZnMAVMAACALgGzou6Qn1XFYIrswawHAABg0giYFZVluWGLbKmCCQAAkETArGxtjOyHzbZ8CQAAkETArKw75GfoDGb6U2QlTAAAgETArG5Njlwd8nPjlwIAADCJBMyKyqw5g2mKLAAAwAgBs6J1U2TTnyK7u4D5/NXF/PG5a7t6BgAAwCQQMMcwfA9mbY9aZP/lez+Vb/+Vh3f3EAAAgAkgYFbUHfKz+vv+17sd8jO31M6l+eVdPQMAAGASCJgVrc2R/Wrmbq8pWemUub7UNo0WAAA49ATMijYb8rPbYLjS7qTdKbPkQk0AAOCQEzAr6g75GboHc4/OYK70HjC3tLK7BwHAPms0GkmSU6dO5zWved0BrwaASSRgjmGkgtmf
IrvLhNkPmNeX2rt6DgDst+GA+WVf9poDXg0Ak0jArGjdNX+4aT0AACAASURBVCX9CuYun9seVDAFTAAA4HATMCsqM1qp7F9T0t6jCqaACQAAHHYCZkVrrymp9X6z0t5dDXPQIrssYAIAAIebgFnR2iE/9V4Jc3m3FcxeQFXBBAAADjsBcxxrKpi1YrUCuVOmyAIAADcLAbOC/l2XxZrvN2q1vWuRVcEEAAAOOQFzDOsCZr3IctuQHwAAgETArKRXwExRjEbMRq3I8m4rmM5gAgAANwkBs4J+jXJNvkyjXuzdGUxTZAEAgENOwKxg7R2YfVO12q6nyLadwQQAAG4SAmYFgxbZNd9v1Is9G/KjRRYAADjsBMwxrGuRre3lkB/XlAAAAIebgFnBagVz7ZCf2p4N+dEiCwAAHHYCZgX7NeSnLMv0C6BaZAEAgMNOwKygLPdnyM9wODVFFgAAOOwa272g2Wzel+TfJnlJkk6SH2u1Wj+y3wubJFtWMHfRIjscMK8vCpgAAMDhVqWCuZLk77Zarc9M8oVJ/o9ms/lZ+7usSbX2DObuhvys9N57bLqeueV2OptUSgEAAA6DbQNmq9V6ttVqfbD39dUkDye5Z78XNkm2vKZkFy2y/Tswb5vpFpLntckCAACH2LYtssOazeb9ST4vye9t9bp6vcipU7O7WNZGz6zt+TO3M395IbNHp1PWuleIHJluZPbo9ODnRxr1dLI89rr6e1mqLyRJTh87kmevLKYxM51Tt83s3QZugIP4e9kv9jKZbpa93Cz7SG6uvQAAe6tywGw2m8eT/HyS/7PVal3Z6rXtdplLl+Z2u7YRp07N7vkzt7Ow1M7c/FLmFrsBc3l5JXPzS4Of14pkcbk99rr6e7lwpRswj011C8nPvngtRzq7u/bkRjuIv5f9Yi+T6WbZy82yj+TG7+Xs2RM37LMAgN2pNEW22WxOpRsuf6rVav3C/i5p8qwO+Rltkq3Xil3dg9lvrz3Za5F1VQkAAHCYbRswm81mkeRfJXm41Wr90P4vafJsNntnqlbbkyE/t81MJREwAQCAw61Ki+wXJ/lrST7SbDY/1Pve32u1Wv95/5Y1WcpeDXOvh/ysrBnyc13ABAAADrFtA2ar1frtrM9Wt6SN7sHcXYts9739gDm3vLLjZwEAABy0Smcwb3WbXlNSq+1JBfOkFlkAAOAmIGBWMIiQa0qYjd0O+WmPtsgKmAAAwGEmYFawWQVzql6kUybtHVYx+xXM40caKeIMJgAAcLgJmBUMhvysPYNZ6/7x7bRNtn8Gc6peZHa6roIJAAAcagJmFZudwax3v7PTNtl+MG3UBEwAAODwEzArWK1Prj+DmayepRxX/32NWi2zU3UtsgAAwKEmYFYwOIO57pqS7h/fcmd3Fcx6v0XWNSUAAMAhJmBW0juDuea7U/0K5i6H/DRqRY5pkQUAAA45AbOCfnxcX8Hsn8Hc3ZCf7hnMhhZZAADgUBMwK1i9pmTtGcxei+xOh/y0DfkBAABuHgJmBYP65LprSnY55KffIluv5di0IT8AAMDhJmBWUJabnMGs989g7qyC2R6+pmSqnrklQ34AAIDDS8Acw7p7MActsrsf8jM7Xc9Su8zKDtttAQAADpqAWUE5mPIz+v3BkJ9dXlPSD5hJtMkCAACHloBZwWq+XDvkZ4+myPbOYCbJ3LKACQAAHE4CZgWbX1OyyxbZ3vvqRTI73UiiggkAABxeAmYFmw756VUw27tokW3UihTFaousq0oAAIDDSsAcx2ZnMHcx5KfeC6nHpvoB0yRZAADgcBIwK+gP+Vl/BrPXIrvLCmYSFUwAAODQEzDHsP4M5i4rmO3OuoDpDCYAAHBYCZgVbHYGsx8O+9eNjGulUw4GBR1TwQQAAA45AbOCTa7BzNRgiuxetMh2p8i6pgQAADisBMwKBvXJYuN7MFd2MeSn/4zpepF6rdAiCwAAHFoCZgWrQ35G9cPhjof8tFcDZlEUOTZd
1yILAAAcWgJmJb0zmGsSZv+KkZ1fU9IZDApK0guYrikBAAAOJwGzgnKT/FgURabqxe6G/NRW/wpmp+taZAEAgEOrcdALOAwGQ37WljCTTNVqYw/5ubKwnItL7SysdJIiudgLlUca9VwTMAEAgENKwKxgszOYSfcuzHGH/FxfbOehR17IuWuLaXfKPPTIC0m6V5TsrBYKAABw8LTIVlBm43swk+6gn50O+el0ytSGqqKNWpGllZ09CwAA4KAJmOPYIGFO1Ws7vqakU2Y0YNaLLO3wTk0AAICDJmBWsFWL7FS9yPIOh/y0yzL1ob+BRk3ABAAADi8BcwwbDflp1Iqs7DAUbtQiu6xFFgAAOKQEzAq2rmDWdnwPZqdM6kMBs66CCQAAHGICZgWDIT8bJMzdDPlpl2VqIy2ytSytmCMLAAAcTgJmBVtFvl0N+dloimy7k7IUMgEAgMNHwKxgtUV2fQlzN0N+Npoim2THLbcAAAAHScAcw2Ytsjsd8tMuy9SHntmodX/jHCYAAHAYCZgVbNWyuqshP50ytdrokJ8kWTRJFgAAOIQEzAr68XHTCuaOW2TXn8FMVDABAIDDScCsYPtrSnbaIrtatUxWA6YKJgAAcBgJmBWsVjDXR8zuNSW7mSI7/KzuX8eSgAkAABxCAuYYNq5g7mzIT1l2b9ccbpHtVzN3WhEFAAA4SAJmBfsx5Kfde+ZIi2xvpOyigAkAABxCAmYFgzOYezjkp9PLkBsO+dEiCwAAHEIC5hiKDZpkdzrkp1/BrG1wD+biys7OdAIAABwkAbOC7a4p2cmQn85GLbKuKQEAAA4xAbOC/hnMzYb8tDvlluc0N7JRi2zdFFkAAOAQEzArGETHDRLmVL37RzjuOcxBBXOjFlkVTAAA4BASMCsYDPnZIGE2BleLjBcwB2cwN5giW6WCWZZlfuL9T+bC3NJYnwsAALBfBMxdavQqmOMO+tntFNnnri7mn/3mY/n1R86N9bkAAAD7RcCsoOw1yW405GeqX8HccYvs8BnM6i2y88vtJMmL11QwAQCAySBgVrDaIrveVK+tdWXMCuZG15TUiiKNWlGpgrmw3H3N+euLY30uAADAfhEwx7BhBXOnQ346689gJsl0vVbpmpJ+BfOcCiYAADAhBMwK9mPITz+P1tak1ql6kcUqFczea168LmACAACTobHdC5rN5o8n+XNJXmi1Wp+9/0uaPFtdU7LTIT/tDa4pSZKpRq1ii2y3gnlewAQAACZElQrmv07ylfu8jolW9sLghmcwdzrkZ5ctsv0zmJcXVioFUgAAgP22bcBstVq/meTCDVjLxNv4DOZuh/ysCZiNWsUW2fbga22yAADAJNi2RXYn6vUip07N7vEza3v+zO3MX17I7NHpNBr1JOl+XVvN5DNHpnL65NHu17PTlde3cHUxU1PdP/pjR6cye3R69ZmNesqiwp9fb01Jsljl9fvkIP5e9ou9TKabZS83yz6Sm2svAMDe2peA2W6XuXRpbk+feerU7J4/czsLS+3MzS9lqXfecWF+eaSldWFxOQtz3erhxcvzlddXFrXMLywnSZaWVjI3v1qBbNSKXF9Y3vZZF68sDL5+7PmreeC2I9U2tccO4u9lv9jLZLpZ9nKz7CO58Xs5e/bEDfssAGB3TJGtoMzmF2GuDvkZ7wzmZi2yU40iiyvbP2t+ebWN9sVr7sIEAAAOnoBZQbl5vtz5kJ/ey+s7HPKzuNLOsel66oUzmAAAwGTYNmA2m82fTvLe7pfNp5rN5t/Y/2VNpmKDKT9TvQrmuEN+BlNk1zxyuvI1JZ3MTtdz5th0XrwmYAIAAAdv2zOYrVbr627EQiZZuUVxstFLiCtjVjBX78FcX8FcrBBW55fbOTpVz7Hpes6pYAIAABNAi2wFZTa+oiRZvaZkedwKZrnFPZiVrinp5EijljuOTee8gAkAAEwAAbOScsPzl8nOh/x0ehlyo3swq5zBXOhVMO84rkUWAACYDAJm
BWWZFJtEzKkdtsh2Bi2ya55XL7JYoYI5v9zJTKOWs8eO5OL88thnQAEAAPaagFnB1i2y/QrmeAGvvVmLbG/IT7nVwc8kCyvtzEzVc+b4dBKTZAEAgIMnYFawH0N+Nm2RrddSVnhet0W2ewYziXOYAADAgRMwKyk3rWA2djHkZ+0VJUk3YCbZtk12YaWTmUZ9EDBVMAEAgIMmYFbQPYO5sVpRpF4rxh7y0y7LdVeUJN0W2WT7wLqw3MnMVC1ney2y5wz6AQAADpiAWUH3DOZmEbM76GcnLbJrz18mq2c6t6tgzi93z2Cenp1OERVMAADg4AmYFZTZvIKZdNtkdzLkZ+35yySZbnS/t7RFRXSl3clKp8xMo5ZGrcjp2SkBEwAAOHCNg17AYbDdRNepWm1H15SsvaIkWT2DubRFBXOh97OjU/UkydnjRwz5AWDfffqnN3Px4ou5//6XH/RSAJhQAmZFW3TIZmoHFcxOp9ywRXYw5GeL5y0st5MkM1Pd195xbNoZTAD23d//+9+dU6dmc+nS3EEvBYAJpUW2gm4Bc/OE2aiPX8Fsl+uvKElWh/xUqWDONLoVzDuOTWuRBQAADpyAWUF3yM/mP5/awRTZTlmmvsGffpUW2fleBfNor4J55vh0Ls4tbRlyz19fym9+4vxYawQAABiHgFlFWe75kJ9OZ+MhP1ONrVtkF8vk3PxKkmQ5RS4utXNsppFOmTy/RRXzFz78bN7yH/9oEE4BAAD2mjOYFWw3RXYnQ342vQdzmwrm3HI77/tktxL58LNXcn1hOc9cnE+SPHV5IfecOLLh+y7PL6dM8uK1pdx3+uhYawUAAKhCBbOCbe/B3EkFs9z4HszVa0o2f17/s6Z67z92pPvfE1zYooJ5ZaFb9Tx3fXGsdQIAAFQlYFawzS0lOxry022RXf/9qf4U2S3OYPbPe0717jmZ7Z3F7IfIjVxd7P7sRdNmAQCAfSJgVlCm3HLIT2MHQ3522iKbJCv9Cmbvtf37MK/ML2/6nqu98GnaLAAAsF8EzCrKbc5g7mWLbD9gbvG8pTUVzCONWmrF1hXMKyqYAADAPhMwK9j+mpK9a5Ht34O5VYvsSqf7s0YvjBZFkZmpeq4sqGACAAAHR8CsoBsdtx7ys7KTezA3SK31WpF6UbGCOZRQj07Vq53BFDABAIB9ImBWUG7TIluvFVnujNci296kRTbpVjG3rGC2O2nUipHJtrNTtU0rmEsrncHztMgCAAD7RcCsoCy3HvIzVa+NPeSn2yK7ScCs17Yc8rPcLgcDfvq2qmD2z182aoUKJgAAsG8EzIq2Dpg7GfKz8RnMpDu0Z7t7MPsDfvqObnEG81oveL7s9qO5uriSheX2WGsFAACoQsCsYNszmDsY8rPZNSXJ9i2yy51yfcCcrufq/ErKDS7t7FcwX37mWBLnMAEAgP0hYG5ibqmd+X6lb5szmI2dDPnpbHEGs14bDPLZyHK7k6na+hbZ5U6ZuQ2qk/0Jsg/cPpskOS9gAgAA+6Bx0AuYVH/vVx9Oo17kf3jg9pTZ+gxmo1Ybe8jPti2y257BXNsi2w2cl+dXcmx69K/1ymK3dfaBM92Aec6gHwAAYB+oYG7isQtzefLifJLtp8h2z2CWG7anbmbLFtl6LYvbnMFsbDDkJ0kub3AO8+pCt6r58ju6AVOLLAAAsB8EzA2UZZnz15dyYa4b1raLjf1qYnuMc5hbtshWqGBOr6lgzk73Aub8BgGzV8G89+RRk2QBAIB9I2BuYG65ncWVTi7PL6fTKbsVzC16ZPvnIccZ9NMuN7+mZLsW2ZXO5hXMS/Prryq5srCSmUYt041azhybzovXFiuvEwAAoCoBcwPnr69WLvtDc7Yb8pOk8l2Y/UpnfZOHbt8iW2a6ttkZzPUVzGuLK7ltpnsu845j0yqYAADAvhAwNzA8ZfX6UjvlNmNkG70KZtVBPyu91+28RXZ9BXOmsfkZzCsLKznRC5hnj08b8gMAAOwL
AXMDwwHz2uJKpSE/yU4qmJu0yNZrWdqkglmW5YZTZGu1IieONHJ5gxbZq4srOXGkGzDPHJt2TQkAALAvBMwNrK9gbnMGcxAwK1Ywe0G0tsmf/lYVzOV2mTLJVH39m0/MNHJpoyE/C6sB8+zx6VxeWNmyQgoAALATAuYGzs8tDSqW16tUMMcc8tN/3WZDfqa3qGAurHTPhK6tYCbJbTONja8pWXMGM3FVCQAAsPcaB72ASXTh+nLuOD6d64vtXF9qJymzRQFzMORnpWKLbP8M5qYtso1i0wrjYu/7UxuUP2+bmdpwyM+VhZUcPdLIxaV2ZnqVzMcuzefo0akkyexUPUfWLKUsyy2rtgAAAGupYG7g/NxSbp+dzqnZqe4ZzG1eP+6Qn/4ZzK2G/LTLjSuiC8tbVDCPrq9gtjtlri+1MzNVz0OPvJBPnrueJPmNR8/loUdeyEOPvDCYlNtXlmX+xk9/KP/idx5f96zv/fVH03r+WqV9AgAAtxYVzA2cv76UM8emMlWv5crCcookxRZNsuMO+Vltkd3459O985VLK500pusjPxtUMDc4g3nbzNS6M5jXFrtDf/pnMI/3nndtcTRUDvvQ01fykWevrvuMZy4v5Jc++lxOzU6ledfxTd8PAADcmlQwN3D++lLOzE7n9LGpXF9a6Q352fz1/YC5UrWC2d5mimxjNWCutbjcD5gbn8GcX+6MvO9qL2Ae6wXM2el6iiK5vrR+2mzfL33k2STJU5fmR77/ZO/3T1ycX/ceAAAAAXONTlnm/Nxyzhybzu2z3XOYZVluOeRn0CI7bgVzsxbZXuVwcYNBP/ODIT8bVDB7ZyqH22SvLPQrmN3KZVEUOT7dGATPta4urOSdj76Y6XqRc9eWBhXTJHnq0kKS5EkBEwAA2ICAucaVhZW0O2XOHOuewVxY6WS5s/WQn6kdDvnZtEW2QgWzscGb+5Nih+/C7AfJ40dWu6GPHalv2iL764+8kMWVTt7wqrtTptsW29evaD55aT6dstpeAQCAW4eAuUb/DszbZ6dy+2y3Inh9i/OKyepE18r3YG5zTUm/RXajCubCVhXMXsAcPod5dWF9wDxxpJHrm1Qwf+kjz+WVZ4/ldc2zSVbbYoe/Xlzp5Nw115wAAACjbvmA+YEnL+Un3v/k4Pf9gHnm2HROz3bvjLy+tLLllR39a0o2u7tyrZVtzmBO1Xd6BnODFtl+BXNmNWAeP9LI5YWVlGuqkI88fzWPvHAtf/FzXpJ7Tx5NMnoO86lL84MQ+8TFua22CAAA3IJu+YD57z7wdN76W48Nrv84P7caMPsVzA1uCxlx9ng3iD57ZbHSZ253TcmRrQLmFlNkTw5aZFcD5rUNKpgvOXEkiyudXJgbnTj7Sx95LkcatXzlZ96Zk0cbOX6knqd75y7bnTJPX17In3rZ6STOYQIAAOvd8gHzkeevpl0mH3+xez/k+evd0HXHsemcPjY9eN1WQ35um5nK3Sdn8kjF+yGrnsHcsEV2i3swTwyG/Ky2v15ZXEmjVmSmsfpXfe+pmSTJ073zlUVR5MLiSh76+Pn8yftPp12r5dJyJy+5bSaPXZzPYpmcu7aY5XaZV997MkcatTxxcSEAAADDbumAeWFuKS/0zhL2w+GF60s50qjl2HQ9p3qBLdn6mpIkefDO43nkhauVPrd/BnOzFtmthvwsrHRSbPLe6XotR6dq685gnjjSGGnxPXNsOjONWp7utb/Or3Ty8x98OuevL2W2UctDj7yQhx55IY1akY+fu5a55fbg/OVLTx/NvadmRs5mAgAAJLd4wByuOPa/Pj+3lDOzUymKIlP1Wmamun9ExZY1zOTBu47nqUsLg6E6W+mfwdy2RXaTIT9T9dqmZ0JPzkyNtMheXVzJiaHzl0m3YvlpJ2cGFcxk9W7Ll90+O/je6aPdZ7U75eCKkvtOH819p45qkQUAANa5pQNm64VuqPzvXnIij/S+Pn99KbcPtcYen+6Gs+0qmM07
jydJHj23fZvsagVz458PWmQ3GfKzUXts36mjUyMtslcXVgaDeYbdc3Kmd89lt+X2Uxfmcny6Pjh3miSnZ6fS6bXHPnVpPlP1InceP5KXnj6apy7PD86SAgAAJLdQwLyysJxf/aPnRianPvz8tdx3aiaff9+pfOLF61la6eT89eWcmV0NmMeO1Cs9/8G7ugGzyjnM9uAM5iYtsv2ptJsM+dkqYJ482hipYF5ZXBkZ8NN3T+8c5jOXF1OWZZ64OJ+X3j47Uhnttwg/c2khT15ayN23zaReK3LfqaNZbpd57mq3qjm/3M7bf++JwQReAADg1nTLBMyfeP9T+a5fezQffubK4Hut56/mwbtO5DPvOp6VTplPnL+e89eXcmaognlsuhswt6tg3j47nTuPTw8qoVvZtkW2sUWL7HJ7wwmyfSdnpkbOYF5bXMltGwTMu2/rD/qZz1MX53N9qZ2XnT468prTvWrms1cW8qmL83nJyZlcXGrn9IkjSZKPnbueKwvL+aWPPJf/97cfzzf9+z/Mc1cM/wEAgFvVLREwy7LMO1rnkmTw66X55TxzZTEP3nl8UH386LNXc2l+OWeOrbaJHuu3yG5zBjNJHrzrRFoVKphVh/wsrnRSlmW+59db+S8PPz/4XmOz8bNZ3yJ7ZWH9GcwkmZmq545j03n60kI+8szlJMnLbh8NmCeONFKvFXni4nyevDiXdqfMQ4+8kE/1Ju6+p3Uu1xfb+dU/ej53n5zJhbml/M2f+cN86oI7MgEA4FZ00wXM5XYnP/vfnsncUnvwvYefv5anLy/k2HQ973z0xbQ75eD8ZfOu47nn5EyOH6nnfY9fTJmMVDCP91pkt4+X3Umyj1+Yy/xye8vXrQzuwdz458NDfj78zJX88kefz1t/6/GsdMosrHQyvVUF82gjVxdW0u6UKcsyVxeWc2KDCmbSbZN95vJCPvzU5dw20xiZmpt0hwGdOtrIw89dzXK7zOnez49N1zNdL3JhbjmfOHctrReu5S+/+p78i//lc7O00snf/Jk/zI/+zuN5asxJs+9/4uLg7wUAADh8Nk4eh9h/+NAz+eH3fDIvXFvM3/6SB5J0q5aNWpE3fekD+f53fjwfevry4Kzkg3ceT1EUefDO4/n9T11M0m137TtWcchP0g2rZZJHX7iWz73nZM5dW8wvf/S5/JXPvzczU6tnOVe2OYNZrxWpFd0zmP/hQ8+kSPL81cX89ifOZ2G5ncZWZzBnplKmO9xnulFLu8yGQ36S7qCfP3z6Sn7/8Yt58K7jG06mPX10Oo8+371+pd8yWxRFTs9O58L1pfznjz6XRq3In3rFmZw8OpUf/JrPzlt/47H8q/c9kbe974l8xtljmZ2u9/ZUpF4UqdW61d5v/KKXDaqx73/iYr755z6Sk0en8h++/gty28zUurUAAACT7dBWMK8truTNv/jRvOvRc4PvzS21869/78kUSX7mg0/nxetLKcsy72ydyxfefzpf9Vl3ZaZRyzta5/LI89dy921HcrJXlXvwrhNZ6A3V2egMZhUP9ibJtl64lrIs873/9dH8i9/5VN72vidGXtfepkW2KIpM12t5+vJC3vnoi/lLr7o7LzlxJD/7oWd6Q362qmB293NhfilXFrpnMTetYJ7stsSudMqR60mGnepNkk0yqGAm3bB5/vpS3vGx5/PyO2bzwU9dzEOPvJCPP38tr3/wbP7Wl9yfr/+il+X22ek0akU6ZTcwX1tayYvXlvLj73si3/YrH8sL88v52Lnr+bZfeTh3npjJpfnl/NBvfDKLQwNqHzs/l2uL21//AgAAHKxKFcxms/mVSX4kST3J21qt1j/a11UNWVrp5Pvf+ce598yxfMMX3JOiKFKWZb7/HX+c3/rkhbz/iUt54MxsXn7mWH7mg0/n4vxyvvt/bOa7/ksrb3/fE3n9Z96Z564u5n//0/fn6FQ9X/KKM3nXoy9mdqqWB+86MficfjhMMnoG80j1CubZ49O5
fXYqjzx/Le969MX87mMXc/dtR/KTf/BUvvLBO/PpZ48lWQ2Ym7XIJt1BP+/qtfN+7avvydnj03nrbz+e6XotzbuObfq+V549lnqR/H+//Xi+8YteliQbnsHs73OmUcvCSmfdgJ++fqgsitXwmmSwzyT5is+4Y937bpuZyl/8zLtGpvYO+9kPPp23/c7jefrSH+bKwkqWVzr5C59/Vz745OX8p48+n6948M588UtP5d++/6m89bcey+x0PW941d1546vvyamZRs7PLefq4koeuH029S3OpAIHY6F3VGC4ewMAuPltGzCbzWY9yVuTvC7JU0ne32w2f7nVan1svxe3uNLJt/3yx/I7j11Ikrx4eT7f+tpPz3/8yHP5r61z+cuff0/+y8deyHf8ysP551/zOfmJP3gyX/aKM/mqz7wr/+2py/mFDz+bF64tZrpe5EtfcSZJ8rrm2byjdS6X5pO/+Cc+bfBZ/UE/SUauKTnenyJb4RRmURRp3nk8f/jMlbz38Yt58M7j+af/82fnjf/mA/m+dzyat33dq1IrisEU2c0qmEl30M/lhZV80f2n89LTR/MXPucl+Zfv/VSW2p1MbZFMX3HHsbzpy16eH37PJwf3aG5WwSyKIveeOpq55fZIeBzWP5d5cmZqJMid7v0ZnTw6lZffsXHgnV/p5L1/fG7Dn/35z707T12Yy689/EKKJF/76rtzenY6X/KKM3nkhWv5kYc+kf989nje8cgL+dOvOJN6kfzb338yP/H+J1OWST+23ndqJm989T157Weczfsev5hf/aPn8sfnruerPuuufN2r78ndJ2fy9OX5vOePz+fK4kq++rPuykt7Ybosyzx5aSHT9SIv6U3VhVtdWZY5P7ecM7NTI23zz11ZyKPnrucLX3Z68L3ldif/7gNP59krC/nGL3rZoPvjdx/7/9u79+i6yjKP49+Tk+Tk3tzT9EKS0vAUWjFFp5TLqgUHhIEBvlEu9wAAECBJREFUWQNOHe+XcRwVHda4FGRGHHUtHdAB11xcyxFvA4qIOjAgIgIqDsJQKAKFPpje00uSNmmapGlOc5k/9j4lrUnaXHfO4fdZq+ucs8/O7vOe95z33c9+3713B5/92SYuW17DdWtPnfUyiIiISHROZgRzFdDs7lsAzOwu4EpgRhPM/oEhPnXfRp7Y2slnLmqkvW+A//ztVg70HeHxLR2cXVfKJ960hHPqy/j4j1/k3XduoLd/kA+fVw/AB1bX8cDGVn7VvJ+1SyuO3gvy3IZyCnPj9CYHsRGjlovL8o9Ohx15xD0/Nx6klic5SLaspojfbeskKwZffetyKgpzuW7tEm560Pnx7/dwTdMCBoaGicGo5zympC7k87aVC4AgobtoWTUPbGwd9z6YAG8/ayEvt/bw85fbgLHPwQS49IxqXndKGVvD8yyPlzrv8vgLAJWHyy+wqkmPIK5cNI/iRDZDw8M0VARJam52Fhcvq+Ke5/awbf8hzl9SzvkNZUfPk31+90FOrSpi8bwE8awY//3CXm55dDO3PLoZgAXz8njdwnn86Lnd3L1hFwtL89nZGVxsKCsG33pyB6vrylhUmsfvtnWyqyu4rcrKhSVc9YZF9PQmeXJbJ+t3HKCyKJc1p1ZwTn0Z+w8l+f2ug7zS1sOSykLObSjnzNpimvf18mxLF9s6+lg+v5iz60qpKy9gU2sPz+8+SMehJMvnF3PmghKKEtl4Ww+bWnsYGh5mWU0Ry6qLGWaYzfsOsXV/L/m5cU6tKKS+vIDe5ADbOvpoOdBHeWEu9eX5zC/Oo/NQku2dfezrSVJTnOCU8nxK83No70nScqCPnv5BTls4QEkcEtlxWrsPs6ern4GhIWpL8qgNk+m93f3sPXiYnHgWtSUJKosSHBkcovVgP209/RQlsplfkqAsP4fe5CB7u/vpPJSkLD+X+SUJCnPjdPUN0NrdT09ygMrCXGqKE+RmZ9HRm6S1J8mRgSGqixNUFeUSA/b1Jmnt
7ieeFaO6KEFFYS4DQ8O0dffT3ttPQU6cmuIEpeH/2drdT7Kjj+zBQWqKExQnsuk6PEDrwX66Dh+hsij4P/Oy4+zvTbLn4GH6B4aoKU5QUxx8R1q7+9nddZhYDGpL8phfnODI0DC7uw6z92A/eTlZLJyXR1VRgp7+AVq6DtPa3U95fg6LSvMoL8xlX0+SnQf62N8bfOZ1ZQUU5WWzu+sw2zsO0d0/wCll+dSVFZATj7G9o48tHb0MDUFDRQENFQXE+o7wzM4DbN53iILcLBori6ivKKDzUBJv62HL/kNUFeWyrKaYurJ8tnf28eLug+zo7KOhooAVtSXUliTYuLebDS1dtPckWV5bzFmL5lGUyObpHQd4ansnh48MsuqUMlbVlZIcHObXzfv4360dFOTEWXNqBecvKWd7Zx8PvtTG41v2U1dewCWnV3N+QzlPbO3gR8/t5pX2XpZWFnJNUy1nLS7lh8/u4t4X93JkcJjqolw+csFSKnPjfOWxZrZ19BGPwUOb2vjI+Q109Ca5/ckdLK0q5JqmBZNqG0RERCR9nUyCuRDYOeJ1C3D2zITzqq//dhtPbO3kxosaeeuZtcybl09//wDfe3onFYW5/NOly8iKxVhdX857z17Mt5/ayVuWVR2dhlpTnODqpgV8/5ldXGRVR7ebyM5izakVPPhy2zGjllmxGKdVF7G/N3lMHFmxGIW5cU42h0pNtb2maQFnzA+m4F56ejX3b2zl5kea+eqjzQzDCZOy1E7vOfXlR5e9rWlBmGCOf+psLBbjxosa2bKvl1fae8ecIguESUTemAlmaX4OWbFXE82U2pI83ri4lKuaFrJp14Fx4xlPqr5Gaqwq4n3n1nGwN8lpIw4ClBfmsraxkgtHTL1dY9W8tOcg67cfYOXieZxeW8Ljr7Tz+gXFrN/RRWv3YS5srOS06iLWLqvmwY17eeDFvWzY1UXTonlc1bSA3v4BfuntfPa+4JhJTUmCN51WSVt3P3c9u4s71rcAUJATp6GygIe9nXtf2Hs0rqwYVBUneOwP+/i3x48tS048xvcHR58mPJ4Yr47SjpQV4+g5sZNdPtFtx2MwWhEmsjz1bT9+9enY9nixj1bWmfxsx9v+ZI31eRbkxvnJ83uOWV6UiJPIjvPQpmNnDjRWFbK76zCPb+k4uiw/J4vzGsr5Q3svX3r4D0eXL60s5K/POYVfN+/nS79sDmLIinHFihpW15Vxx/pdfO5/gt/Kgnl53HbVChaW5nHzI83c/Eiw/mXLa7j+zUs1PVZEROQ1KDbWOXIpZnYN8BZ3/2D4+l3AKne/dpw/awe2T1uUIiLyWlYHVJ1wLTkR9c0iIjJdxuybT2YEswVYPOL1ImD3Cf5GOwIiIiJzi/pmERGZcSeTYD4NNJpZA7ALWAf81YxGJSIiIiIiImnnhPfBdPcB4GPAQ8DLwN3uvnGmAxMREREREZH0csJzMEVEREREREROxglHMEVEREREREROhhJMERERERERmRYnc5GfyJnZJcDXgDjwTXf/csQhTYqZfQu4HGhz9xVRxzMVZrYY+B4wHxgCvuHuX4s2qokzszzgN0CC4Pdwj7vfFG1UU2NmcWA9sMvdL486nskys21ANzAIDLj7GyMNaArMrBT4JrCC4DaZ73f330Ub1cSZmQE/HLFoCfBZd78topBklmVKf5wyVl9mZuUE3/V6YBvwNnfvNLMYQfn/DDgEvNfdn40i9sk6vo8IL+J4F1AOPAu8y92TZpYg+GzeAOwH/tLdt0UU9qSM1vYCTgbWrZldB3yQoJwvAO8DasmQuh1tH3oyv1Mzew/wD+Fmv+ju353NcpyMMcp6C/DnQBLYDLzP3Q+E790AfIBgf+nj7v5QuDyy9nrOj2CGDeG/A5cCZwBvN7Mzoo1q0r4DXBJ1ENNkAPh7dz8dWA18NE3rpR+40N1fDzQBl5jZ6ohjmqpPEFyQKxNc4O5N6Zxchr4G/NzdlwGvJ03rxwNN7t5EsGNy
CPhpxGHJLMmw/jhlrL7seuARd28EHglfQ1D2xvDfh4Cvz37IU3Z8H/HPwK1hWTsJdlQJHzvdfSlwa7heuhmt7c24ujWzhcDHgTeGCUmc4K4PmVS33+GP96EnVJdhQnoTcDawCrjJzMpmPPKJ+w5/XNaHgRXufibwCnADQNherQOWh3/zH2YWj7q9nvMJJsEXoNndt7h7kuBIzJURxzQp7v4boCPqOKaDu+9JHQ1y926CRnthtFFNnLsPu3tP+DIn/Je2V74ys0XAZQRHbGUOMLMSYA1wO4C7J1NHHdPcm4HN7r496kBk1mRMf5wyTl92JZAa2fgu8Nbw+ZXA98K+40mg1MxqZznsSTu+jwhHei4E7glXOb6sqc/gHuDN4fppYZy2NyPrlmAWVr6ZZQMFwB4yqG7H2IeeaF2+BXjY3TvcvZMgaZtzAz+jldXdfxHe2QPgSWBR+PxK4C5373f3rUAzQVsdaXudDgnmQmDniNctpGEik8nMrB5YCTwVcSiTEh7peQ5oI2h40rIcoduATxFM9Up3w8AvzOwZM/tQ1MFMwRKgHfi2mW0ws2+aWWHUQU2DdcAPog5CZlVG98fH9WU17r4HgiQUqA5XS/fP4Pg+ogI4MGLHdWR5jpY1fL8rXD9djNX2Zlzduvsu4CvADoLEsgt4hsyt25SJ1mXa1vFx3g88GD6fk2VNhwRztCMqaTvClGnMrAj4MfB37n4w6ngmw90Hwyl/i4BVZpaW58eaWWq+/jNRxzJNznP3swimd3zUzNZEHdAkZQNnAV9395VAL69O40lLZpYLXAH8KOpYZFZlbH88gb4sbT+DMfqI8cqTtmUNTbTtTdvyhtM8rwQagAVAIUHfebxMqdsTGat8aV9uM7uRYGr/neGiOVnWdEgwW4DFI14vAnZHFIuMYGY5BB3yne7+k6jjmapw6syvmIPTJU7SecAV4cVx7gIuNLM7Io1oCtx9d/jYRnCe36poI5q0FqBlxMj4PQQ7PensUuBZd2+NOhCZVRnZH4/Rl7WmpkeGj23h8nT+DP6ojyAY0SwNp1XCseU5Wtbw/Xmk12k+Y7W9mVi3fwpsdfd2dz8C/AQ4l8yt25SJ1mU613HqAkWXA+9w91SyOCfLmg4J5tNAo5k1hEfN1wH3RRzTa144V/924GV3/5eo45ksM6sKrzKHmeUTNNKboo1qctz9Bndf5O71BL+TR939nRGHNSlmVmhmxannwMXAi9FGNTnuvhfYGV6BFYJzF1+KMKTp8HY0Pfa1KOP643H6svuA94TP3wPcO2L5u80sFl4Qris1RW+uG6OPeAfwGHB1uNrxZU19BleH66fNaM84bW/G1S3B1NjVZlYQfqdTZc3Iuh1honX5EHCxmZWFo74Xh8vmvPCKsJ8GrnD3QyPeug9YZ2aJ8IrQjcD/EXF7PedvU+LuA2b2MYIvQBz4lrtvjDisSTGzHwBrgUozawFucvfbo41q0s4D3gW8EJ6/CPAZd/9ZhDFNRi3w3fBqW1nA3e5+f8QxCdQAPw33C7KB77v7z6MNaUquBe4MG/ktBJePT0tmVgBcBPxN1LHI7Mqk/niEUfsy4MvA3Wb2AYKd92vC935GcOuDZoKrKKftb3mETwN3mdkXgQ2EF8UJH//LzJoJRrfWRRTfVIzW9maRYXXr7k+Z2T0EtyIZIKjHbwAPkCF1O9o+NBP8nbp7h5l9gSD5Avi8u8+5kdsxynoDwS31Hg73jZ509w+7+0Yzu5vggMIA8FF3Hwy3E1l7HRseTscDFiIiIiIiIjLXpMMUWREREREREUkDSjBFRERERERkWijBFBERERERkWmhBFNERERERESmhRJMERERERERmRZKMEXmCDP7gZk9b2bXjbPOWjPTbVRERERmgfpmkYmb8/fBFHktMLP5wLnuXhd1LCIiIqK+WWSylGCKTJGZ1QP3u/uK8PUngSKCmxd/mODGty+5+zozKwT+FXgdwe/vc+5+L/ALoDq80fe1wBeA
T7r7ejOrBNa7e/3slkxERCQ9qW8WiY6myIrMnOuBle5+JkFnBnAj8Ki7/wlwAXBL2LFdAWx29yZ3fzyacEVERDKe+maRGaYEU2TmPA/caWbvJDhSCnAxcH14NPRXQB5wSjThiYiIvOaobxaZYZoiKzJ1Axx7sCYvfLwMWENwBPQfzWw5EAP+wt195AbCqTxjbTMPERERmQj1zSIR0QimyNS1EpyjUWFmCeBygt/WYnd/DPgUUEpw7sdDwLVmFgMws5VjbHMb8Ibw+dUzGLuIiEgmUt8sEhElmCJT5O5HgM8DTwH3A5uAOHCHmb0AbABudfcDBBcIyAGeN7MXw9ej+Qrwt2b2BFA5w0UQERHJKOqbRaITGx4ejjoGERERERERyQAawRQREREREZFpoQRTREREREREpoUSTBEREREREZkWSjBFRERERERkWijBFBERERERkWmhBFNERERERESmhRJMERERERERmRZKMEVERERERGRa/D/dnYkM0aDRswAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16,9))\n", "sns.distplot(np.log((rev.useful + 2).compute()), ax=ax1)\n", "sns.boxplot(rev.useful.compute(), ax=ax2)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "203142 Dinner for 1.\\n\\n- Preface\\nI went to Amy's Ba...\n", "Name: text, dtype: object" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rev[rev.useful == rev.useful.max().compute()].text.compute()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Approaching parsing of vast text\n", "Another simple task to test the library - we will try the TfidfVectorizer. I'm working with this instead of the regular counter because it will keep our values between zero and one for the visualization I want to map." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['best', 'came', 'chicken', 'come', 'definitely', 'did', 'didn', 'don', 'food', 'friendly', 'good', 'got', 'great', 'just', 'like', 'little', 'love', 'nice', 'order', 'ordered', 'people', 'place', 'really', 'restaurant', 'service', 'staff', 'time', 'try', 've', 'went']\n" ] } ], "source": [ "@delayed\n", "def ret_count(ddf):\n", " vec = TfidfVectorizer(stop_words='english', ngram_range=(1,2), max_features=30)\n", " X = vec.fit_transform(ddf)\n", " names = vec.get_feature_names()\n", " return X, names\n", "\n", "ddf, names = ret_count(rev.text).compute()\n", "print(names)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Success! \n", "Our vectorizer didn't overload the system!\n", "\n", "Unfortunately, the counts are exceedingly high and overpower the other observations making this unhelpful." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "def top4(val, ax):\n", " sns.distplot(ddf.todense()[:,val].reshape(1,-1), kde=True, ax=ax)\n", " \n", "fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16,9), sharex=True, sharey=True) \n", "axes = [ax1, ax2, ax3, ax4]\n", "\n", "for val, ax in zip(range(4), axes):\n", " top4(val, ax)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dask hurdles overcome\n", "After some hours of troubleshooting (not shown in this project), I am now comfortable working with the distributed dataframes.\n", "\n", "## Other memory improvements - Previous attempts at spaCy\n", "In a previous project, I limited my vocabulary and text subset to one million words, as spaCy throws an error when the vocabulary is too large to fit in memory. I have since discovered that spaCy offers a lazy computation design for this, called the pipe (`nlp.pipe`). This allows spaCy to batch the documents and avoid exceeding memory constraints. 
This has been a helpful discovery.\n", "\n", "### Steps to find review value\n", "The steps below are the analyses we will run to find explicit value in reviews.\n", "\n", "#### Graph the count of words\n", "Maybe certain words appear more frequently in more valuable reviews.\n", "\n", "#### Word combinations?\n", "Are there combinations of words (phrases) that occur frequently?\n", "\n", "#### Readability\n", "Readability might be a factor.\n", "\n", "#### Length of review\n", "Review length might also be a factor.\n", "\n", "#### Combining the metrics\n", "Also checking whether industries respond differently." ] }, { "cell_type": "code", "execution_count": 345, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['amazing', 'best', 'better', 'came', 'chicken', 'come', 'day', 'definitely', 'delicious', 'did', 'didn', 'don', 'experience', 'food', 'friendly', 'going', 'good', 'got', 'great', 'just', 'know', 'like', 'little', 'love', 'make', 'menu', 'new', 'nice', 'night', 'order', 'ordered', 'people', 'place', 'pretty', 'really', 'recommend', 'restaurant', 'right', 'said', 'say', 'service', 'staff', 'sure', 'time', 'try', 've', 'wait', 'want', 'way', 'went']\n" ] } ], "source": [ "'''@delayed\n", "def ret_count(ddf):\n", " vec = CountVectorizer(stop_words='english', ngram_range=(1,2), max_features=50)\n", " X = vec.fit_transform(ddf)\n", " names = vec.get_feature_names()\n", " return X, names\n", "\n", "ddf, names = ret_count(rev.text).compute()\n", "print(names)'''" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Crunching bag of words, need lemmas\n", "We will try this again with some preprocessing - lemmatization should increase the diversity of words selected as features." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Basic text cleaning\n", "def fix_nl(mytext):\n", " text = re.sub(r'\\n\\n', '', mytext)\n", " text = re.sub(r'\\n', '', text)\n", " final = re.sub(r' ', ' ', text)\n", " return final\n", "\n", "# Basic lemmatize function\n", "def lemma_sent(sent):\n", " return ' '.join(word.lemma_.lower() for word in sent if not word.is_punct and not word.is_stop)\n", "\n", "@delayed\n", "def process_n_count(series):\n", " series = series.apply(fix_nl)\n", " spaCy_text = nlp.pipe(series)\n", " vec = CountVectorizer(stop_words='english', ngram_range=(1,2), max_features=300)\n", " vec.fit_transform([lemma_sent(text) for text in spaCy_text])\n", " return vec\n", " \n", "@delayed\n", "def test(series, vec):\n", " series = series.apply(fix_nl)\n", " spaCy_test = nlp.pipe(series)\n", " X_new = vec.transform(series)\n", " return X_new" ] }, { "cell_type": "code", "execution_count": 412, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<5000x100 sparse matrix of type ''\n", "\twith 52872 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 412, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# This cell will test the steps of my approach against a much smaller dataframe\n", "df.text = df.text.apply(fix_nl)\n", "new_txt = list(nlp.pipe(df.iloc[:5000].text))\n", "cvec = CountVectorizer(stop_words='english', max_features=100)\n", "cvec.fit_transform([lemma_sent(txt) for txt in new_txt])" ] }, { "cell_type": "code", "execution_count": 426, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<1000x100 sparse matrix of type ''\n", "\twith 10395 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 426, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cvec.transform([lemma_sent(txt) for txt in list(nlp.pipe(df.iloc[5000:6000].text))])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Vectorized 
text transformation\n", "The transformation appears to work against new data! We will now attempt the same process against the entire 'rev' dataset. \n", "\n", "Note: This is the same tactic as we tried above (commented out), but we are hoping for better, more accurate results since we are reducing the text to lemmas and cleaning it more." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(rev[['text', 'useful']], rev.useful, test_size=0.5)\n", "\n", "# Had to run this training and compute command after emptying as much in memory as possible\n", "# new_vec = process_n_count(X_train).compute()\n", "\n", "from joblib import dump, load\n", "# dump(new_vec, \"vec.joblib\")\n", "new_vec = load('vec.joblib')\n", "\n", "myX = test(X_test.text, new_vec).compute()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.set_style('darkgrid')\n", "# plt.style.use('ggplot')\n", "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16,9), sharey=True)\n", "myInd1 = X_train[X_train.useful > 0].index.compute()\n", "myInd0 = X_train[X_train.useful == 0].index.compute()\n", "\n", "ax1.bar(range(300), np.array(myX[myInd1].sum(axis=0)).tolist()[0], width=1)\n", "ax2.bar(range(300), np.array(myX[myInd0].sum(axis=0)).tolist()[0], width=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visual differences \n", "Visually, we are seeing striking similarities between the term counts of reviews marked useful (top) and those with no useful votes (bottom).\n", "\n", "### Readability" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Heatmap\n", "\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "nlp.pipe(rev.text)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Doc2Vec: How to handle the large corpus\n", "After some trial, error, and research, it is my conclusion that the model computation will not differ between one large corpus and several smaller chunks. This stems from a blog post written by the library's author - Radim Rehurek - discussing multiprocessing for faster running times. If the content can be processed in parallel, it does not require information from the entire corpus and can be run in pieces.\n", "\n", "### Verifying training updates\n", "We will verify whether this can be done below. Within gensim, we can access the document and word vectors after training updates to see if the model updates." 
] }, { "cell_type": "code", "execution_count": 427, "metadata": {}, "outputs": [], "source": [ "# Tagging function with delayed capability\n", "@delayed\n", "def tag(df):\n", " df.text = df.text.apply(fix_nl)\n", " tags = []\n", " for i, doc in enumerate(nlp.pipe(df.text, disable=['ner', 'parser'])):\n", " mylist = [chunk.lemma_.lower() for chunk in doc if not chunk.is_punct and not chunk.is_stop]\n", " tags.append(TaggedDocument(mylist, [i]))\n", " \n", " vec = Doc2Vec(min_count=1)\n", " vec.build_vocab(tags)\n", " \n", " return vec, tags" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(17033, 0.38854920864105225),\n", " (23295, 0.3664354085922241),\n", " (12518, 0.3616259694099426),\n", " (12915, 0.3600383400917053),\n", " (14743, 0.35090503096580505)]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vec1, tags1 = tag(ddf).compute()\n", "test_arr = vec1.docvecs[0]\n", "vec1.docvecs.most_similar(0, topn=5)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('cantor', 0.3950194716453552),\n", " ('allllways', 0.39479339122772217),\n", " ('chitown', 0.3757461905479431),\n", " ('fantastico', 0.35876384377479553),\n", " ('sick', 0.35497692227363586)]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vec1.wv.most_similar(tags1[0][0], topn=5)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(17033, 0.38854920864105225),\n", " (23295, 0.3664354085922241),\n", " (12518, 0.3616259694099426),\n", " (12915, 0.3600383400917053),\n", " (14743, 0.35090503096580505)]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vec2, tags2 = tag(rev).compute()\n", "vec1.train(tags2, len(tags1) + len(tags2), epochs=5)\n", "test_arr1 = vec1.docvecs[0]\n", 
"vec1.docvecs.most_similar(0, topn=5)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('apartment', 0.9960299134254456),\n", " ('spicy', 0.9960122108459473),\n", " ('oil', 0.9959917664527893),\n", " ('dry', 0.9959776401519775),\n", " ('b', 0.9959765672683716)]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vec1.wv.most_similar(tags1[0][0], topn=5)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Every single value in array is equal\n", "sum(test_arr != test_arr1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Basic retraining failed\n", "It appears the document vectors within the model do *not* return different results after the additional training when queried with the most_similar method. I had read this somewhere but wanted to verify it. We can confirm that the document vectors themselves are unchanged, as shown by comparing the test arrays.\n", "\n", "Moving forward, some Google queries and documentation digging show that Doc2Vec does not allow the same training update approach that word vectors allow, as I suspected. We will have to work with the word embeddings instead.\n", "\n", "### Similarity_matrix\n", "We are actually able to compare texts by fitting them into an LSI vector space, following the steps below.\n", "\n", "#### Steps\n", "1. Create gensim Dictionary\n", "2. Create corpus of tuples\n", "3. Create LSI model with each input\n", "4. Query and fit a Dictionary object into the vector space\n", "\n", "Where does model.wv.similarity_matrix fit in?" 
] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvectorize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrev\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/site-packages/dask/base.py\u001b[0m in \u001b[0;36mcompute\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 174\u001b[0m \"\"\"\n\u001b[0;32m--> 175\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraverse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/site-packages/dask/base.py\u001b[0m in \u001b[0;36mcompute\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0mkeys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dask_keys__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0mpostcomputes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dask_postcompute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 446\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mschedule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 447\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mrepack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mpostcomputes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/site-packages/dask/threaded.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(dsk, result, cache, num_workers, pool, **kwargs)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0mget_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_thread_get_id\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0mpack_exception\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpack_exception\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m )\n\u001b[1;32m 84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/site-packages/dask/local.py\u001b[0m in \u001b[0;36mget_async\u001b[0;34m(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[0;31m# Main loop, wait on tasks to finish, insert new ones\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 479\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"waiting\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"ready\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"running\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 480\u001b[0;31m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mres_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfailed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mqueue_get\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 481\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfailed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres_info\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/site-packages/dask/local.py\u001b[0m in \u001b[0;36mqueue_get\u001b[0;34m(q)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mqueue_get\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 144\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 145\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/queue.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 164\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_empty\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/envs/py36/lib/python3.6/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 296\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "vec = 
CountVectorizer(max_features=100, ngram_range=(1,2), stop_words='english')\n", "\n", "@delayed\n", "def vectorize(df):\n", " return vec.fit_transform(df.text)\n", "\n", "it = vectorize(rev).compute()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# sklearn tool faster than [fix_nl(txt).split] for txt in rev.text.compute()])\n", "mydct = Dictionary([vec.get_feature_names()])\n", "mycorp = [mydct.doc2bow(fix_nl(txt).split()) for txt in rev.text.compute()]\n", "mylsi = LsiModel(mycorp, id2word=mydct, num_topics=100)\n", "mylsi[mydct.doc2bow(fix_nl(list(rev.text.loc[0].compute())[0]).split())]" ] }, { "cell_type": "code", "execution_count": 287, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0, 0.8622897373758738), (1, -0.7724632421528443)]" ] }, "execution_count": 287, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sklearn tool faster than [fix_nl(txt).split] for txt in rev.text.compute()])\n", "mydct = Dictionary([vec.get_feature_names()])\n", "mycorp = [mydct.doc2bow(fix_nl(txt).split()) for txt in rev.text.compute()]\n", "mylsi = LsiModel(mycorp, id2word=mydct, num_topics=2)\n", "mylsi[mydct.doc2bow(fix_nl(list(rev.text.loc[0].compute())[0]).split())]" ] }, { "cell_type": "code", "execution_count": 290, "metadata": {}, "outputs": [], "source": [ "mybow = [mydct.doc2bow(fix_nl(list(rev.text.loc[i].compute())[0]).split()) for i in range(len(rev))]\n", "mylsa = [mylsi[bow] for bow in mybow]" ] }, { "cell_type": "code", "execution_count": 354, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 354, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "x = []\n", "y = []\n", "for i, val in enumerate(mylsa):\n", " try:\n", " x.append(val[0][1])\n", " except:\n", " pass\n", " try:\n", " y.append(val[1][1])\n", " except:\n", " pass\n", "\n", "fig, ax = plt.subplots(figsize=(16,9))\n", "sns.scatterplot(x=x, y=y, alpha=0.1)" ] }, { "cell_type": "code", "execution_count": 342, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7.533877018931459" ] }, "execution_count": 342, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tsne = TSNE()\n", "TSNE.fit_transform(mylsi)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build two separate LSI models\n", "Compare the text to both and check soft cosine similarity?" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }