{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data retrieval" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
 "import requests\n",
 "from bs4 import BeautifulSoup\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
 "import seaborn as sns\n",
 "import os\n",
 "\n",
 "%matplotlib inline"
] }, { "cell_type": "code", "execution_count": 429, "metadata": {}, "outputs": [], "source": [
 "seed_urls = ['https://inshorts.com/en/read/technology',\n",
 "             'https://inshorts.com/en/read/sports',\n",
 "             'https://inshorts.com/en/read/world']\n",
 "\n",
 "def build_dataset(seed_urls, timeout=10):\n",
 "    \"\"\"Scrape headline/article pairs from every seed URL into one DataFrame.\n",
 "\n",
 "    The category of each row is the last path segment of the URL it came from.\n",
 "\n",
 "    Args:\n",
 "        seed_urls: iterable of category page URLs to download.\n",
 "        timeout: seconds to wait for each HTTP response.\n",
 "\n",
 "    Returns:\n",
 "        DataFrame with the columns news_headline, news_article and\n",
 "        news_category; it is empty (but keeps those columns) when no\n",
 "        article card was found.\n",
 "\n",
 "    Raises:\n",
 "        requests.HTTPError: when a page answers with an error status.\n",
 "    \"\"\"\n",
 "    columns = ['news_headline', 'news_article', 'news_category']\n",
 "    news_data = []\n",
 "    for url in seed_urls:\n",
 "        news_category = url.split('/')[-1]\n",
 "        # bound the wait and refuse to parse an HTTP error page as news\n",
 "        response = requests.get(url, timeout=timeout)\n",
 "        response.raise_for_status()\n",
 "        soup = BeautifulSoup(response.content, 'html.parser')\n",
 "\n",
 "        headlines = soup.find_all('div', class_=[\"news-card-title news-right-box\"])\n",
 "        articles = soup.find_all('div', class_=[\"news-card-content news-right-box\"])\n",
 "        for headline, article in zip(headlines, articles):\n",
 "            headline_tag = headline.find('span', attrs={\"itemprop\": \"headline\"})\n",
 "            article_tag = article.find('div', attrs={\"itemprop\": \"articleBody\"})\n",
 "            # find() returns None for a card without the expected tag\n",
 "            if headline_tag is None or article_tag is None:\n",
 "                continue\n",
 "            news_data.append({'news_headline': headline_tag.string,\n",
 "                              'news_article': article_tag.string,\n",
 "                              'news_category': news_category})\n",
 "\n",
 "    # naming the columns keeps the schema even when nothing was scraped\n",
 "    return pd.DataFrame(news_data, columns=columns)"
] }, { "cell_type": "code", "execution_count": 430, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
news_headlinenews_articlenews_category
0World's cheapest phone 'Freedom 251' maker's f...The maker of world's cheapest smartphone 'Free...technology
1US unveils world's most powerful supercomputer...The US has unveiled the world's most powerful ...technology
2FB bug changed 1.4 cr users’ privacy setting t...Facebook has said it recently found a bug that...technology
3Contest for 1st couple to marry in self-drivin...The American Automobile Association has launch...technology
4China's ZTE to pay $1 billion fine to US to li...Chinese telecommunications equipment maker ZTE...technology
5Android Co-founder's startup unveils magnetic ...Android Co-founder Andy Rubin's startup Essent...technology
6Yahoo Messenger to shut down 20 years after la...Yahoo has announced it is discontinuing its Me...technology
7Google won't design AI for weapons, surveillan...Google CEO Sundar Pichai has clarified the com...technology
8Virgin Hyperloop One may allow riders to see t...Richard Branson-led Virgin Hyperloop One has s...technology
9Apple patents wearable device to monitor blood...Apple has been granted the patent for a wearab...technology
\n", "
" ], "text/plain": [ " news_headline \\\n", "0 World's cheapest phone 'Freedom 251' maker's f... \n", "1 US unveils world's most powerful supercomputer... \n", "2 FB bug changed 1.4 cr users’ privacy setting t... \n", "3 Contest for 1st couple to marry in self-drivin... \n", "4 China's ZTE to pay $1 billion fine to US to li... \n", "5 Android Co-founder's startup unveils magnetic ... \n", "6 Yahoo Messenger to shut down 20 years after la... \n", "7 Google won't design AI for weapons, surveillan... \n", "8 Virgin Hyperloop One may allow riders to see t... \n", "9 Apple patents wearable device to monitor blood... \n", "\n", " news_article news_category \n", "0 The maker of world's cheapest smartphone 'Free... technology \n", "1 The US has unveiled the world's most powerful ... technology \n", "2 Facebook has said it recently found a bug that... technology \n", "3 The American Automobile Association has launch... technology \n", "4 Chinese telecommunications equipment maker ZTE... technology \n", "5 Android Co-founder Andy Rubin's startup Essent... technology \n", "6 Yahoo has announced it is discontinuing its Me... technology \n", "7 Google CEO Sundar Pichai has clarified the com... technology \n", "8 Richard Branson-led Virgin Hyperloop One has s... technology \n", "9 Apple has been granted the patent for a wearab... 
technology " ] }, "execution_count": 430, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "news_df = build_dataset(seed_urls)\n",
 "news_df.head(10)"
] }, { "cell_type": "code", "execution_count": 431, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "world 25\n", "sports 25\n", "technology 24\n", "Name: news_category, dtype: int64" ] }, "execution_count": 431, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "news_df['news_category'].value_counts()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Text Wrangling and Pre-processing" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "import spacy\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "import nltk\n",
 "from nltk.tokenize.toktok import ToktokTokenizer\n",
 "import re\n",
 "from bs4 import BeautifulSoup\n",
 "from contractions import CONTRACTION_MAP\n",
 "import unicodedata\n",
 "\n",
 "# shared NLP objects used by the normalization helpers in the cells below\n",
 "nlp = spacy.load('en_core', parse = True, tag=True, entity=True)\n",
 "#nlp_vec = spacy.load('en_vecs', parse = True, tag=True, entity=True)\n",
 "tokenizer = ToktokTokenizer()\n",
 "\n",
 "# English stopwords, minus the two negations, which are kept in the text\n",
 "stopword_list = nltk.corpus.stopwords.words('english')\n",
 "stopword_list.remove('no')\n",
 "stopword_list.remove('not')"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Remove HTML tags" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Some important text'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def strip_html_tags(text):\n",
 "    \"\"\"Return the text content of an HTML fragment with the markup removed.\"\"\"\n",
 "    return BeautifulSoup(text, \"html.parser\").get_text()\n",
 "\n",
 "strip_html_tags('

Some important text

')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Remove accented characters" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Some Accented text'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def remove_accented_chars(text):\n",
 "    \"\"\"Decompose text with NFKD and drop every character that has no ASCII encoding.\"\"\"\n",
 "    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')\n",
 "    return text\n",
 "\n",
 "remove_accented_chars('Sómě Áccěntěd těxt')"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Expand contractions" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'You all cannot expand contractions I would think'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def expand_contractions(text, contraction_mapping=CONTRACTION_MAP):\n",
 "    \"\"\"Expand the contractions listed in contraction_mapping, then drop apostrophes.\n",
 "\n",
 "    Matching ignores case and the first character of every match is kept,\n",
 "    so the capitalization of the original text survives the expansion.\n",
 "\n",
 "    Args:\n",
 "        text: string to process.\n",
 "        contraction_mapping: dict of contraction -> expanded form.\n",
 "\n",
 "    Returns:\n",
 "        The expanded text with every remaining apostrophe removed.\n",
 "    \"\"\"\n",
 "    if not contraction_mapping:\n",
 "        # an empty alternation would match the empty string at every position\n",
 "        return re.sub(\"'\", \"\", text)\n",
 "    # lower-cased view of the mapping: a match found through IGNORECASE then\n",
 "    # resolves whatever the case of the key that produced it\n",
 "    lowered_mapping = {key.lower(): value for key, value in contraction_mapping.items()}\n",
 "    # longest keys first, so a short key cannot shadow a longer key that\n",
 "    # begins with it; escaped, so every key is matched literally\n",
 "    alternatives = sorted(contraction_mapping.keys(), key=len, reverse=True)\n",
 "    contractions_pattern = re.compile('({})'.format('|'.join(map(re.escape, alternatives))),\n",
 "                                      flags=re.IGNORECASE|re.DOTALL)\n",
 "    def expand_match(contraction):\n",
 "        match = contraction.group(0)\n",
 "        first_char = match[0]\n",
 "        # exact key first, then the case-insensitive lookup; fall back to the\n",
 "        # matched text itself instead of slicing None\n",
 "        expanded_contraction = (contraction_mapping.get(match)\n",
 "                                or lowered_mapping.get(match.lower())\n",
 "                                or match)\n",
 "        expanded_contraction = first_char+expanded_contraction[1:]\n",
 "        return expanded_contraction\n",
 "\n",
 "    expanded_text = contractions_pattern.sub(expand_match, text)\n",
 "    expanded_text = re.sub(\"'\", \"\", expanded_text)\n",
 "    return expanded_text\n",
 "\n",
 "expand_contractions(\"Y'all can't expand contractions I'd think\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Remove special characters" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Well this was fun What do you think '" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def 
remove_special_characters(text, remove_digits=False):\n",
 "    \"\"\"Drop every character that is not an ASCII letter, a digit or whitespace.\n",
 "\n",
 "    Args:\n",
 "        text: string to clean.\n",
 "        remove_digits: when True, digits are dropped as well.\n",
 "\n",
 "    Returns:\n",
 "        The cleaned string; runs of whitespace are left as they are.\n",
 "    \"\"\"\n",
 "    # A-Z, not A-z: the A-z range also covers the six punctuation characters\n",
 "    # that sit between 'Z' and 'a' (for example '_' and '^') and let them through\n",
 "    pattern = r'[^a-zA-Z0-9\\s]' if not remove_digits else r'[^a-zA-Z\\s]'\n",
 "    text = re.sub(pattern, '', text)\n",
 "    return text\n",
 "\n",
 "remove_special_characters(\"Well this was fun! What do you think? 123#@!\", \n",
 "                          remove_digits=True)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Text lemmatization" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'My system keep crash ! his crash yesterday , ours crash daily'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def lemmatize_text(text):\n",
 "    \"\"\"Run text through the notebook-level nlp pipeline and join the token lemmas with spaces.\n",
 "\n",
 "    Tokens whose lemma is the '-PRON-' placeholder keep their original text.\n",
 "    \"\"\"\n",
 "    text = nlp(text)\n",
 "    text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])\n",
 "    return text\n",
 "\n",
 "lemmatize_text(\"My system keeps crashing! his crashed yesterday, ours crashes daily\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Text stemming" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'My system keep crash hi crash yesterday, our crash daili'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def simple_stemmer(text):\n",
 "    \"\"\"Porter-stem every whitespace-separated word of text and re-join with single spaces.\"\"\"\n",
 "    ps = nltk.porter.PorterStemmer()\n",
 "    text = ' '.join([ps.stem(word) for word in text.split()])\n",
 "    return text\n",
 "\n",
 "simple_stemmer(\"My system keeps crashing his crashed yesterday, ours crashes daily\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Remove stopwords" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "', , stopwords , computer not'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def remove_stopwords(text, is_lower_case=False):\n",
 "    \"\"\"Tokenize text and drop the tokens found in stopword_list.\n",
 "\n",
 "    When is_lower_case is False each token is lower-cased for the lookup\n",
 "    only; the kept tokens are returned unchanged, joined by single spaces.\n",
 "    \"\"\"\n",
 "    tokens = tokenizer.tokenize(text)\n",
 "    tokens = [token.strip() for token in tokens]\n",
 "    if is_lower_case:\n", "        
filtered_tokens = [token for token in tokens if token not in stopword_list]\n",
 "    else:\n",
 "        filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]\n",
 "    filtered_text = ' '.join(filtered_tokens) \n",
 "    return filtered_text\n",
 "\n",
 "remove_stopwords(\"The, and, if are stopwords, computer is not\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Building a text normalizer" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
 "def normalize_corpus(corpus, html_stripping=True, contraction_expansion=True,\n",
 "                     accented_char_removal=True, text_lower_case=True, \n",
 "                     text_lemmatization=True, special_char_removal=True, \n",
 "                     stopword_removal=True, remove_digits=True):\n",
 "    \"\"\"Run the enabled normalization steps over every document of corpus.\n",
 "\n",
 "    The steps always run in this order: HTML stripping, accent removal,\n",
 "    contraction expansion, lower-casing, newline collapsing, lemmatization,\n",
 "    special character (and optionally digit) removal, whitespace collapsing\n",
 "    and stopword removal. Newline and whitespace collapsing are not optional.\n",
 "\n",
 "    Args:\n",
 "        corpus: iterable of document strings.\n",
 "        html_stripping ... stopword_removal: switch the matching step on or off.\n",
 "        remove_digits: also drop digits during special character removal.\n",
 "\n",
 "    Returns:\n",
 "        List with one normalized string per input document.\n",
 "    \"\"\"\n",
 "    # compiled once for the whole corpus instead of once per document\n",
 "    newline_pattern = re.compile(r'[\\r\\n]+')\n",
 "    # the characters { } . ( ) ! get padded with spaces so they become separate tokens\n",
 "    special_char_pattern = re.compile(r'([{}.()!])')\n",
 "\n",
 "    normalized_corpus = []\n",
 "    # normalize each document in the corpus\n",
 "    for doc in corpus:\n",
 "        # strip HTML\n",
 "        if html_stripping:\n",
 "            doc = strip_html_tags(doc)\n",
 "        # remove accented characters\n",
 "        if accented_char_removal:\n",
 "            doc = remove_accented_chars(doc)\n",
 "        # expand contractions\n",
 "        if contraction_expansion:\n",
 "            doc = expand_contractions(doc)\n",
 "        # lowercase the text\n",
 "        if text_lower_case:\n",
 "            doc = doc.lower()\n",
 "        # remove extra newlines (CR and LF only: a '|' inside the character\n",
 "        # class is a literal pipe, so it must not be listed there)\n",
 "        doc = newline_pattern.sub(' ', doc)\n",
 "        # lemmatize text\n",
 "        if text_lemmatization:\n",
 "            doc = lemmatize_text(doc)\n",
 "        # remove special characters and/or digits\n",
 "        if special_char_removal:\n",
 "            # insert spaces between special characters to isolate them\n",
 "            doc = special_char_pattern.sub(\" \\\\1 \", doc)\n",
 "            doc = remove_special_characters(doc, remove_digits=remove_digits)\n",
 "        # remove extra whitespace\n",
 "        doc = re.sub(' +', ' ', doc)\n",
 "        # remove stopwords\n",
 "        if stopword_removal:\n",
 "            doc = remove_stopwords(doc, is_lower_case=text_lower_case)\n",
 "\n",
 "        normalized_corpus.append(doc)\n",
 "\n",
 "    return normalized_corpus"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Pre-process and normalize news articles" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [
 "# a missing article would otherwise turn the whole concatenation into NaN,\n",
 "# which normalize_corpus cannot process\n",
 "news_df['full_text'] = news_df[\"news_headline\"].map(str) + '. ' + news_df[\"news_article\"].fillna('').map(str)"
] }, { "cell_type": "code", "execution_count": 442, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'clean_text': 'us unveils world powerful supercomputer beat china us unveil world powerful supercomputer call summit beat previous record holder china sunway taihulight peak performance trillion calculation per second twice fast sunway taihulight capable trillion calculation per second summit server reportedly take size two tennis court',\n", " 'full_text': \"US unveils world's most powerful supercomputer, beats China. The US has unveiled the world's most powerful supercomputer called 'Summit', beating the previous record-holder China's Sunway TaihuLight. With a peak performance of 200,000 trillion calculations per second, it is over twice as fast as Sunway TaihuLight, which is capable of 93,000 trillion calculations per second. 
Summit has 4,608 servers, which reportedly take up the size of two tennis courts.\"}" ] }, "execution_count": 442, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "news_df['clean_text'] = normalize_corpus(news_df['full_text'])\n",
 "norm_corpus = list(news_df['clean_text'])\n",
 "news_df.iloc[1][['full_text', 'clean_text']].to_dict()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Save the news articles" ] }, { "cell_type": "code", "execution_count": 443, "metadata": {}, "outputs": [], "source": [
 "news_df.to_csv('news.csv', index=False, encoding='utf-8')"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tagging Parts of Speech" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [
 "news_df = pd.read_csv('news.csv')"
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
 "# keep case, inflection and punctuation for the tagging and parsing cells\n",
 "corpus = normalize_corpus(news_df['full_text'], text_lower_case=False, \n",
 "                          text_lemmatization=False, special_char_removal=False)\n",
 "\n",
 "sentence = str(news_df.iloc[1].news_headline)\n",
 "sentence_nlp = nlp(sentence)"
] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WordPOS tagTag type
0USNNPPROPN
1unveilsVBZVERB
2worldNNNOUN
3'sPOSPART
4mostRBSADV
5powerfulJJADJ
6supercomputerNNNOUN
7,,PUNCT
8beatsVBZVERB
9ChinaNNPPROPN
\n", "
" ], "text/plain": [ " Word POS tag Tag type\n", "0 US NNP PROPN\n", "1 unveils VBZ VERB\n", "2 world NN NOUN\n", "3 's POS PART\n", "4 most RBS ADV\n", "5 powerful JJ ADJ\n", "6 supercomputer NN NOUN\n", "7 , , PUNCT\n", "8 beats VBZ VERB\n", "9 China NNP PROPN" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# one (token, tag_, pos_) row per token of the parsed headline\n",
 "spacy_pos_tagged = [(token, token.tag_, token.pos_) for token in sentence_nlp]\n",
 "pd.DataFrame(spacy_pos_tagged, columns=['Word', 'POS tag', 'Tag type'])"
] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WordPOS tag
0USNNP
1unveilsVBZ
2world'sVBZ
3mostRBS
4powerfulJJ
5supercomputer,JJ
6beatsNNS
7ChinaNNP
\n", "
" ], "text/plain": [ " Word POS tag\n", "0 US NNP\n", "1 unveils VBZ\n", "2 world's VBZ\n", "3 most RBS\n", "4 powerful JJ\n", "5 supercomputer, JJ\n", "6 beats NNS\n", "7 China NNP" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# the headline is split on whitespace only, so punctuation stays attached to its word\n",
 "nltk_pos_tagged = nltk.pos_tag(sentence.split())\n",
 "pd.DataFrame(nltk_pos_tagged, columns=['Word', 'POS tag'])"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Shallow Parsing or Chunking Text" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10900 48\n", "(S\n", " Chancellor/NNP\n", " (PP of/IN)\n", " (NP the/DT Exchequer/NNP)\n", " (NP Nigel/NNP Lawson/NNP)\n", " (NP 's/POS restated/VBN commitment/NN)\n", " (PP to/TO)\n", " (NP a/DT firm/NN monetary/JJ policy/NN)\n", " (VP has/VBZ helped/VBN to/TO prevent/VB)\n", " (NP a/DT freefall/NN)\n", " (PP in/IN)\n", " (NP sterling/NN)\n", " (PP over/IN)\n", " (NP the/DT past/JJ week/NN)\n", " ./.)\n" ] } ], "source": [
 "from nltk.corpus import conll2000\n",
 "data = conll2000.chunked_sents()\n",
 "\n",
 "# the first 10900 chunked sentences train the chunker, the rest evaluate it\n",
 "train_data, test_data = data[:10900], data[10900:]\n",
 "\n",
 "print(len(train_data), len(test_data))\n",
 "print(train_data[1])"
] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Chancellor', 'NNP', 'O'),\n", " ('of', 'IN', 'B-PP'),\n", " ('the', 'DT', 'B-NP'),\n", " ('Exchequer', 'NNP', 'I-NP'),\n", " ('Nigel', 'NNP', 'B-NP'),\n", " ('Lawson', 'NNP', 'I-NP'),\n", " (\"'s\", 'POS', 'B-NP'),\n", " ('restated', 'VBN', 'I-NP'),\n", " ('commitment', 'NN', 'I-NP'),\n", " ('to', 'TO', 'B-PP'),\n", " ('a', 'DT', 'B-NP'),\n", " ('firm', 'NN', 'I-NP'),\n", " ('monetary', 'JJ', 'I-NP'),\n", " ('policy', 'NN', 'I-NP'),\n", " ('has', 'VBZ', 'B-VP'),\n", " ('helped', 'VBN', 'I-VP'),\n", " ('to', 'TO', 'I-VP'),\n", " ('prevent', 'VB', 'I-VP'),\n", " ('a', 'DT', 'B-NP'),\n", " ('freefall', 'NN', 
'I-NP'),\n", " ('in', 'IN', 'B-PP'),\n", " ('sterling', 'NN', 'B-NP'),\n", " ('over', 'IN', 'B-PP'),\n", " ('the', 'DT', 'B-NP'),\n", " ('past', 'JJ', 'I-NP'),\n", " ('week', 'NN', 'I-NP'),\n", " ('.', '.', 'O')]" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from nltk.chunk.util import tree2conlltags, conlltags2tree\n",
 "\n",
 "# flatten one chunk tree into (word, POS tag, chunk tag) triples\n",
 "wtc = tree2conlltags(train_data[1])\n",
 "wtc"
] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(S\n", " Chancellor/NNP\n", " (PP of/IN)\n", " (NP the/DT Exchequer/NNP)\n", " (NP Nigel/NNP Lawson/NNP)\n", " (NP 's/POS restated/VBN commitment/NN)\n", " (PP to/TO)\n", " (NP a/DT firm/NN monetary/JJ policy/NN)\n", " (VP has/VBZ helped/VBN to/TO prevent/VB)\n", " (NP a/DT freefall/NN)\n", " (PP in/IN)\n", " (NP sterling/NN)\n", " (PP over/IN)\n", " (NP the/DT past/JJ week/NN)\n", " ./.)\n" ] } ], "source": [
 "# and rebuild the tree from the triples\n",
 "tree = conlltags2tree(wtc)\n",
 "print(tree)"
] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [], "source": [
 "def conll_tag_chunks(chunk_sents):\n",
 "    \"\"\"Reduce chunked sentence trees to lists of (POS tag, chunk tag) pairs.\"\"\"\n",
 "    tag_pairs = []\n",
 "    for sent_tree in chunk_sents:\n",
 "        tag_pairs.append([(pos, chunk) for (_, pos, chunk) in tree2conlltags(sent_tree)])\n",
 "    return tag_pairs\n",
 "\n",
 "\n",
 "def combined_tagger(train_data, taggers, backoff=None):\n",
 "    \"\"\"Train the tagger classes in order, each one backing off to the one before it.\n",
 "\n",
 "    Returns the tagger built last, or backoff itself when taggers is empty.\n",
 "    \"\"\"\n",
 "    chained = backoff\n",
 "    for tagger_class in taggers:\n",
 "        chained = tagger_class(train_data, backoff=chained)\n",
 "    return chained"
] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [], "source": [
 "from nltk.tag import UnigramTagger, BigramTagger\n",
 "from nltk.chunk import ChunkParserI\n",
 "\n",
 "class NGramTagChunker(ChunkParserI):\n",
 "    \"\"\"Chunk parser that predicts a chunk tag for each POS tag with n-gram taggers.\"\"\"\n",
 "\n",
 "    def __init__(self, train_sentences, \n",
 "                 tagger_classes=[UnigramTagger, BigramTagger]):\n",
 "        # the taggers learn the POS tag -> chunk tag mapping of the training trees\n",
 "        pos_chunk_pairs = conll_tag_chunks(train_sentences)\n",
 "        self.chunk_tagger = combined_tagger(pos_chunk_pairs, tagger_classes)\n",
 "\n",
 "    def parse(self, tagged_sentence):\n",
 "        \"\"\"Return the chunk tree for a list of (word, POS tag) pairs, or None when it is empty.\"\"\"\n",
 "        if tagged_sentence:\n",
 "            pos_only = [pos for (_, pos) in tagged_sentence]\n",
 "            pos_with_chunk = self.chunk_tagger.tag(pos_only)\n",
 "            triples = []\n",
 "            for (token, pos), (_, chunk) in zip(tagged_sentence, pos_with_chunk):\n",
 "                triples.append((token, pos, chunk))\n",
 "            return conlltags2tree(triples)\n",
 "        return None"
] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ChunkParse score:\n", " IOB Accuracy: 90.0%%\n", " Precision: 82.1%%\n", " Recall: 86.3%%\n", " F-Measure: 84.1%%\n" ] } ], "source": [
 "# train on the training split and score against the held-out sentences\n",
 "ntc = NGramTagChunker(train_data)\n",
 "print(ntc.evaluate(test_data))"
] }, { "cell_type": "code", "execution_count": 152, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(S\n", " (NP US/NNP)\n", " (VP unveils/VBZ world's/VBZ)\n", " (NP most/RBS powerful/JJ supercomputer,/JJ beats/NNS China/NNP))\n" ] } ], "source": [
 "# chunk the NLTK-tagged news headline\n",
 "chunk_tree = ntc.parse(nltk_pos_tagged)\n",
 "print(chunk_tree)"
] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAsoAAABiCAIAAADGJGK9AAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAHXRFWHRTb2Z0d2FyZQBHUEwgR2hvc3RzY3JpcHQgOS4wOfoZEaQAABVKSURBVHic7d1BjOPWeQfwb+1d1zuynRLeGdcGgpmy2AYe5eCC0SmH3QJMDxugp9D3vbBAzgmo4+YmITkGBoYIkNwKDN2jAxTDw0wBHwwNe8qocINhNG0Ru6P10Fuv5LHXm+nh231g+EiKkp5ISvr/sFhoOBL5+ImP7+N7j5xrV1dXBAAAAKDOC1UXAAAAAFYN0gsAAABQDOkFAAAAKIb0AgAAABRDegEAAACKIb0AAAAAxa5XXQCAZzzPC4LAsixN03Rdr7o4AAAwO/ReQC10u11d1zudThAErutWXRwAAJgL0guohSiKDMMgItu2TdOsujgAADCXa3hqJ9RBEAS+70dRZJom0gsAgGWH9ALqxff9IAgcx6m6IAAAMDsMjkAtdLtdfmGaZhRF1RYGAADmhDtHoBZ83yciwzB837csq+riAADAXDA4AnURRVEQBJh4AQCwApBeAAAAgGKYewEAAACKIb0AAAAAxZBeAAAAgGJIL6BG/vmjj4Kzs6pLAQAA88KNqVCeaDwOBgMiCgaDaDzmF9/86U/Hf/jD48tLInr5xo3LJ0/E+42dHa3RICJtY0Pf2nq2cHubF+qbm/rmZuk7AQAAk+HOEVApHA7D4ZCI/H6fiKLRiH8MBoNoNEq8+fYbb1yMRp89fkxEf7mx8f3bt3ffeusfvvtd8fH4GojIPznJ2q7ZbPILfXOTkw8iMnd3+YWxs6NtbKjZQwAAKADpBUwnODuLRqNoNOJRDNH8y22/1mgYOztExP9rGxvfefPNf/v4408fPfooDMPzcyKyWi1jZ8dsNo3t7WnLwFsXgynh+Tn3iESjEfeRyESRRKm4YOK1yEgAAGAeSC8gSXQ8cMvNzXZqm61vbfHwhEggRD4R7y0Izs68Xs8/OeE16FtbVqtlbG9brVY5+0KxbhUiEjsSDoec5cjErlEsERHjMlqjMVU+BACwbpBerCNudEWLy81talsrZj9wEyumO0y8yo/GY6/XCwYDr9fjngbuqLBarRpOmBCTQig2L4RiiUjq4A5LnSAi4oMJIgCwnpBerKDUGZSU0UZy6yiaRr5An/nqvMKOinLEe0HkCSI54zKUNkEkPi6DCSIAsEqQXiylqWZQcqsmmjS+sFZ4VR0Oh36/75+c+P1+NBppjYa5u2s2m+bu7jpfuOdPEMkZl0mdIBL/vjBBBADqD+lFTc0zgzIxkLEgfr/PWQVfr/MMTXN3F43ftPIniOSMy4gJIqk37mKCCABUCOlFZZTPoCwBOioqNHGCSM6Nu2KCSOqNu5ggAgDKIb1YoBJmUJYDHRXLJXWCSJEbdyk2QST1xl1MEAGAgpBezK7CGZQlCIdDcesHEWmNhrj1Aw3MykidIFLkxl1MEAGAfEgv8tRqBmU5vF6P7/7gdoU7Kvjuj6qLBlWaeYJI/pPd65xhA8A81j29qP8MyhKgowJUSZ0gouTJ7itQ0QDWyuqnF8s4g7IE0Xjsn5zEOyrMZpNTClxNQglEL4jyJ7uvZIUFWDqrkF6szAzKEgRnZ3zfB19E6ltbPEPTbDZxRoZ6EhNElDzZXdR6jMsALNQSpBerPYOyBNxRwXd/iI4KTinWOSywesQEESVPdhcTRDAuAzCDWqQXaziDsgToqABIFZ8gIj/ZneaYIIJxGQChpPQCMyjLIf6QmOiomO0vngMATXqy+zwTRNZnQBbWlvr0gu9sxAzKkrlHR+7h4ar+ITGAmsu/cXeqCSL2nTu4lIIVcF35GrlP/tnIRbNpNptrOIOyfOFwqG9uWq1WPf/iOcBqK3hyy3+yezAYBIMBHrEPq6EWcy8AAABglbxQdQEAAABg1SC9AAAAAMWQXgAAAIBi807tDIIgiiLTNInI930
iMgwjiqIwDPkNuq7ruj7nViCBQ82x5a9A0zRN0xB2gOUin0Jv3br18OFD/i0qMiyveXsvDMNwXZdrhWmavu9rmsb1wTRNXdc9z/M8T0FJIUbX9TAMOc6GYfi+bxgGwg6wdORT6DvvvEOoyLD8FAyO6LrOdSP1V47jiEtqUIXTC34dRVHi+gZhB1giWadQVGRYamrmXti23e125eVRFHW7XXTuLYLIMDzPsywr/iuEHWC5pJ5CUZFhqal5rJau6/GBfyIKw7Ddbuu6blkWqsci2Lbdbrc7nQ5PvOCFCDvAMkqcQlGRYQUoe2ont3biR13XbdtWtXLIEgSBYRjiR4QdYEnFT6GoyLACXnzw4ME8n/d93/O8y8tLbuR837csixd++OGHYRjGGz9QS9O0drstvkGEHWDpyKdQTdNQkWEF4KHgAAAAoBgeqwUAAACKIb0AAAAAxZBeAAAAgGJILwAAAEAxpBcAAACgmLLnXkCF3KOjjz/55DtvvmnfuVN1WQBgCsHZWTQa/c/Fxb/+7nefPnr0+Xj8+Kuvbr3yyl9961vffv31f3znHSIydna0jY2qSwowHfU3pl67f//gpz81d3fVrhZk4XDoHh25h4fRaPT2W2/9xx//qDUa9t279p07+uZm1aUDACIiv98nonA4DIdDIgrPz6Px+L8+++w/P/1UfvNf3Ljx1ZMnN1588cnTp/Jvv3/79s2XXtI3N7VGg4iM7W2t0dAaDWN7e8E7ATA19F4sJa/Xc4+O/JMTrdGwWi377l1jezs4O3MPD93Dw+4HH5jNptVqoTMDYNGi8TgYDIgoGAyi8ZhfEOcT5+eJN7928+Y3T5+Ov/6af9x89dVvv/76377xxt+//baxs2Nsb7fff7/7wQdf/+pXnI78y/Hxf19cfPLo0b8PBkT04e9/T0Svv/LKk6dP/+/LLxMr17e2+LrC2NkhIn1zU/yIzg8oH3ovlgl3V3i9Xnh+rm9tOffuWa1W4sQRjcder+ceHgaDATozAOYnOh64HyIajfjHYDCIRqPEm81m84vLy1dffvnx5eWnjx59+eTJ/z56xL/i5t/Y2eGGP/Uk6fV677733tWvf51VDL/fD8/Pw+GQkxgievXmzb++dWvjpZdevnHj6urq2rVrX1xeit8KWqMh0g7u/OACoPMDFgTpxXLw+31OGojIvnvXarUmRpg7M7xeLxqNzGbTvnPHarXKKCvAsuEJENFoFJyd0fPxCyLyT04S7xSNNP+vbWy8dvPmJ59/Pvzii88eP463+vxO7jkQLyaWxO/3f/Dzn6emF6nFDs/P+f/UTb928+aNF174u+1tTozoec9KamJk7OxojYa2saFvbVGs8wMnc5gN0otai8Zj9/DQPTri7gqr1XLu3Zuqn1NeAzozYA0lJkBwKxuNRvJVfv4QQ1YXwmzJRGo5i6cXqR/nEgaDQXx0JqvjpHhYEnkVPU87RHwAEpBe1FS878FqtfjfPCuM938oWSFAfWRNgChymc4TJCntMl0kEzwgEu/MMJtNHmUwd3cVNrGcXhw/eKBqwCKecMSj8SzV2Noytrf5f/mz3KlDhUeFiKhIVGFNIL2ol0XPnEBnBiyprAkQ8vgFPW/qpp1kwDkKJyjBYBBf87MOCdXJhIzTi8WdQhP7mJpwFNzHqea0JvqEuI+H0Pmx0pBe1EX8LtMS7vuYYTIHwEKlToAo0lEv2qqphiSKNLQ5V/YLsuj0Qra4HprEFzpVRoh7blcA0ovq8c0gibtMy9l04lYU+84d++5d3MMGiyAudhMj/UUudpVMM8waJoinESUnE6mFLDm9kC16fkl8KzRf5wfuua0zpBeViTftxs4OdyFUVVW8Xo//ETozYFZZDUbxCRAKr1anneRYE8HZ2fcePKjbKXTiLSqLCKYYBSt+Ow/uua0VpBcV8Pt9nrZJNWvL0ZkB+bLm+hU/4y9orL2S9m9BluIUWnn2lpiLQwVyWVLaGQYTIb0oT2JaZZ0b79S
nglZdKChD6iVjkRs4y+yvnphMKOm9r8qSnkLnuUVlQeUh6VnsuOe2TEgvyiAe101LdVNofLZp5cM3oMRUs/1rcs03ce4hXy4vaTIhu3b//v6Pf7wUp4gcCm9RWYSp7rktficzxCG9WKCVeT53hZNPYVpTdRqnPqug2hHrZzeFlvioibq5dv++88Mfdn70o6oLolj5DxGZ2TzTkHHPrYD0YiHC4bD729+KB3Kvxl8XkzszVmCnllHx+/3q3+tb8O5Qfl1tUUuzqumFrJxbVBZhhntua5XElwPphWKJC33n3r3VOy1ykrHsXTJ1VnzYeOlu2PP7/bo9aqJu1ie9kK3GFF0lDxyrbRUuDumFGms4TSHxJ9NWo4emHDNMgFjSPzRV5P4C/kPk1ZazVtY5vZBVfovKIhS/55ZmfQRtHSC9mFfiiRHrNi9hZeaXqDXtn/CmlXheobj0XKWWoHzf+9nPzGYT6UWWut2iotwMz4+hjL/AV2q5JerTix/84hcdy1rer3Yq7tHRP/3mNzW/y7QcojODiC5++cuqi1MZr9d797334kuynmBdnwkQSvyN43A+Uf+B85prv/++vrmJvsCCsqbvmM3mwU9+UnXpFqL44OnMf3pXCfXpxbrx+31cjQlcw9c5IDxVrT4XEKXxej1OLNZnl6GeuN3lvzVddVkqEO86rbYPDOkFAAAAKPZC1QUAAACAVYP0AgAAABRDegEAAACKXU9dGoZhGIaaphmG4fs+ERmGoWkaEXmeFwSBZVmapum6nvrxIAiiKDJNk4j449evX//mm2/iSwzDiKIoDEP+iK7rWWuriXa73el0xI9BEHieZxiGZVlTrYd3n/eXA6VpmqZpIhQcdnoeRl4o4l+JnJ3l40FEJgiCdrt9cHAw1fqLx4TqFBYi4qJOfBtXKH4tDvX4wvgOFqlitRJFUbfb5dfxOpJQvMpMDFf9TxcLVfCom58cc7XfQuLsMa154lCwkSre6s2D12+aJm/R9/0gCBzHSX1zoiWamRwBwzDCMEwsvHXr1sOHD/kjU3/jVxkODg7Ea8dx+EWn0zk+Pr66utrb2xMLU1mWJdbA75SXiK2cnp52Op39/f2cFVbu9PRUXhiPUvH17O3tiR8Tobi4uNjf3+dQxMOeuvWSZe1s/Eg4Pj7OOaiyFI/JVc3CUvwASD3U5R0sXsXqo9PpXFxcFHzzVBGT35y6cN2UHAE55gq/hXkO8jnLULCRWnSVdByHN3pwcGDbNi/M2TWFJ73U/ZUXztxMp/deZImiiK+xbNvm1CaLruu+73MSlLUk/ivHccQF0DxE3heGoeu6uq7bth2GYbfb1XWdL4gdx/F9nwtjWZbrumEYmqZpmiZfYOm6HkWRbducuvKbDcPIT9zExRmvTVyMyjsr0v8oihLr1DTNsiwOBa+B3zxPysypsW3b3Dfguq5lWWJnNU2LoshxHN5ZjoZlWZ7nUfbFqPhsomCGYSS+4iJhKR4TUhcW1u12E50lvMupwUnsSxRFnueJrhfbtiduLvVQj+9g8So2EX+VXPIoiizLMgxD3q/8wyNRF+TDw3Vd3/e5P4krUWodnGdHlJCPw9RypgZNfDwejdSawh05fDxrmsYXoHLM5aNOfCS+3dQS+r6fetQVLGENiSOfD0JK2xd6fh7mQHU6nfw4TDwPU+FGSmGVlIVhqOs6b9E0zfgJzXVdfoM4/8gtkdy0cedHPFA5W0/d36yWepZmOivvSO29OD4+7nQ6ItvK4TgOJzvi4/KSq9jVm8Lei9SS89bFtuK/EpfO4leJ1/Jqs7bF13Cnp6ec7WbZ29vjwuzt7YnLPk6NHcexLCuenyrJl3klvJvxmPCLi4uL+FZs2079LsTOxoNzfHxsWVbq28RWioRlqphcKQoLl5bDwtcN+cGR96X49dP+/v7BwcH+/r7jOCIO8g4Wr2JF2LYtgimqYep+Xf354ZFTF+TDQ/4uUuug/Kt8ansvUo/D1HLKQcuKRn4oco6
l1KNO3m5WCeUIFC/hDBbae2FZVsFoC/v7+zm1r/h5uGAjpbZKJmRFUvQIJnp2r9JOsImmTRCBypK6v/LCmZvp6XovDMPgPM73/W63mzU4JJKd+PB56pIwDNvttq7rlmUtejCV18/XEETEVyr8v7hg5fLMtn7HcVzX5YuS/Ms127Z5/Cw+digScL4A4iJ1u12xcJ7RVnGRKtYTz08Ta+aL6Zy1BUEgrgnkTp1E2lswLMVjQurCIrZCfx6ErOAU/4rziejJOzhVFZuIr4/F66z9kg+PnLow8fCYE9fKIgunUvy7SwSNsqORHwo+XLNiLh918naLm62E+eSYz/8tyLhR4Nf50RbXzfm1fqpKWqSRUlslCxLTPuI9u1kSTVvBQInPJvZXXjhzM52ZXqTWChFc7gKduHZuM3KWLKLjVMz7Ey9S8deWaMwMw5i5PNw3RVJbmCXeSMeJswx3cInewtRxpeK4l7Xb7fJGDcPodruzrZN7z/h1fH5uqqnCMjEmpDosqbKCM+1XHKdpGq9Q13X5NC12cNoqli+KInGEcx9v1peeODzmrAsF62Cq1GGynLGz4quVv7vUciaCRrNGg1dSvKLJ280qoWyqEvIZeOLRK8d8/m8hX060gyAQ6QKP9eQUe6pKOrGRUlslEwzDcF1XHB5ZJ8DiigdKkCOQWDhzM52ZXvBgMI//iXxNzC/1fT8nKeYhQz6HmqbJo7OJJfw2HjxTnmTw+qMoCoKAiIIgEFt3XZcXEpFpmu++++7+/j7/yCkb73X8S+IoR1EUL6q8kK//uOmdeIhYlhW/wyI+jkjP8zkebeXSzlmZeT2cUfG3ydMmxFcsvs12u813f1DsBJQaAdd1NU3jQGXNqqHnMz+KhKVITNSGhbfCM6VFnsTHSWpw5H3Rdb3dbvNMBc7us7bC+8L9E57n8WC8vIMFq1hBPADPZ20x713eL/nwyKkLicODKxQvEWPnJNVByjiKsspMRDxvIGfhVLKOQ7mciaBlRSO1pojwcldQp9NJjXnqUSdvN6uE8lFXvIS8QnHSyyHHfP5vIYFnh8T3JSvauq5zZeFbVzh6qbWv4AmnYCNFqqtkAl918F5wgXlDnueFYSgaLJ6iIdcguWlzHEcOVMEIZIVl9mY6Z+Dk4uIidZCv/jO3T09PZ55eO88OLkVw4uYp8MShTSVbqVDB41/53ilcYeoMleLrn/PwmLkOpn52nhWy1N2RV5s1rad4NA4ODhK30hT5bNZ2U3d85pPzwcFBYrJUDnnT838LWaUqErGCb6tzlcwi71p91jYz/M0RgNUUvw2k6rIsjaqCVtp2+a6KRU90AyD8STMAAABQDg8FBwAAAMWQXgAAAIBiSC8AAABAMaQXAAAAoBjSCwAAAFAM6QUAAAAo9v9DKMOhZ04sJAAAAABJRU5ErkJggg==", "text/plain": [ "Tree('S', [Tree('NP', [('US', 'NNP')]), Tree('VP', [('unveils', 'VBZ'), (\"world's\", 'VBZ')]), Tree('NP', [('most', 'RBS'), ('powerful', 'JJ'), ('supercomputer,', 'JJ'), ('beats', 'NNS'), ('China', 'NNP')])])" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import display\n", "os.environ['PATH'] = os.environ['PATH']+\";C:\\\\Program Files\\\\gs\\\\gs9.09\\\\bin\\\\\"\n", "display(chunk_tree)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Constituency parsing" ] }, { "cell_type": 
"code", "execution_count": 446, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(ROOT\n", " (SINV\n", " (S\n", " (NP (NNP US))\n", " (VP\n", " (VBZ unveils)\n", " (NP\n", " (NP (NN world) (POS 's))\n", " (ADJP (RBS most) (JJ powerful))\n", " (NN supercomputer))))\n", " (, ,)\n", " (VP (VBZ beats))\n", " (NP (NNP China))))\n" ] } ], "source": [ "# set java path\n", "import os\n", "java_path = r'C:\\Program Files\\Java\\jdk1.8.0_102\\bin\\java.exe'\n", "os.environ['JAVAHOME'] = java_path\n", "\n", "from nltk.parse.stanford import StanfordParser\n", "\n", "scp = StanfordParser(path_to_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser.jar',\n", " path_to_models_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser-3.5.2-models.jar')\n", " \n", "result = list(scp.raw_parse(sentence))\n", "print(result[0])" ] }, { "cell_type": "code", "execution_count": 447, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAisAAAEdCAIAAABCB7BSAAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAHXRFWHRTb2Z0d2FyZQBHUEwgR2hvc3RzY3JpcHQgOS4wOfoZEaQAACAASURBVHic7d1PbBzZfSfwJ82MFyS9k6lsSGc0jtn7BgkCdtZ/8sxgMRhgdChdJDgbYFU6SjMHlwAhgB145Kqb5VsVpEMsBBOzfMgourG0t5EufAcxgLIH6mGNLNgIkLDQDGILS9p8dnbYZoKsew8/6amm/5HsrupX3f39YDAoPTa7f91drG+9P119pt1uMwAAgLE7a7sAAACYUUggAACwAwkEAAB2IIEAAMAOJBAAANjxqu0CAGBqZVmWZRltc8455+ZHaZoqpTzPE0L0bNFaK6Xy9+a67rgKhzFBHwgAykKR47ou5zxN0zRNqT0MQ8dxoijKsixJkp4tFD+u60op6f/2ngeUBQkEAKXjnAdBQP0hpRTnnDo0nudlWdbdwhgTQpjuEWPM931LtUOJMAoHACXSWkspaUjN8zzGmJQyCAJzA9d1f/jDH/7oRz/Kt1C/J38/+RE8mBroAwHAmFCfxnEcMznEGNNaf+ELX+hocRzHQn0wdkggACiR4ziu63qe53keTfB4nmcmhBhjUsrvfOc7HS358TeYYq/cunXLdg0AMJ2klGmaPnnyxHXdc+fOpWmqtRZCNBoNKeXR0VGSJFevXuWcd7ScO3eOMZZlWRzHSqlGo4GFcFPpDK5MCgDjRzND+VzpboGphwQCAAA7MA8EAAB2IIEAAMAOJBAAANiBBAIAADuQQAAAYAcSCABsCh88OPPBB7arADuQQAAAYAcSCABsEsvLtksAa5BAAGCTs7BguwSwBgkEAAB2IIEAwD61u2u7BLAACQQA9unDQ9slgAVIIAAAsAMJBAAAdiCBAMAmrIWbZUggALAJnweaZUggAACwAwkEAPZhLdxsQgIBgH34PNBsQgIBAIAdSCAAALADCQQAA
HYggQDAMlGr2S4B7EACAYBlbr3OFxdtVwEWnGm327ZrAACAWYQ+EAAA2IEEAgAAO5BAAABgBxIIAADseNV2AQAwzdI0VUp5nuc4DuecMaaU0lrTTznnnHNqcV2XMSalZIx9+umnn//85/M/dRxHCGHxiUAZ0AcCgLLEccw5j6JIKZUkCTUKIShsXNelTBJCJElC2eO6rpTyy1/+cpZl5qdSSsTPVEICAUBZtNaUHL7vU+r0wzmnBDL/zLLM3AlFEUwfJBAAlMXzvDiOwzCUUg5OIMaY7/txHJt/mhBK09TzvHILBUuQQABQFiFEEARRFDHG8unSE+fccRzT9fF9nwbuaBKo7FLBCiQQAJTFpI7rumb1wQAmdQylFGaAphjWwgFAWWhqh5YSmJG0JEmyLNNa09AcLT2gpQo0XWRCyPO8MAw3NjasPQEoGa4LBwAl0lorpY6dBILZhAQCAAA7MA8EAAB2IIEAAMAOJBAAANiBBAIAm2Sj8YPc1RBgpmAlAgAUSe3u6sPD59vNpm61aDvb2zPbcns7/ytnzpxpt9uvnD37hddff/ONN/7LF7/4+2++KWo1Z2FBLC+Ps3gYMyQQAAyS7e9n+/vd2/rw8DPte3v97sFZWBC12vPt+Xm+tETbfHGRLy4yxn70N3/zP//hH376i1/8v1//uvvXv/ibv/n7b77JFxedhQX6FVGrOfPzxTw9sAoJBDBzZKNBG/rwUO3umnbVbL5sf7Hdk1uvm+18urzcHqr7km5tPfq7v/sfSv3fX/3qtVdf/dwrrxz+67/Sj/7T5z//808/zd/4eSepVmOMuSsr5v8wQZBAABOv38BXPkU6Br460NHcbL9sX1427eM8vstGQzYa6dZWtrf3H+fm3l5cfOXs2Z9/+mnzZz+jG9QWF3/79dd/+atfvXr27P/+53/O/y6lI/WTMJRXcUgggMrpN/CVn0pRzaZJnW58aYkGuBhjNH5ltvPtZruy1O5uurVFUcQY+5M//MP/vLjYbrd/orVsNOgVcBYWvvKlL/3266+/Pjd39syZX7fb9KLlBwapt5QfypuIpz/1kEAApdOt1mcGuHoNfA2eSmG5ga/8VMroA1+TItvfpyiiV8yt192Vlf/69tt7//IvandXNZsmkilsRK0mlpf/w2uvzX/uc9QvpPzuSO78UB51+DDJNE5IIIBhmKmUju3RB77yg12Y2OjWEUWiVvNWV73VVb64mO3vq2ZzQCBRN4i9eMvo/3TLjnkvM5THGHNXVqY73S1CAgEw9tnBrn5riIcb+MpPpeD8ukC61Uq3tuT2drq1xRgTtZpbr3urqyYqTh5I5g6fp9HuLq306zmUR31QDOUVAgkE06nfwNfoa4jzA184AFWBiSKaGeJLS9Qr6ui1nDaQ8r/48r9eQ3kmkKirhKG8k0MCwcQ4duBruDXEDANfUyTd2qJFdPrw0FlY8FZX3ZUVb3W1+5ZDB5JBXSUzlMe6xl2xXvxYSCCwZogPz3fID3z1W0OMs9HZRFEkG41sb29wFJHRA4nkh/JYr0mm/FAe1osjgaAwY/jwPMPAF5xSx3pub3WVposGn5f0CyR3ZYUvLbkrK6c9s8kP5dFfxIChvNm59AMSCAap7IfnAU6Lokhub9MeS1Hkrqyc5IQm29+XjQbFhumX86Ulsbw8XCDlq6I/IjNzOVNDeUig2TJ9H54HOK0B67lPeA/UNyojkIyTrxef6KE8JNBkw4fnAYZGUaSaTbOemzpGpz2UjyGQCPWT8kN5k37pByRQteDD8wDj1/HRon7ruU9obIGUf0QzlDdZl35AApUOH54HmBS61aLPFdF6booisbw8YBHdscYfSEb1L/2ABDo1fHgeYBac/KNFp2IxkEilLv2ABOqU7e8nm5sMH54HAMbYZ78qgqIouHixqD/nAYHknz8/zrGNk1/6QdRq/nvvFfKgSKBOstG4cPs2PjwPAB3MR4uefu97JXVTTCDJ7W2KujIe5bQldVz6g
TG28eGHhdw5EggAAOw4a7sAAACYUUggAACwAwkEAAB2vGq7gGpJ01Qp5Xme4zicc9vlAMBMkFIyxjjnnHOllNbacRzHcbIsoxvQj8ZfGBXjuq4pUgihtS6qMPSBXorjmHMeRZFSKkkS2+UAwKzgnGdZRodyIYSUUghB/3Rdl3OepmmapuMvTAiRJAllj+u6Ukpzdl5IYUigl7TWQgjGmO/7lPkAAGNACUTbWuuOXgXnPAgCc4Mx45xTAvX80YiFIYFe8jwvjuMwDKWUSCAAGCcTQmmaep6X/5HWmkZoLJXGfN+P47i7ffTCMA/0khCC+kBSyjiOgyCwXREAzArf98MwjKKIJoGoMcuyMAw5557nWUwgznl+UqrAwpBAL5nUoeFO2+UAQOWEYcgYi6KopPtXStF5MOGc+75f0mOdCgWk+WdRhSGBXjIrPaSUHb1gAADGWJIk6+vrJd2553lhGG5sbNA/pZRpmkopLeaQlJJWZtHsOC3RKrAwXJXnM7TWSilMAgFANyllqQk0g5BAAAAnkqapWSQNhUACAQCAHViNDQAAdiCBAADADiQQAADYgdXYL9F38erDQ7646NbrYnnZdkUAANNs1lciUOqYb2h3Fha+8ju/8/jv/5623ZUVUashjQDAOPPBBxs3b7orK6U+yoU7d/ji4tq1a6U+yhDCBw/Sra2dXhfpGcIs9oHoy95N6jDG3Ho9uHTJXVkxe5VJpjBNWZoijQBgzLL9fdsl9Jbt7RV1V7OSQGp3V25vq2aTxtlYr9TJQxoBAJRtmhOoZ+r458/3S51+kEYAAGWYtgTqTh1Rqw2ROv0gjQAAijINCZTt78tGQ25vd6SOWF5263Vnfr6kx0UaAUBJRK2mmk3bVfTgrqzEDx8WdW+TmkCUOtTXoWmx8aROP0gjAIDTmqQE6pk63uqqrdTpB2kEAHASVU+g7tThS0vuyop7+XKlUqcfpBEADMd8XKSCZKNRyMx6FRNIt1pye5s+tZNPHXHxoruywhcXbRc4JKQRAEBeVRLIpA6tZGPTkjr9II0AAGwmUL/UocXT05c6/SCNAGA2jTuBulOHDrLe6qq3ujo7qdMP0ggAGGOVPRgWW9iYEqj7AqCUOjiYDoA0AphZSKBR9UydyPNw0BwC0ggApk/BCYTUGQOkEcCMyPb3q9kZKuq63QUk0Em+7ABKgjQCmGJIoN5O+2UHMAZIIwCYLMMkUPjgAV2ZbrgvO4AxGJBGwaVL0eXLdssDmFxuve4sLJT9KM7CwngeaAhuvV5Uz2yYb+nO9vez/X2kziSSjYazsIBuEABUwTAJBAAAMLqztgsAAIAZhQQCAAA7kEAAAGDHMWvhlFJaa9d1GWNSSsaYEEJrnWUZ3YBzzjkvu0oYDr1l9B7RW+k4juM4ePsArDv2z9NxHCEEbdMNaFsI4ThOeYX1POxnWVZGFhzTBxJCJElCj+e6rpTScRx6GNd1OedpmqZpetpHhfHgnGdZRu+XEEJKKYTA2wdQBYP/POmgb/480zR1Xdd13XwUlaTnYb+kLDh+FI5zTo/a80dBEJgMhKqhXZy2tdYdZyh4+wAsGvzn6TiO53nmBtQZon+OYdyi52G/jCw40TyQ7/txHHe3a63jOMYwTpWZvTxNU8/z8j/C2wdgV88/zyzLwjAMw/DKlSumkTaSJAmCYDy19TzsF54FJ7omAuc8PzrJXrxGnHPP83AIqzLf98MwjKKIRpmpEW8fQBX0/PPknPu+z14c1qMoovY4jk17qfNApoyOw37PxhEPJie9Kg+9Uvk66LWAiaCUMlOaDG8fwLDoMGhSoRAdf54GLUygbSmlmepXStGKgLJ1HPZ7No54MHnl1q1bA34spUzT9OjoiF4gKaXnedT45MmTLMt6vnBQKY7jhGFo3mi8fQBDu3LlShAEBY4c9PvzlFJKKefm5t59913GWBiGR0dH1MheTAuVZMBhv/AswFV5AABOREqZJMn6+rrtQqYHPpEKAHAiW
utix98AfSAAALADfSAAALADCQQAAHYggWbLj//pn3SrZbsKAADGhviW7mRzkzHmv/deCcVA8dTurmo2s/191WzK7e0v/MZv/J9f/pIvLYnlZb605K6siFrNmZ+3XSYAfEayuckXFyv4VdSy0cj294uKgFMnkNze1q0WEqiyOiKHGilygkuX/uCtt1r/9m/00/jhw/jhQ/NTBBJAdaRbW4yxaiaQajatJRBfWlLNZiGPDYUYHDmDQyX/uwgkABizUycQWDdK5HQQy8tiebnnPSOQAKBswySQPjwsvA4YgLq9utUaPXIGQyABwDgNk0AYhSsbRQ4d+s2rXUbkDIZAArCFLy5m+/u2qygdRuEqoWfkiFpN1Gre6qqo1aowIYlAAhgbZ2GBIYGgJBMROYMhkABgREigMZmCyBkMgQRQLIzC9ZA/ysAAUx85gyGQAEaU7e3ZLqF0p04gZ2GhjDqmwIxHzmAIJADohlG44SFyhoZAAgA2dAKp3d1ZG457/nGcRiPb21O7u6aDjMgZHQIJYDYNmUCz8KHUfpHj1uve6qozP4/IKQkCCcBdWaFdfbphFO6lwZHDFxdFrTZrPb8qQCABTKuZTiBEziRCIAFMjdlaC4fImT4IJJhistGY7qH+Kf88ECJn1iCQACbIdI7CJZubcnsbkQMnDyS3Xsf3LgKM2Zl2u33a35GNRsXPHK/fu5ft74taDZEDA+QDSdRq0eXLtisCeI7Gbyp4pM3293WrVdRBdZgEAgAAGN1Z2wUAAMCMQgIBAIAdSCAAALCjx1o4pZTW2nVdxpiUkjEmhMiyrLtRa51lGf0W55xzPra6qQZ6UCrYcRzHcUw9juMIIczToUYhhOM4YysSKqvnTm5xf4apl6apUiqKouF+nQ5xxZaUR+W5riuEUEoppYIg6HnLMAyHfhbdevSBhBBJktCfpeu6Uko6mnc30p+o67qc8zRN0zQtqqxjcc6zLKMChBBSSiGEqYcik+pJ09R1Xdd181EEM65q+zNMPc/zRvl1pVRRlXQLw9BxHMqVMAzpENrvxr7vF/jQvT8PxDmXUtIZ4uBG86MgCOI4LrCywSiBaFtr3XG66jiO53lUD72UdGOc1YJRqf0ZZgTtVFpr3/fpcKSUStOUc06N1NGRUkop6SgXRZGUMk1TM/BjMoB+l064B8fGAHQqT38IdPpF7UmS0E+DIMhXZc716adxHHPOafyp45am/kEP3+4lCIKdnZ0oimh7QOPGxka73T44OIiiaH19vee9lWRtbW1nZ4c2Dg4OTGMQBEEQeJ5HPzXFj7M2qL6q7c8w9TzPM0cq2uVoT6OW/Laxvr7+9OnT9os9s0MQBHSHOzs7dLMhbGxsdN95FEXmntfW1jpu31EDHWkH199P32simFgb3JhlWRiGnHPP88bcw/B9n0Yk8yOk5gRBax3HMcVvHMemEfNAYFRqf4apR/sbbdOGUop2uY5bmv734ENWEARJktBtih0cM1Pm+dGmfugvxXEcM81xwvrZ4Kvy0CF+cGO+S2iFUqpn35MWJjDGqDNoOrw9R11gZlVtf4YZQQdrzrkQomOXU0qZRBk8GUkDX+yzJ9ynRXOi5sDY74h6cievnzH2yq1btzqaaMzx6OiI6pBSep43oPHJkydZlo1Y9HAcxwnD0DwFUw+NQs7Nzb377rthGB4dHVELezEtBDOumvszTLEkSWiNGWMsTdNz585RPyPLsk8++eTo6ChN00ajIYSYm5v75JNPGGNSykajoZT6xje+wV50LNI0ffbsWb1eZ4zdvXuXVinfv39fCEGNpzU3Nzc3N3f37l0qcm5u7tmzZ/QoZsHOu+++SwdbKaVSyvyNKKXu379Pf0d0y6tXr/arvydclQcAYHxoMj8/NqW17h6eOeHNejYWVdgY7g0JBAAAduCaCAAAYAcSCAAA7EACAQCAHUggAACwYzq/pRvghOgLUtXubuMnP3nzjTechQV3ZYUvLvLFRdulAUy/U6+Fu
3DnDmNs48MPy6mnGGc++GDj5k13ZcV2IVAt9M3H9M3c2f6+3N7O/3T5t35r92c/M/+kb3nnS0tieZn+P/Z6AabcqftAolZTzWYJlQAUTzYaFDYUPPrwkNopXYJLl6jHEz96lG5t/a/vf9+Zn38eTo2GPjzM9vfjhw/NvfGlJb64+DyZFhdxigMwIozCwfRQu7vZ3p7a3X3ey9nbo3ZKDv/8+Z69Gd1qpVtb/vnzzvw8Y6w7XUzPiTaSx49NkjkLC6JWE7WaMz9vNsb1dAEmHhIIJpXprGR7e9TLoXZKBW91lVLh2J4KJYr/3nv9buDMz7srKx33k+9dpVtbJu0YY269zhcXaUrJWVjA8B1AP8MkEEbhYPw6OiL5KRy3Xhe1mluvuysrQ/RCks1Nb3X1tEsPuoPN9MAoEeXjx2YELz+lRMl0qscCmFbDJJAZggAoz7FTOGJ5mbZHeZRkczPb21u7dm30gsXyslhe9lZXTUvHlFJ++K5jSgnDdzCbMAoHlZDvQND/qd1M4dBhuvARrXRr6yQjdcM5dkopP3zXMaWEFeEwC5BAYEG+c0BHZGrvmMIpu2egdnfl9vba+++X9xAdjp1Sktvb+VFuM6WEFeEwlU6dQBgrgNPqOPHPD6nlp3DGf9afPH7sLCwMWIMwHgOmlGj4Tj5+bH5kppSwIhymwDCfByqhDJgq+ZP6/KpoOnr2WxU9ZrrVSh4/Di5dslhDPwOmlBhjHSvCzZQSVoTDxMEoHIyqYw1Y95BaNc/W40ePGGPWO0AnNNyUEmMMFxmCKhsygXSrhfOs2fR8MfSL9V0dq6Ldet1bXZ2IM3H6FOrkHpoHTylle3s0pWRWhLv1ujM/j4sMQaUMmUCq2azaKS2URDYaPadwKGbMhW0m61BOi7C9IhZhV0rPKSV9eNjzIkOiVqPeUjU7qTALMAoHn2EuadN9YRuxvFyRKZzRJY8f0/IH24WUjt6p/DM1U3TdFxnClBKMGRJoppnvJpisKZwRUa9unIuwK6XflFLPiwyZThK+twLKgASaIQO+m4BWRU/KFM6I0q0tvrQ0KWsQxoCmlDoan39ai5aZNJv5KSV8bwUUBauxp9kJv5tgps5q6eo41VyEXSmUSQMuMoTvrYDR4ROp02O47yaYNcnmJmMsuHjRdiGTB99bAYUbZhTOrdedhYXCSylQ9Sss3IU7d2hU7bTfTTBrsr0981VAMKJTfW9FcOlSdPmyjTKhuk79Ld1QTcnmJi6xDNVEvfPRL2QO0wcJBAAAdpy1XQAAAMwoJBAAANiBBAIAADuOWQuXZVmWZY7jCCGklIwxIYTjOIyxNE2VUp7nOY7DOR9HsSMIwzCKIvNPpVSapkIIz/MsVjUcpZTW2nVdxph5U7TWWZbRDTjn1X9HCkcvBe2r1EJ7L22b/bajnV6oGX/pCtdzF82yrLvRvCkwo9rH2djYMNtBENBGFEVPnz5tt9tra2umscp2dna6G/NPbbJ4nmeKN68/tezs7ERRtL6+bq04e2hvPDg4MC304jx9+jQIAtpjjY2NDfMa4qUrXM9dtGcjzLIhR+G01nSm6fs+ndQUQkoZxzFjLMuyMAyTJKHt69evx3GcJEkYhlprxliaptevX0/TlDFG7XRWpZSiX4zjmG4ppQzDUCl17KPT76ZpGsfxSW5vEeecnm/PHwVBYE7qZ4rWOggC2m3yhBBRFNHeMsAsv3SF67mLDthvYTYNmUCe58VxTMf9AhPIdV0KNs55FEV0LOCcO47jeZ7v++b4QqN/NIbmui7n3HVdrbWUMooi3/d936dbuq4bRdFJOvtpmgZB4HneRAzN+b5Pad1Bax3H8QwOJdFwseM4dObR7dh9YGZfupL03EX77bcwm4a8MqkQgqKCei1BEBRaVQ90XMgfXyhj6P80x6OUos7TcA9B8aa1dhzH9/2iKi8JBXP+hJ2eO+fc87wZPIymaWpOsZVSZjaoA+0zHY0z/tKVpHsX7dcIM+v4BMr3c
sxZpEkd13WL7VabgOl3Jmtwzmlu01TFORdCDB0eWZbRk6Jz4fzKhWryfT8ft5zz6gdnebTWpk8chmFHApndyRz7tNYmbGb8pStPxy46oBFm0yu3bt0afAvHce7fv390dJQkybe+9a25uTnG2Pe///1nz55Ro+d5586dK6qgRqPx5MkTpdSTJ0+ePHly9epVpRQVIIRIkkRKefXqVXP7b3/721EUUVV0bvXJJ58cHR2ladpoNIQQNFRId5hlGR2YejbevXuXIu3+/ftCiHq9XtSTKpaUMk1TekHon57nUWP+6cyUOI6fPXt27ty5c+fO0U5ydHSktabdQEr55MkT2nuVUo1Gg/5PO9KMv3RlGLCLdjTarhQsO9FVebTWSqmO+Z6ejYUw0z/D/foohZX3pKA6RtzBAKAouC4cAADYgWsiAACAHUggAACwAwkEAAB2IIEAAMAOJNDEix89qt28+bXvfU+3WrZrqbRkc9P50z8992d/Fj54YLuWmRM+eCAbDdtVQOUggSaYbrWufPRRmKZ/8NZbuz//+dvf/S7+yHtSu7sX7ty5/vHHX/3Sl/7b174WP3z4dhDgtRqn+OFDvODQbcir8oB1anf3+scfq2Zz7f33/ffey/b3r3z00YXbtyPPCy5etF1dVehWK370KH74kC8trd+44a2uMsb++9e/Hqbphdu3vdXVtfffd+bnbZcJMKOOvyYCVFC6tXXlo4/YmTMbN29+4ytfYYw5CwveH/0RHXCf/fKX7/7e78299prtMi1Lt7au/OVffvLjHweXLv31N78pajVq54uL/vnzc5/73P2//du7GxuMsXd/93dtFjoD7kopajV3ZcV2IVAt+ETq5AkfPIgfPnTr9fUbN7rP35PNzesffyxqtfUbN/jiopUKrcv296/fuye3t916PfI8sbzc72ZhmqZbW6JWizwPx8fyXLhzR9Rq0eXLtguBakECTRKa+JHb28GlSwP+mNXu7oXbtxlja9eu0bjTTAkfPEgeP2aMRZ7nv/fesbeXjcb1e/eyvb3g0qXg4kUMypUBCQQ9IYEmhtrdvfLRR/rw8CQHVt1qXbh9WzWbg7Nqypgs8c+fjzzvVFlCPUtnYeGEuQWnggSCnpBAkyHZ3AzTlC8urr3/fr8xpW50VJ2F+XbdaoVpmjx+PMp4Wn7sbu3atZkdwyzDhTt3GGMbH35ouxCoFiTQBBglSIaLrskSP3oUP3rEGAsuXhx9HWC6tRU+eIBBuWIhgaAnJFClFTKYdqrhu8lilqR7q6uR5xXVa8mv4Y4uX57BubTCIYGgJyRQdRW4oOCESxgmSD4k1q5dK2MZm9rdDdNUbm8XG2+zCQkEPeHzQBWVbG7+8Z//ef2ttzZu3hz90ypzr7129Z13jv793+OHD5/84z9+46tfnehPCyWbm3/8gx9QoP71N79ZL+4revPOvfHG1XfeoY8NJZubutXCcu2hNX7602e/+MXVd96xXQhUC/pAlWMm1YdY0HWsdGvr+r17zsLC+o0bkzgtZGWxgHlHyutvTb3wwQPVbKIPBB1wXbhqyfb3L9y+nTx+HHne2rVrhU+De6urGzdvOvPzX791K9ncLPbOS6VbrfDBg7e/+13VbK7fuLHx4YdjGxZz5ufXrl2j1+3C7dvX793DRWABCoE+UIXIRuPKRx8xxtZv3Cj1RFu3Wtc//jjd2vLPn1+7dq28BypKdT40aj7uWsi6u9lBr9vBX/yF7UKgWpBAVRE/ehSmqajV6Fx7Kh9xCObCOYOvrzPjJVUffaKg/Vd/ZbsQqBYkkH0WeyRj63UNp8odjup0yyYCEgh6QgJZ1vElC+MvgL7WQTWblfpaB9lohGmqms0ylmMUxawIdxYWZvMSfCeHBIKekEA2URfE+sq0/Oq7KkwLXb93b4IWnuWX52GtVz805IsEgg5IIJvo0F+Rc3xaGleFiybEjx7pVmuyPjmbbG5m+/uTVfM46VZLNZvVP5+AMUMCAQCAHfg8E
AAA2IEEAgAAO5BAAABgx6u2C5ghSimtteu6jDEpJWNMs2rF5gAAB6BJREFUCJFlWXej4zgl1ZBlWZZltM0555ybH6VpqpTyPE8I0d3oOE7+xmWUkW90HGcMZYzu2CfS8SLD6LTW5f2BwJihDzQ+QogkSShmXNeVUtJxtruxvBroaOi6Luc8TdM0Tak9DEPHcaIoyrIsSRJqjOOYcx5FkVLKNJZXhmmkYDa1lVfG6DjndPbAXtTfrxGKopSyXQIUBn2gseKcSynN4WlA4xgqCYIgjmPGmFLKHDQ9zwvDkG6jtaaOiO/7lJGllmE4juN5nmkcQxlTTymVpilFO2W8lFIpFQQBnXNwzn3fT5IkyzI6B9JaU4eYfpdzrrX2fZ9+Srf0PI9OFKIool+h0wW6kyAIzEM7jqO1DoIgSRLqxDiOQz3FKIp6Pm7PCqWUaZrSbkAt5iFOUiFUThvGKAiCnZ2dKIpoe0BjeTY2Ntrt9sHBQRRF6+vr7XabHjp/A7rN06dPoygKgoD+Waz19fWNjY319fUgCJ4+fUqNa2trQRAEQeB53s7ODjWWWkYhzCt2bKMtQRAcHBy02+2dnR3zaufLMzue7/t0S/NbZvfIb9Mtaf/pvpP2Z3dv8+v0Jq6trdGvD3jcARV2vKqnqhCqBn2gceOcm7O/wY0lybIsDEPOued5NEZED23Gi7TWtC2EoM6HlDKOYzqlLYOZ8jFntXQ2TeetYyvjtJIkoWqPbbQu3/kYXB71Tsy2Uop2mJ639Dyv3/3Qe5fv3Ju7NXvdgMc94fNijA1dIVQBEsgC3/e7/2B6NpbBHOUNz/OSJDFHdinl2toaY8wc7mmCqtgyHMehYxPnvPuonT8elVrGKMwZg8nsfo3WZVlGr2E+2rXW9FOzQdtmqp+eghBiiEylOxFCxHF8khHmjsftKCxfYYehK4QqQAKND41r09HWdV2aVO/ZWGoNNIyezyE63MdxTIPvpt2szZNSFnsuacqIokgIQYsRHMcxQ/wsdyJcXhkjchyH3i+abxjQaB0tJqQZx/wiQypVa22m96l+OuJTL9nsG0op04UKw1ApRedMZoqFc25uqbWOoojWLppdi2ZlaPGn1prSmnb+jscdUCHnPAxD13XpGdEKyRNWCJVjexgQquLg4KB73qJn4/hVpIxuOzs7Zr5qcKN1PV/D7lJ7zkSe6vXf2NgwMzon//V+M6A9X8zuO6zsHgKD4bpwAPAcdcc9zxvzykxbjwvWIYEAAMAOfCIVAADsQAIBAIAdSCAAALADCQQAAHYggWy6cOdO+OCB7Sqek43GmQ8+sF3F8zJko2G7kFM788EH1Xk3ASYCEggAAOxAAgEAgB1IIKgWUavZLmFIk1s5gC1IIKgWZ37edglDchYWbJcAMGGQQAAAYAcSCKpINZu2SxhGtrdnuwSASYIEgirSrZbtEoYxoWUD2IIEAgAAO5BAAABgBxIIKmdCF5VN7io+AFuQQFA5E/rBGr60ZLsEgAmDBAIAADuQQACF0YeHtksAmCRIIKiiCf080ISWDWALEggAAOxAAgEAgB1IIAAAsONV2wXMNFGr8cVF21U85ywsuPW67SoYY8xdWZnEy9u4KyuYBwI4lTPtdtt2DQAAMIswCgcAAHYggQAAwA4kEAAA2IEEAgAAO5BAFaKUCsMwTdOeP03TNAzDMh70woULhd9tGbTWtksYktY6fKHfbQa/+wBTCauxK0QIIYSQUvb8qed5SqkyHrffI1aNUsp1XdtVDCNJkiAIHMcZcJvB7z7AVEICFSlNU6WU7/ucc6VUkiSe57muq5RK09RxHK21ORIlSZJlmed5dNobRVHP+zS/yzkvo2YhRP6wTg8nhMiyzHVdIURRDxTHsdbacRzHcbIsYy+ecs8Xp7sMKWWapnSA5pz7vj9iPfT608NprT3PoyfbXU/Pt5XaOedaa9/3B7ynSZJIKakD57qu67pSSqVUEARZliVJUsjTAZhIbShUEATtd
nttbc1s5zcODg7Mdrvd9n1/fX294x42NjbM9sHBQRRFtP306VPP88qoOf+IQRAcHBy02+2dnZ2nT58W+yj0svi+3z7uxelZRr7OQvi+T49yknraubc1/77kt9t93tP8m046XvOe7QBTD32ggplTZjrfZ4xJKU0no2McxnEcz/MG3JtSyvRChBAldYPyfaAgCJIkoeILPzGn+vMvQr8Xp9QyDOqQ5R+9Xz0db6tSKsuynpM6x76nAGAggYpHAyxxHFN4CCHiOB5uAoNzbiYGtNY0eFWqLMuCIKCHi+O439hgUfq9OOMpQ2ttThRolGzAm5V/WznnQohRotGsqpjc5RUAo0MCFYymCjjn5hhKUzh05JJSmhPkMAxp+RN7MWFA21prKSXNDVCnIUkSOu92HCd/kl4GOtOn5CtwEogxRnMhruuaKE2ShJ5j94vTswzOeRiGNK/GOR+9q+E4DvW0GGN0b/3erI63lTpPdDN6XyiNut9Tepqm0byn1M4Y01rTApPud3/EZwdQfbgu3JjQgWa48MiyTGtdbB4MMEqpBT7iyRuHFoZhz97VCR9lxGIohksaWQWYCEggmFH5xYq2awGYUUggAACwA9dEAAAAO5BAAABgBxIIAADsQAIBAIAdSCAAALADCQQAAHb8f0Hf4725e+P4AAAAAElFTkSuQmCC", "text/plain": [ "Tree('ROOT', [Tree('SINV', [Tree('S', [Tree('NP', [Tree('NNP', ['US'])]), Tree('VP', [Tree('VBZ', ['unveils']), Tree('NP', [Tree('NP', [Tree('NN', ['world']), Tree('POS', [\"'s\"])]), Tree('ADJP', [Tree('RBS', ['most']), Tree('JJ', ['powerful'])]), Tree('NN', ['supercomputer'])])])]), Tree(',', [',']), Tree('VP', [Tree('VBZ', ['beats'])]), Tree('NP', [Tree('NNP', ['China'])])])])" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import display\n", "os.environ['PATH'] = os.environ['PATH']+\";C:\\\\Program Files\\\\gs\\\\gs9.09\\\\bin\\\\\"\n", "display(result[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dependency parsing" ] }, { "cell_type": "code", "execution_count": 448, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]<---US[compound]--->[]\n", "--------\n", "['US']<---unveils[nsubj]--->['supercomputer', ',']\n", "--------\n", "[]<---world[poss]--->[\"'s\"]\n", "--------\n", "[]<---'s[case]--->[]\n", "--------\n", "[]<---most[amod]--->[]\n", "--------\n", "[]<---powerful[compound]--->[]\n", "--------\n", "['world', 'most', 'powerful']<---supercomputer[appos]--->[]\n", "--------\n", "[]<---,[punct]--->[]\n", "--------\n", "['unveils']<---beats[ROOT]--->['China']\n", 
"--------\n", "[]<---China[dobj]--->[]\n", "--------\n" ] } ], "source": [ "dependency_pattern = '{left}<---{word}[{w_type}]--->{right}\\n--------'\n", "for token in sentence_nlp:\n", " print(dependency_pattern.format(word=token.orth_, \n", " w_type=token.dep_,\n", " left=[t.orth_ \n", " for t \n", " in token.lefts],\n", " right=[t.orth_ \n", " for t \n", " in token.rights]))" ] }, { "cell_type": "code", "execution_count": 449, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Program Files\\Anaconda3\\lib\\runpy.py:184: DeprecationWarning: Positional arguments to Doc.merge are deprecated. Instead, use the keyword arguments, for example tag=, lemma= or ent_type=.\n", " \"__main__\", mod_spec)\n" ] }, { "data": { "text/html": [ "\n", "\n", " US\n", " PROPN\n", "\n", "\n", "\n", " unveils\n", " NOUN\n", "\n", "\n", "\n", " world\n", " NOUN\n", "\n", "\n", "\n", " 's\n", " PART\n", "\n", "\n", "\n", " most\n", " ADJ\n", "\n", "\n", "\n", " powerful\n", " NOUN\n", "\n", "\n", "\n", " supercomputer,\n", " NOUN\n", "\n", "\n", "\n", " beats\n", " NOUN\n", "\n", "\n", "\n", " China\n", " PROPN\n", "\n", "\n", "\n", " \n", " \n", " compound\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nsubj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " poss\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " case\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " amod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " compound\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " appos\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " dobj\n", " \n", " \n", "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from spacy import displacy\n", "\n", "displacy.render(sentence_nlp, jupyter=True, \n", " options={'distance': 110,\n", " 'arrow_stroke': 2,\n", " 'arrow_width': 8})" ] }, { "cell_type": "code", "execution_count": 450, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "(beats (unveils US (supercomputer (world 's) (powerful most))) China)\n" ] } ], "source": [ "from nltk.parse.stanford import StanfordDependencyParser\n", "sdp = StanfordDependencyParser(path_to_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser.jar',\n", " path_to_models_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser-3.5.2-models.jar') \n", "result = list(sdp.raw_parse(sentence)) \n", "dep_tree = [parse.tree() for parse in result][0]\n", "print(dep_tree)" ] }, { "cell_type": "code", "execution_count": 451, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAM0AAACtCAIAAACcKmxkAAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAHXRFWHRTb2Z0d2FyZQBHUEwgR2hvc3RzY3JpcHQgOS4wOfoZEaQAAAygSURBVHic7Z0xj9tGGobHd8EV2YqFtwgC2JjSCg44EOoCxAUXB2zgznQbpKH/QACqdEkCrtyZ3SLd0qWjRlNsai2RQ4BVc1hiN02QXWAHyJ0XTrVXfPYcQ1IUJVLDIfU+hUHJJDWkXn0zGi0f3ru7u2MAbJm/dN0AsBMgZ0AHyBnQAXIGdICcAR0gZ0AHyNlq4jieTCYbby6lbLExPQU5W43ruk02T5KkrZb0l3uYp63DZDKxLIsxJqX0PI9zzhhLkiSOY845PWlZlhBCCME5T9M0CALGmBAijmPalnPueR7tkLa1bTtNU8dxbNvu7uC0cAdq4Lruzc0NLfu+f3d3d3NzEwQBPZNdJo6Pj09PT2l5NpsVd+j7Pu3w/PxcrTlgPuk65/2Ac041iTFGC0mSpGmaG7eFYUgLUkq1fim+70dRRKupIjdgkLO1oXE959y27WxEkiRRoYnjuHonaZr6vk97C8OQOtkB89cXL1503QbTiaIoSRIazsdx/Nlnn9m2bVlWmqZv3759//59HMeLxeLLL798+/YtY0wIsVgskiR58uQJ7YHqXBzHv/7662g0Yoy9evUqTVMp5ffff2/bNj05ZLruuPvEbDZTozTi5uYmN/wqrlO62rInhwq+bwIdYP4M6AA5AzpAzoAOkLP2+dcvv8jb265bYRaYP2sHeXsrzs7EYiEWi/+8f3/9++/OaOQ8euSMRvaDB123rnvwfbMRyeXlh3idnTHG+P6+8+jR3z///L9//KGetPb23PHYfvjQHY+tTz/tusndgJytTbZ0pVdXjLGK0hXP58nlZTyf05r2w4fOaOSOx7tW5JCzupSWLopXnSqVXl/H83lycSEWC/nunbW3R9s6jx7x+/e33/yOQc6qWKt01Yd2KM7OkosL9rHIUWrbarlpIGclNCxd9Umvr8VikVxcxPO5fPeOMaZGcgMrcsjZB7ZUuupDwzhV5Pj+Pg3jWg93J+x6zrSVrvrI21s1kusk8dtgF3PWeemqT+nHoI9TJDuUMwNLV33os9HfKZKB56xHpas+aookns9ZZh7Y5CmSYeas16VrLfoyRTKcnA2ydNWHpkjoDKh5YHOmSHqfs90pXfVZNkXijsddNamXOdvx0lUfNUWi5oG7OlF9yhlKVxO6PXum5wylq3WKp1TDFImhOUPp0sOyKZLW54GNy9nzoyOUrk6gP5XLTpG447F/eNjKzo3L2cHLl/z+fZSuDlFTJHx/P3j6tJV9Gpcz
MEhwvRPQAXIGdICcAR2YnrOcyC5JkslkstIuBpZBTmchhJRSCKHEgEWaqJ9L0GonWp/z8/Pik7ujc2oX3/fp1M1mM8/z7irPZOmZ35h2rkcXQiRJ4vt+mqZRFJHwN03TMAzJuEn6Qsuy4jgWQjiO47puFEVkAXYcZ5lT2LZtkg5XsHNO4Y1I05Rz7jgOY8xxHHVWoyhiH/2S5DotnvnSt1KtmfU+L6WtwGY/GWQKpgX6WGRNwep/z8/PX79+fVfpFC79wOWe3DWn8GbMZrPiyQyCQJ06ei+y62cflr6Viqz3uZSt+zXoM2FZlrpdg+d5URTRv/QhKHUK12fXnMItQgJUxhjVpOqVi29lfe9zazlTr73y9h90SNmWFZ3Ca7FrTuHNsG07iiLqNxljSZI0HGCs5X1us55RTy+lJGUwuYNV6creFsRxnGfPnh0fH9ND6vjDMLRtW7Weyht9LVJ3eCh9Mo7jJEk45zSqaPGIhoRlWY7jTCYTx3HoREkp4zhO01S9QTSGK57k0reSc06j7TRNqXCoEJewcX9f5Pz8vMmXlCZe4J1yCjek1NS87b3h902gA9PnacEwQM6ADpAzoAPkDOjAIA+yWCx+urz8x4MHpl1LDZpjRM7EYhFOp/SHwunVlTMa+YeHSNuQ6HheI5sw//DQ++or9QzSNiQ6y1kxYaX/i7QNgw5yVp2w0jWRtr6jNWf1E1a6FdLWXzTlbLOEle4BaesjW89Zcnk5ieMmCcuCtPWULeYsvb4Op9Po5KSVhGVB2nrHVnKmEmbt7XmPH7d17XwOpK1HtJyzXML8w8NtOzKQtl7QWs70JywL0mY4LeSs24RlQdqMpVHOzElYFqTNQDbMmZkJy4K0GcXaOZO3t+F0Gv7wg7EJy4K0GcIaOaOERScn8t07/+uvDU9YFqStc2rlLJswqmEm3GJjXZC2DlmRs2EkLAvS1gkrcvb86Cg6ORlGwrKotJ2+eAGZtwZW5Cy9vmaMDSlhWZLLS4RMD7geHegA19UBHSBnQAfIGdDBn67fJJGVZVm2bQsh2EffHwnGXNe1LGulLXY3WSk03HH+VM8oQ6SqIxsb6e8450EQkGmtm2YaT9YiCIqsvh5dSknJ8zyPilxbFD3Zpd5u0nJTtZBSuq5L2secn5sxRmu6rksaSxKIkkyUZKWWZZFhlDYn1WpWb0s+adq29HVLWyiEILMhY0xZJuu3cCco6vnUMomxT09PgyBQ6vkWKfVkl3q7Pc9T0kDaapmf2/O84+Pj3KsUl9XCzc0NHRrZpsmqv+x1K1qYOzlrtXAXWF3PbNumeka3z6B60Ar1PdlUadRyhZ/bsizXdZfth+oH3cBArU8LSiZd8bo1j4tVGsSrWzhU8jkrvgcqWzRia/G1Sz3Zpd5uKaUaaEspN/Zz005s2w7DsErau+R1cw2rMIs3NIgPj3zOXNcl77UQgkKgvngKIdr9IC7zZOe83Ywxy7Ko8lELS/3cjLHJZEI3gGKZoQ/nXK0ppQyCgL41q8Ok0RLpoqWUND4jt3TudStaSJpqurEL55y+m9ds4U5Q7EqL7urt2axL91z0dmfHWJu1qqiFrrN56euWtrB0h7CAK3rw+ybNp7iuW6enG8DrDpIe5AwMAPzuBHSAnAEdIGdAB0Z4kPVD159Of/75b5988vqbb3CVwLbZxe8BkzdvopMTxtg/v/ji37/9llxcuONx4LpD/fN0E9itnInF4vnRUXp15T1+HLguXX8aTqfhdMoY255CC+xKztLr6+dHR+LszH74MHDdXEcpb28ncUxKQHSj22D4OcuKGgLXrdZ7T+IY3eg2GHjOoh9/DKfT9OqqvqgB3eg2GGzOlH/ZGY0C113rOk10o60zwJypjpLv7wdPn7rj8Wb7QTfaIkPLGfV6LRqN0I22wnBytr3yg260OUPIWfZGBdvLAbrRJvQ+Z2pyX0+/hm50M3qcs9LJfQ2gG92AXuasenJfD+hG16JnOas/ua8HdKM1
6VPONpjc1wC60Tr0I2dNJvf1gG60GtNz1tbkvh7QjS7D6JzF8/nzo6N+3a4g243OvvsOhY0wOmfJ5WV0ctJH1bdYLOL5XOdsi+EYnTMwGHC9E9ABcgZ0gJwBHfTs+k2lGi0asshy1QvZE/neaLmiwRUH2z90yonaYpnsaZlGyjSCIMhJsioYhtlKaz2jkuN5Huc8a33KWYnXUgarbZsL5yuEy9m2VR/FSvFxFEVCCHL3OY5DlsyiW7nhsRiH5lxTySHp8DIrsVp5mTJYfcSziuHT01PXdRs2ryg+Lm1b8Sg2VjPnjqj4v6hnm6DqgfK+llqJ1cPqoUmSJEo4att285KWEx8va1vxKCA+rqaD7wHUR5C7lTFW30pchNy2tKzUsk3IiY8r2pY7iobi4zpu5V6jO2c0pqG7RpBnuWglpjVLlcH0UEophFDjmCiKSPaeq0AbkBMfL2tb8Sjqq5mjKFLP0CCP9ll0K5cebF/puuP+QBNlcPY2Fw1pKFxueBRFt/JgwO+b/wfi4+2BnAEd4HcnoAPkDOgAOQM6QM6ADozO2eTNm4OXL7tuxYaIxeLet9923QpTMDpnYDAgZ0AHyBnQAXIGdICcAR0gZ0AHyBnQAXIGdICcAR0gZ0AHyBnQAXIGdICcAR0gZ0AHyBnQAXIGdICcAR0Y7T/rnf44i7W354xGXbfCFHD9JtAB+k2gA+QM6AA5AzpAzoAOjMtZkiQHBwddt2KLDNWkV41xOWOMKUXjIFEavZ3CxHmNg4OD2WymHiqNfpqmjuMoIW1XhGFIblHLsshUSqLGUmt4rvFSSlqHMdZ7ReNadKv5KyWnRPR9nxzYLXobmzCbzci07Xne3SpreLHxw/Bnr4uJvwfkdIq+75MzVklfO4ekslkD9zIzt4GN7wQTc5ZDuYbpdjVm3llnmZm7F43XQA9yRqZ+Urh3PjhjjNHdTGiwReOzKIrInF00cxcbzzmfTCZ0/xTO+a7cWqDrjrsWTTzWOiltZ/HJvhxOi5j4fRMMDxPnz8DwQM6ADpAzoAPkDOgAOQM6QM6ADpAzoIP/AW1nV8SI0u3YAAAAAElFTkSuQmCC", "text/plain": [ "Tree('beats', [Tree('unveils', ['US', Tree('supercomputer', [Tree('world', [\"'s\"]), Tree('powerful', ['most'])])]), 'China'])" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import display\n", "os.environ['PATH'] = os.environ['PATH']+\";C:\\\\Program Files\\\\gs\\\\gs9.09\\\\bin\\\\\"\n", "display(dep_tree)" ] }, { "cell_type": "code", "execution_count": 452, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "G\r\n", "\r\n", "\r\n", "0\r\n", "0 (None)\r\n", "\r\n", "\r\n", "9\r\n", "9 (beats)\r\n", "\r\n", "\r\n", "0->9\r\n", "\r\n", "\r\n", "root\r\n", "\r\n", "\r\n", "2\r\n", "2 (unveils)\r\n", "\r\n", "\r\n", "9->2\r\n", "\r\n", "\r\n", "ccomp\r\n", "\r\n", "\r\n", "10\r\n", "10 (China)\r\n", "\r\n", "\r\n", "9->10\r\n", "\r\n", "\r\n", "nsubj\r\n", "\r\n", "\r\n", "1\r\n", 
"1 (US)\r\n", "\r\n", "\r\n", "2->1\r\n", "\r\n", "\r\n", "nsubj\r\n", "\r\n", "\r\n", "7\r\n", "7 (supercomputer)\r\n", "\r\n", "\r\n", "2->7\r\n", "\r\n", "\r\n", "dobj\r\n", "\r\n", "\r\n", "3\r\n", "3 (world)\r\n", "\r\n", "\r\n", "7->3\r\n", "\r\n", "\r\n", "nmod:poss\r\n", "\r\n", "\r\n", "6\r\n", "6 (powerful)\r\n", "\r\n", "\r\n", "7->6\r\n", "\r\n", "\r\n", "amod\r\n", "\r\n", "\r\n", "4\r\n", "4 ('s)\r\n", "\r\n", "\r\n", "3->4\r\n", "\r\n", "\r\n", "case\r\n", "\r\n", "\r\n", "5\r\n", "5 (most)\r\n", "\r\n", "\r\n", "6->5\r\n", "\r\n", "\r\n", "advmod\r\n", "\r\n", "\r\n", "\r\n" ], "text/plain": [ "" ] }, "execution_count": 452, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from graphviz import Source\n", "\n", "dep_tree_dot_repr = [parse for parse in result][0].to_dot()\n", "source = Source(dep_tree_dot_repr, filename=\"dep_tree\", format=\"png\")\n", "source" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Named Entity Recognition" ] }, { "cell_type": "code", "execution_count": 453, "metadata": {}, "outputs": [], "source": [ "sentence = str(news_df.iloc[1].full_text)\n", "sentence_nlp = nlp(sentence)" ] }, { "cell_type": "code", "execution_count": 454, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(US, 'GPE'), (China, 'GPE'), (US, 'GPE'), (China, 'GPE'), (Sunway, 'ORG'), (TaihuLight, 'ORG'), (200,000, 'CARDINAL'), (second, 'ORDINAL'), (Sunway, 'ORG'), (TaihuLight, 'ORG'), (93,000, 'CARDINAL'), (4,608, 'CARDINAL'), (two, 'CARDINAL')]\n" ] } ], "source": [ "print([(word, word.ent_type_) for word in sentence_nlp if word.ent_type_])" ] }, { "cell_type": "code", "execution_count": 455, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " US\n", " GPE\n", "\n", " unveils world's most powerful supercomputer, beats \n", "\n", " China\n", " GPE\n", "\n", ". The \n", "\n", " US\n", " GPE\n", "\n", " has unveiled the world's most powerful supercomputer called 'Summit', beating the previous record-holder \n", "\n", " China\n", " GPE\n", "\n", "'s \n", "\n", " Sunway TaihuLight\n", " ORG\n", "\n", ". With a peak performance of \n", "\n", " 200,000\n", " CARDINAL\n", "\n", " trillion calculations per \n", "\n", " second\n", " ORDINAL\n", "\n", ", it is over twice as fast as \n", "\n", " Sunway TaihuLight\n", " ORG\n", "\n", ", which is capable of \n", "\n", " 93,000\n", " CARDINAL\n", "\n", " trillion calculations per second. Summit has \n", "\n", " 4,608\n", " CARDINAL\n", "\n", " servers, which reportedly take up the size of \n", "\n", " two\n", " CARDINAL\n", "\n", " tennis courts.
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "displacy.render(sentence_nlp, style='ent', jupyter=True)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [
"# Collect one (entity text, entity type) pair per named-entity mention in\n",
"# `corpus` by joining consecutive tokens that spaCy tagged as entity tokens.\n",
"named_entities = []\n",
"for sentence in corpus:\n",
"    temp_entity_name = ''\n",
"    temp_named_entity = None\n",
"    sentence = nlp(sentence)\n",
"    for word in sentence:\n",
"        term = word.text\n",
"        tag = word.ent_type_\n",
"        # Close the entity being built when this token is outside any entity\n",
"        # (empty tag) or starts a new one (IOB code 'B'); otherwise two\n",
"        # back-to-back entities would be glued into a single name.\n",
"        if temp_named_entity and (not tag or word.ent_iob_ == 'B'):\n",
"            named_entities.append(temp_named_entity)\n",
"            temp_entity_name = ''\n",
"            temp_named_entity = None\n",
"        if tag:\n",
"            # Multi-token names are joined with single spaces.\n",
"            temp_entity_name = ' '.join([temp_entity_name, term]).strip()\n",
"            temp_named_entity = (temp_entity_name, tag)\n",
"    # An entity that runs up to the last token is never followed by a\n",
"    # non-entity token, so flush it here or it is silently lost.\n",
"    if temp_named_entity:\n",
"        named_entities.append(temp_named_entity)\n",
"\n",
"# One row per entity mention; duplicates are kept so they can be counted later.\n",
"entity_frame = pd.DataFrame(named_entities,\n",
"                            columns=['Entity Name', 'Entity Type'])"
] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234567891011121314
Entity NameUSIndiaIndianSingaporeKim Jong - unoneAppletwofirstMessengerChinaCanadianFacebookYahooTrump
Entity TypeGPEGPENORPGPEPERSONCARDINALORGCARDINALORDINALPRODUCTGPENORPORGORGORG
Frequency301212111110988776666
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "Entity Name US India Indian Singapore Kim Jong - un one Apple \n", "Entity Type GPE GPE NORP GPE PERSON CARDINAL ORG \n", "Frequency 30 12 12 11 11 10 9 \n", "\n", " 7 8 9 10 11 12 13 \\\n", "Entity Name two first Messenger China Canadian Facebook Yahoo \n", "Entity Type CARDINAL ORDINAL PRODUCT GPE NORP ORG ORG \n", "Frequency 8 8 7 7 6 6 6 \n", "\n", " 14 \n", "Entity Name Trump \n", "Entity Type ORG \n", "Frequency 6 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Tally how often each (Entity Name, Entity Type) pair occurs in entity_frame,\n",
"# most frequent first; the count column is called 'Frequency'.\n",
"top_entities = (\n",
"    entity_frame\n",
"    .groupby(by=['Entity Name', 'Entity Type'])\n",
"    .size()\n",
"    .sort_values(ascending=False)\n",
"    .reset_index(name='Frequency')\n",
")\n",
"# Show the first 15 rows transposed, i.e. one column per entity.\n",
"top_entities.head(15).T"
] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234567891011121314
Entity TypePERSONGPEORGDATECARDINALNORPEVENTORDINALPRODUCTMONEYTIMELOCFACQUANTITYWORK_OF_ART
Frequency1651261056766582321151175531
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 \\\n", "Entity Type PERSON GPE ORG DATE CARDINAL NORP EVENT ORDINAL PRODUCT \n", "Frequency 165 126 105 67 66 58 23 21 15 \n", "\n", " 9 10 11 12 13 14 \n", "Entity Type MONEY TIME LOC FAC QUANTITY WORK_OF_ART \n", "Frequency 11 7 5 5 3 1 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Count entity mentions per entity type, most frequent type first.\n",
"top_entities = (entity_frame.groupby(by=['Entity Type'])\n",
"                            .size()\n",
"                            .sort_values(ascending=False)\n",
"                            .reset_index().rename(columns={0 : 'Frequency'}))\n",
"top_entities.T.iloc[:,:15]"
] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [
"# NOTE: the Java executable and Stanford NER locations below are hard-coded\n",
"# absolute paths for one Windows machine; adjust them before running elsewhere.\n",
"from nltk.tag import StanfordNERTagger\n",
"import os\n",
"\n",
"java_path = r'C:\\Program Files\\Java\\jdk1.8.0_102\\bin\\java.exe'\n",
"os.environ['JAVAHOME'] = java_path\n",
"\n",
"sn = StanfordNERTagger('E:/stanford/stanford-ner-2014-08-27/classifiers/english.all.3class.distsim.crf.ser.gz',\n",
"                       path_to_jar='E:/stanford/stanford-ner-2014-08-27/stanford-ner.jar')\n",
"\n",
"# Each document is split on whitespace and tagged token by token.\n",
"ner_tagged_sentences = [sn.tag(sent.split()) for sent in corpus]"
] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [
"# Rebuild multi-token entity names from the per-token (term, tag) pairs in\n",
"# ner_tagged_sentences; tokens tagged 'O' are treated as outside any entity.\n",
"named_entities = []\n",
"for sentence in ner_tagged_sentences:\n",
"    temp_entity_name = ''\n",
"    temp_named_entity = None\n",
"    for term, tag in sentence:\n",
"        # Close the running entity when this token leaves it ('O') or carries\n",
"        # a different tag, so that e.g. a PERSON directly followed by a\n",
"        # LOCATION is not merged into one name labelled with the second tag.\n",
"        if temp_named_entity and tag != temp_named_entity[1]:\n",
"            named_entities.append(temp_named_entity)\n",
"            temp_entity_name = ''\n",
"            temp_named_entity = None\n",
"        if tag != 'O':\n",
"            # Multi-token names are joined with single spaces.\n",
"            temp_entity_name = ' '.join([temp_entity_name, term]).strip()\n",
"            temp_named_entity = (temp_entity_name, tag)\n",
"    # An entity that ends on the last token of the sentence is never followed\n",
"    # by an 'O' token, so flush it here or it is silently lost.\n",
"    if temp_named_entity:\n",
"        named_entities.append(temp_named_entity)\n",
"\n",
"# One row per entity mention; duplicates are kept so they can be counted later.\n",
"entity_frame = pd.DataFrame(named_entities,\n",
"                            columns=['Entity Name', 'Entity Type'])"
] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Entity NameEntity TypeFrequency
0USLOCATION31
1Donald TrumpPERSON13
2IndiaLOCATION13
3TrumpPERSON12
4SingaporeLOCATION11
5Kim Jong-unPERSON9
6FacebookORGANIZATION9
7YahooORGANIZATION6
8KimPERSON6
9NadalPERSON6
10GoogleORGANIZATION5
11TrudeauPERSON5
12ChinaLOCATION5
13North KoreanLOCATION4
14ChhetriPERSON4
\n", "
" ], "text/plain": [ " Entity Name Entity Type Frequency\n", "0 US LOCATION 31\n", "1 Donald Trump PERSON 13\n", "2 India LOCATION 13\n", "3 Trump PERSON 12\n", "4 Singapore LOCATION 11\n", "5 Kim Jong-un PERSON 9\n", "6 Facebook ORGANIZATION 9\n", "7 Yahoo ORGANIZATION 6\n", "8 Kim PERSON 6\n", "9 Nadal PERSON 6\n", "10 Google ORGANIZATION 5\n", "11 Trudeau PERSON 5\n", "12 China LOCATION 5\n", "13 North Korean LOCATION 4\n", "14 Chhetri PERSON 4" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Tally how often each (Entity Name, Entity Type) pair occurs in entity_frame,\n",
"# most frequent first; the count column is called 'Frequency'.\n",
"top_entities = (\n",
"    entity_frame\n",
"    .groupby(by=['Entity Name', 'Entity Type'])\n",
"    .size()\n",
"    .sort_values(ascending=False)\n",
"    .reset_index(name='Frequency')\n",
")\n",
"# Display the 15 most frequent entities.\n",
"top_entities.iloc[:15]"
] }, { "cell_type": "code", "execution_count": 462, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Entity TypeFrequency
0PERSON186
1LOCATION125
2ORGANIZATION54
\n", "
" ], "text/plain": [ " Entity Type Frequency\n", "0 PERSON 186\n", "1 LOCATION 125\n", "2 ORGANIZATION 54" ] }, "execution_count": 462, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Count entity mentions per entity type, most frequent type first; the count\n",
"# column is called 'Frequency'.\n",
"top_entities = (\n",
"    entity_frame\n",
"    .groupby(by=['Entity Type'])\n",
"    .size()\n",
"    .sort_values(ascending=False)\n",
"    .reset_index(name='Frequency')\n",
")\n",
"# Display the first five rows.\n",
"top_entities.iloc[:5]"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Emotion and Sentiment Analysis" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
"from afinn import Afinn\n",
"\n",
"# Scorer instance reused for every article below.\n",
"af = Afinn()"
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
"def _sentiment_label(score):\n",
"    \"\"\"Return 'positive', 'negative' or 'neutral' according to the sign of score.\"\"\"\n",
"    if score > 0:\n",
"        return 'positive'\n",
"    if score < 0:\n",
"        return 'negative'\n",
"    return 'neutral'\n",
"\n",
"\n",
"# Score every article in corpus with AFINN, then label each score by its sign.\n",
"sentiment_scores = [af.score(article) for article in corpus]\n",
"sentiment_category = [_sentiment_label(score) for score in sentiment_scores]"
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentiment_score
countmeanstdmin25%50%75%max
news_category
sports25.02.167.363649-10.0-3.00.07.020.0
technology24.0-0.254.936554-15.0-4.00.03.06.0
world25.01.486.042351-12.0-1.01.05.016.0
\n", "
" ], "text/plain": [ " sentiment_score \n", " count mean std min 25% 50% 75% max\n", "news_category \n", "sports 25.0 2.16 7.363649 -10.0 -3.0 0.0 7.0 20.0\n", "technology 24.0 -0.25 4.936554 -15.0 -4.0 0.0 3.0 6.0\n", "world 25.0 1.48 6.042351 -12.0 -1.0 1.0 5.0 16.0" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame([list(news_df['news_category']), sentiment_scores, sentiment_category]).T\n", "df.columns = ['news_category', 'sentiment_score', 'sentiment_category']\n", "df['sentiment_score'] = df.sentiment_score.astype('float')\n", "df.groupby(by=['news_category']).describe()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAEkCAYAAAARu8HuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xd8VfX9x/HXJyEQ9pa9FRXZIENE\nQNyzKqg4EEdtf0qttVpcqJW21tVaraMOUByoOHChInuICigCAjIElCmbsDM+vz/uTciE3OTenIz3\n8/G4j+TsNzfA535yzvkec3dERERERETkkLigA4iIiIiIiBQ3apRERERERESyUaMkIiIiIiKSjRol\nERERERGRbNQoiYiIiIiIZKNGSUREREREJBs1SiIiUWZmbmYDgjxmJBnM7GUz+zh26SRSQfwdEhGR\nrNQoiYjkg5l9ZGYT81h2fPiD7enhWQ2Aj4ouXa4iyfBH4KoYZgHAzIaE36cc72NxawzMrIOZfWBm\nG81sv5n9bGbvmlmzKB8nrya1OPwdAorfz0ZEpKioURIRyZ8XgVPNrHkuy64H1gCTANx9o7sfKLpo\nOUWSwd13uvuOWGcKSwX6mNmZRXS8iJlZXUI/y93AucBxwNXASqBaUWQoDn+HRETKOjVKIiL58wmw\nCbg280wzSyD0IXqku6eF52W/DO4+M1tjZgfCZyhGZ1o21cz+m22fWc4ymNlZZjbDzLab2TYz+9zM\njj9c2MwZwvvzXF5D8jjeVDN7xsz+YWZbzOxXM3vMzOIyrVPPzD40s33hP9u1ZrbIzB44wvu4H3ge\neDjz/nLJX93Mng8fO8nMpplZ10zLN5rZZZmmZ4XXKxeePib8Z2wUnr7YzBaE824L769eHofvBdQE\nrnX3ee6+2t2nuftf3H1hpmM2MrM3wz+X7Wb2iZkdk2n5A+H35HIzWxnON87M6qQvB64Bzs30M+kb\nXpb559c8PH15OPc+M/vOzNqbWVsz+9LM9pjZTDNrke19PN/M5oXPiq0ys7+bWflMy1eb2b1m9j8z\n22Vma83sjszLw9+ODWdYjYhIGaFGSUQkH9w9BXgFGJLtA/75QB1gVG7bmdklwO3ATcAxwHnANxEe\nvjLwBNAN6AvsBD7K/IH3CP5I6FKu9Nc9wF5g7mG2uRJIAU4ChgK3ApdlWv4K0Aw4FbiQ0KV7+b0s\
n7a9Aq/AxcjAzI9SYNiL0fnUCpgOTzaxBeLVpQL/w+pWArsCB8FcIvU8r3H2dmdUH3gxnPh44BXj1\nMPk2EqqPA8JZcstYCZhCqPHrA/QENgATw8vSNSf0vl0EnBH+s/w9vOwx4G1gIod+Nl8eJtdfgYfD\n+9gBvAE8Rejn2Q1IBJ7MlPFM4HXgv8AJwHXAAOAf2fb7J2Ah0Dm8/0fMrGd42Ynhr78N5zsREZEy\nQo2SiEj+vQQ0BU7LNO96YIK7/5LHNs0IfYCe4O4/u/tcd/9vHuvmyt3fDb+Wu/sCQme1WhD6cJyf\n7XeGL+XaCBwN3Adc7e6LDrPZYne/z92XufvbhJqC/gBmdixwJvA7d5/t7vOBIUClPPeWNc+vhJqE\nEWZWIZdV+gEdgQHu/o27r3D34cBPhM7eAUwNrwehM0A/EWqu0uf1Da8D0BBIAN4Jnx1a5O4vuvum\nPPJ9RaiZeAXYZmYTzOxuy3p/0uWAETrrtMDdlwK/A6oQau7SlQOGhNeZTehsWv/wcXYD+4AD6T8f\ndz+Y9zvHv9x9fPhYjxNqfp5y9ynu/gOhhqhfpvXvAR5191HuvtLdpwDDgN9nawAnuPt/w+/zU8CK\nTBk3h9fZEc63GRGRMkKNkohIPrn7ckJnNq4DMLOGhBqGFw+z2VhCv+lfZWYvmdnAPJqDPJlZKzN7\nI3z51i5ClwDGEWraItlPc+A94EF3f+8Iqy/INr0eOCr8/XFAGpnOSIUbxfURxHmc0Ptycy7LuhBq\nujab2e70F9CW0JkoCDVBrcM/g76EGrmp4e8hdJZnavj77wmdtVlkoQEZ/s9C9yHlyd3vAeoDNxI6\n23I9sNjM+mfK2AJIypRvJ6FL9lpl2tUad9+ZaTrz+xipzD+T9CZvYbZ5lTOd0eoC3JPtPXyD0BnK\n+nnst7AZRURKjXJBBxARKWFeBF4ws1qEzqJsAz7Ma2V3/yV8BqY/oTNRjwP3m1l3d99DqOHIfnlX\nQrbpj4B1hM5YrCN0SdxiIL+X3mFmVcI5P3f37Jde5SY5+x+FQ79cy/VytEi4+24ze5DQWaWR2RbH\nEfrQ3zuXTXeFt19iZpsINUZ9CV2aOAd4yszaELpsb2p43VQzOwPoQejyt+uBh8ysj7t/f5iMWwk1\numPN7C7gO2A4oYEe4oD5hM4sZbct0/eHex8jlXlffph5cZm+/pXQnyG7zGeGoplRRKTUUKMkIhKZ\ndwjdF3IVoTNLo909+wfNLNx9P6HLwj4xs38SugemFzCB0AfWBtk26QCsBjCz2oTuq7k5fOkUZtaZ\nCP7/Dt9T9TqQBNyQ3+0OYwmhD9JdgK/Dx2hM6BK3SDxP6N6nO7PN/xaoB6S5+0+H2X4aoVHpugLT\n3P1XM9sC/IXw/UnpK7q7A7OB2eEG7QdC9w7l2Shl5u4HzWwlh/6M3wKDgC2FHDHwIBBfiO0P51vg\nOHdfUcj9JBO7jCIixZYaJRGRCLj7PjN7A3iA0GVWLx1ufQuNLFeOUEOxm9CH82RgeXiVycATZnYB\n8COhs0ZNCDdKwHZgC/BbM/uF0JmSRwmdVcqv+wkNNnAaUDPT7Sk73X1fBPsBwN1/NLPPgefM7P8I\nDWjwKKEBIvywG2fdT4qZ3Q2MzrZoIjAL+MDM/gIsJXSp2FnARHefEV5vKqGmdWn4vicINU9XkWlw\nDTPrQejP/jmhM1WdCL3Hi3PLZWbnETpT9CawjNAZtPOBcwi9lxBqPG8PZ7wP+Dm8zwuB58KXaebH\nauDs8FnHrYR+JodtvCPwIPCxma0hNGhECqHLF7u5+18i2M9qoL+ZTSN0P9X2KOUTESnWdGpdRCRy\nLxJqkr509yVHWHcHoUu9ZgCLgEuAi919VXj5yEyvWYSaqffTNw4POX4Z0D68/dOELv+K5Bk7fYC6\nhM6ebMj0uuxwGx3BEGAtoWblQ0KNw6+EmqZ8c/d3yHaPTPjsz
zmEmsgXCDWQbwPHkvU+qCmEznRM\nPcK8nYTO4H1MqEF9HBjh7q/lEWsxoZ/DY4Qut/uGUPN1O+ER49x9L6HR834idGnbUkKDP9Qk1Nzm\n1wuEztDNJXR2sVcE2x6Wu39O6IxbP0J/hm8Inb37OcJd/Tm8j18IvR8iImWCheqRiIhIwYWfDbQe\nGOTu7wadR0REpLB06Z2IiETMzE4FqhIade0oQs8G2gJ8FmQuERGRaFGjJCIiBZEA/A1oSejepK+B\nU8Ij+YmIiJR4uvROREREREQkGw3mICIiIiIiko0aJRERERERkWzUKImIiIiIiGSjRklERERERCQb\nNUoiIiIiIiLZqFESERERERHJRo2SiIiIiIhINmqUREREREREslGjJCIiIiIiko0aJRERERERkWzU\nKImIiIiIiGSjRklERERERCQbNUoiIiIiIiLZqFESERERERHJplzQAaKlTp063rx586BjiIiUafPm\nzdvi7nWDzlEcqU6JiAQvkjpVahql5s2bM3fu3KBjiIiUaWa2JugMxZXqlIhI8CKpU7r0TkRERERE\nJBs1SiIiIiIiItmoURIREREREcmm1NyjlJvk5GTWrl3L/v37g45S6iUmJtK4cWMSEhKCjiIiUmKo\nThUd1SkRiVSpbpTWrl1L1apVad68OWYWdJxSy93ZunUra9eupUWLFkHHERGJGjNrAowG6gNpwPPu\n/h8zqwW8BTQHVgOXuvv2SPevOlU0VKdEpCACvfTOzJqY2RQzW2JmP5jZH8Pza5nZF2a2PPy1ZkH2\nv3//fmrXrq3iE2NmRu3atfUbUREpjVKAP7v78UAP4GYzawPcCUxy92OASeHpiKlOFQ3VKREpiKDv\nUYppAQJUfIqI3uco2PYT7NsRdAoRycTdN7j7t+Hvk4AlQCPgQuCV8GqvAL8p6DH0/2fR0PssIpEK\n9NI7d98AbAh/n2RmmQtQ3/BqrwBTgWEBRCw2xo0bR+vWrWnTpk3QUSTa9myFNy6FdXOhXCKcOhxO\nGhp0KhHJxsyaA52Ar4F64RqGu28ws6MCjFYsqE6VfKNHj2bNmrwfMbNx40YA6tevn+c6zZo1Y/Dg\nwVHPJhKEoM8oZThcAQJUgMaNY/HixTE9Rmpqakz3L3mY+a9QkwSQsh++uA92rgs2k4hkYWZVgHeB\nW919VwTb3Whmc81s7ubNm2MXsBhQnSr9Dhw4wIEDB4KOIVJ03D3wF1AFmAdcHJ7ekW359jy2uxGY\nC8xt2rSpZ7d48eIc87JbtWqVH3fccX7DDTd4mzZt/PTTT/e9e/f6ihUr/Mwzz/TOnTv7ySef7EuW\nLPGUlBRv0aKFp6Wl+fbt293MfNq0ae7ufvLJJ/vy5ct96tSp3qFDB+/QoYN37NjRd+3aleexH374\nYW/btq23b9/ehw0b5u7uzz//vHft2tXbt2/vF198se/Zs8dnzZrlNWvW9ObNm3uHDh18xYoVueZz\nd1+xYoV3797du3bt6sOHD/fKlSu7u3taWprffvvtfsIJJ3jbtm39zTffdHf3KVOmeN++fX3QoEF+\n/PHH+7333utPPPFERsa7777b//Of/xzxfczv+y25eP1S9/urZX39ND3oVCIFAsz1YlBXovkCEoDP\ngdsyzfsRaBD+vgHw45H206VLlxzvl+qU6lRJ8uCDD/qDDz4YdAyRQomkTqkArVrl8fHx/t1337m7\n+8CBA/3VV1/1U0891ZctW+bu7l999ZX369fP3d3PPPNMX7RokX/00UfetWtX/9vf/ub79+/35s2b\nu7v7eeed5zNnznR396SkJE9OTs71uOPHj/eePXv6nj173N1969at7u6+ZcuWjHXuuecef/LJJ93d\n/ZprrvGxY8dmLMsr37nnnutvvPGGu7s/++yzGQXonXfe8dNOO81TUlJ848aN3qRJE1+/fr1PmTLF\nK1Wq5D/99FPG+9GpUyd3d
09NTfWWLVtmyXQ4KkAFNPflrE3SI0e7H9wXdCqRAiltjRJghEa9eyLb\n/EeBO8Pf3wk8cqR9qU6pTpV0apSkNIikTgV6j5KF7qx8CVji7v/KtOhD4Brgn+GvH8QyR4sWLejY\nsSMAXbp0YfXq1Xz55ZcMHDgwY530U829e/dm+vTprFq1irvuuosXXniBPn36cOKJJwLQq1cvbrvt\nNq688kouvvhiGjdunOsxJ06cyLXXXkulSpUAqFWrFgCLFi3i3nvvZceOHezevZszzzwzx7a7d+/O\nM9/s2bMZN24cAFdccQW33347ADNnzmTQoEHEx8dTr149+vTpw5w5c6hWrRrdunXLGC61efPm1K5d\nm++++45NmzbRqVMnateuXcB3VvKlyzWQvBcWvA3VGkK/uyEhMehUIhLSC7gaWGhm88Pz7iZUn942\ns+uBn4GBeWwfFapTqlMiUvSCfo5SsShAFSpUyPg+Pj6eTZs2UaNGDebPn59j3d69e/Pcc8+xfv16\nHnzwQR599FGmTp3KKaecAsCdd97Jueeey/jx4+nRowcTJ07kuOOOy7Efd891BJ4hQ4Ywbtw4OnTo\nwMsvv8zUqVNzrJOWlpZnvryEGujcVa5cOcv0DTfcwMsvv8zGjRu57rrr8n0MKYQe/xd6iUix4u4z\nCZ1Vyk3/osqhOqU6JSJFL9DBHNx9prubu7d3947h13h33+ru/d39mPDXbUWZq1q1arRo0YKxY8em\n5+T7778HoHv37nz55ZfExcWRmJhIx44d+d///kfv3r0BWLlyJe3atWPYsGF07dqVpUuX5nqMM844\ng5EjR7J3714Atm0L/RGTkpJo0KABycnJvP766xnrV61alaSkpCPm69GjB++++y4Ab775Zsb2p5xy\nCm+99Rapqals3ryZ6dOn061bt1yzXXTRRXz22WfMmTMn198UiohIsFSnVKdEJPaKzah3xc3rr7/O\nSy+9RIcOHTjhhBP44IPQ1X8VKlSgSZMm9OjRAwj95i4pKYl27doB8MQTT9C2bVs6dOhAxYoVOfvs\ns3Pd/1lnncUFF1xA165d6dixI4899hgAI0aMoHv37px++ulZfsN3+eWX8+ijj9KpUydWrlyZZ74n\nnniCf/3rX3Tr1o0NGzZQvXp1IFRU2rdvT4cOHTj11FN55JFH8hzes3z58vTr149LL72U+Pj4KLyb\nIiISbapTqlMiElt2uFPdJUnXrl197ty5WeYtWbKE448/PqBEwdi7dy8VK1bEzHjzzTcZM2ZMRnHK\nr7S0NDp37szYsWM55phj8r1dWXy/RSQrM5vn7l2DzlEcqU6FqE6VXCNGjABg+PDhAScRKbhI6lTQ\n9yhJlM2bN4+hQ4fi7tSoUYORI0dGtP3ixYs577zzuOiiiyIqPiIiIvmhOiUiJYUapRhbuHAhV199\ndZZ5FSpU4Ouvv47J8Xr37p1xHXhBtGnThp9++imKiUREpDhTnRIRyZ0apRhr165dRKP+iIiIFCXV\nKRGR3GkwBxERERERkWzUKImIiIiIiGSjRklERERERCQbNUoiIiIiIiLZqFGKoR07dvDMM88UaNsh\nQ4bwzjvvRCVH3759yf7sDhEREVCtEhHJi0a9C0tLcz78fj0vzVzFhp37aFC9Itef3IILOjQkLs4K\ntM/04nPTTTdFOa2IiJRGI/75ENt27Yja/mpVq8HwO+867DqqVSIiudMZJUJN0u9fm8fd7y9k4bqd\nbNl9kIXrdnLXewv5/WvzSEvzAu33zjvvZOXKlXTs2JE77riDRx99lBNPPJH27dtz//33Z6w3evRo\n2rdvT4cOHbI8y2L69OmcdNJJtGzZMuM3dlOnTqVv374MGDCA4447jiuvvBL3UL5JkybRqVMn2rVr\nx3XXXceBAwdyZBozZgzt2rWjbdu2DBs2LGP+Sy+9ROvWrenbty+//e1vGTp0KElJSbRo0YL
k5GQA\ndu3aRfPmzTOmRUQkurbt2kFc7xOi9spP06VaJSKSOzVKwIffr2fmii3sPZiaZf6+5FRmLN/CRwvW\nF2i///znP2nVqhXz58/n9NNPZ/ny5XzzzTfMnz+fefPmMX36dH744Qf+/ve/M3nyZL7//nv+85//\nZGy/YcMGZs6cyccff8ydd96ZMf+7777jiSeeYPHixfz000/MmjWL/fv3M2TIEN566y0WLlxISkoK\nzz77bJY869evZ9iwYUyePJn58+czZ84cxo0bx/r16xkxYgRfffUVX3zxBUuXLgWgatWq9O3bl08+\n+QSAN998k0suuYSEhIQCvR8iIlL8qFaJSJC2b9/Ogw8+yI4d0TubHi1qlICXZq7K0SSl25ecyosz\nVhX6GBMmTGDChAl06tSJzp07s3TpUpYvX87kyZMZMGAAderUAaBWrVoZ2/zmN78hLi6ONm3asGnT\npoz53bp1o3HjxsTFxdGxY0dWr17Njz/+SIsWLWjdujUA11xzDdOnT8+SYc6cOfTt25e6detSrlw5\nrrzySqZPn84333xDnz59qFWrFgkJCQwcODBjmxtuuIFRo0YBMGrUKK699tpCvxciIlI8qVaJSFF7\n//33+fHHH3nvvfeCjpKDGiVgw859hVqeH+7OXXfdxfz585k/fz4rVqzg+uuvx90xy/0eqAoVKmTZ\nPrf58fHxpKSkZFl+uAyRzAfo1asXq1evZtq0aaSmptK2bdsjHkdEREom1SoRKUrbt29n2rRpuDvT\np08vdmeV1CgBDapXLNTyvFStWpWkpCQAzjzzTEaOHMnu3bsBWLduHb/++iv9+/fn7bffZuvWrQBs\n27atQMc67rjjWL16NStWrADg1VdfpU+fPlnW6d69O9OmTWPLli2kpqYyZswY+vTpQ7du3Zg2bRrb\nt28nJSWFd999N8t2gwcPZtCgQfoNnUiElm9fzrDpw7hp4k1MWjMp6DgiuVKtEpGgvP/++xm/BElL\nSyt2Z5XUKAHXn9yCignxuS6rmBDPDb1bFGi/tWvXplevXrRt25YvvviCK664gp49e9KuXTsGDBhA\nUlISJ5xwAvfccw99+vShQ4cO3HbbbQU6VmJiIqNGjWLgwIG0a9eOuLg4fv/732dZp0GDBjz00EP0\n69ePDh060LlzZy688EIaNWrE3XffTffu3TnttNNo06YN1atXz9juyiuvZPv27QwaNKhA2UTKoqSD\nSQz5bAjjV41nxroZ3Dr1Vr5c/2XQsURyUK0SkaDMmjWLlJQUAFJSUpg1a1bAibKy/JwGLwm6du3q\n2Z+/sGTJEo4//vgjbps+6t2M5VvYl3zoXqWKCfH0PqYOz13VpcBDhJcUu3fvpkqVKqSkpHDRRRdx\n3XXXcdFFFwHwzjvv8MEHH/Dqq68edh/5fb9FyoIJqyfw52l/zjLvkmMu4YGTHggmUBExs3nu3jXo\nHMVRfupUEMODlySFrVWqU4UzYsQIAIYPHx5wEiktRo4cydSpU0lJSaFcuXL07duX6667LqbHjKRO\n6TlKQFyc8dxVXfhowXpenHHoOUo39G7B+e0L/hylkuSBBx5g4sSJ7N+/nzPOOIPf/OY3APzhD3/g\n008/Zfz48QEnFClZGlZpmGNeg8oNAkgihWVmI4HzgF/dvW143gPAb4HN4dXudvdC/0dZmpqaWFCt\nEildLrroIqZNmwZAXFwcF198ccCJsgq8USrKAnQ4cXHGhR0bcWHHRrE8TLH12GOP5Tr/qaeeKuIk\nIqVD2zptuezYy3j7x7dxnBNqn8Dlx10edCwpmJeB/wKjs83/t7vn/p+nxIRqlUjpUrNmTfr06cOk\nSZM45ZRTqFGjRtCRsgi8UUIFSERKqXt73MuQE4aQdDCJ42vrcp+Syt2nm1nzoHOIiJRGF110EWvX\nri12Z5OgGDRKKkAiUpo1rto46AgSO0PNbDAwF/izu28
POpCISElTs2ZN7rvvvqBj5Ko4j3o31MwW\nmNlIM6sZdBgREZFMngVaAR2BDcDjua1kZjea2Vwzm7t58+bcVhERkWKquDZKKkAiIlJsufsmd091\n9zTgBaBbHus97+5d3b1r3bp1izakiIgUSrFslFSADm/q1Kl8+aWexyIiEhQzyzyE4UXAoqCyFEeq\nUyJSGgR+j1JuzKyBu28ITxZNAUpLg0XvwOynYdc6qNYIet4MbQdAXPHpJ1NSUpg6dSpVqlThpJNO\nCjqOiEipZ2ZjgL5AHTNbC9wP9DWzjoADq4HfReNYj/9jBEk7tkVjVwBUrVGLP99dtM+8UZ0SkdIi\n8EapKAtQntLS4K2r4KcpkLw3NG/PZvjoj7D4A7j01QI3S3v27OHSSy9l7dq1pKamMnz4cIYNG8Zl\nl13GlClTAHjjjTc4+uijWbNmDddddx2bN2+mbt26jBo1iqZNmzJkyBBq1arFd999R61atZg1axbx\n8fG89tprPPXUU2zcuJG//vWvxMfHU716daZPnx6td0aKysZF8MP7UK0BdBgE5SsHnUhEwtx9UC6z\nX4rFsZJ2bOOmFglR298zq47cdKlOiYjkLvBGqSgLUJ4WvZO1SUqXvBdWToZF70L7gQXa9WeffUbD\nhg355JNPANi5cyfDhg2jWrVqfPPNN4wePZpbb72Vjz/+mKFDhzJ48GCuueYaRo4cyS233MK4ceMA\nWLZsGRMnTiQ+Pp4HHniAKlWqcPvttwPQrl07Pv/8cxo1asSOHdF7orsUkTVfwisXQFpyaHrB23D9\nhGAziUiZoTolIpK74nNNWZBmP52zSUqXvBdm/7fAu27Xrh0TJ05k2LBhzJgxg+rVqwMwaNCgjK+z\nZ88OxZg9myuuuAKAq6++mpkzZ2bsZ+DAgcTHx+d6jF69ejFkyBBeeOEFUlNTC5xVAvLNC4eaJIBf\nvoZ184LLIyJliuqUiEjuAj+jVCzsWle45YfRunVr5s2bx/jx47nrrrs444wzADCzjHUyf59Z5vmV\nK+d9KdZzzz3H119/zSeffELHjh2ZP38+tWvXLnBmKWLx5fM3T0QkBlSnRERypzNKEBq4oTDLD2P9\n+vVUqlSJq666ittvv51vv/0WgLfeeivja8+ePQE46aSTePPNNwF4/fXXOfnkk3PdZ9WqVUlKSsqY\nXrlyJd27d+fBBx+kTp06/PLLLwXOKwHoeTOUr3Jo+thzoH674PKISJmiOiUikjudUYLQB9WP/pj7\n5XcJlaDn0ALveuHChdxxxx3ExcWRkJDAs88+y4ABAzhw4ADdu3cnLS2NMWPGAPDkk09y3XXX8eij\nj2bcJJub888/nwEDBvDBBx/w1FNP8e9//5vly5fj7vTv358OHToUOK8EoEF7GDoHfhwPVRtC6zOD\nTiQiZYjqlIhI7szdg84QFV27dvW5c+dmmbdkyRKOP/74I2+c26h3EGqSWp1aqFHvctO8eXPmzp1L\nnTp1orbP4iDf77dIGTHp50k8+e2TJB1M4pLWl3BTh5vyvISptDCzee7eNegcxVF+6lRxGR5cdUpy\nM2LECACGDy/aIeelZBs9ejRr1qzJc/nGjRsBqF+/fp7rNGvWjMGDB0clTyR1SmeUINQEXfZaaHS7\n2f/N9BylodD2kmL1HCURKRk27tnI7dNuJyUtBYDnvn+OJlWbcEGrCwJOJsVZUT/zSEQkaAcOHAg6\nQp7UKKWLiwsNAV7AYcAjsXr16pgfQ0SCNf/X+RlNUrpvNnyjRklKBNUpEYmWI50JKs5nKnWqREQk\nBtrUboOR9TK7tnXaBpRGREREIqVGSUQkBppWa8q9Pe6lRoUalIsrxyXHXMKA1gOCjiUiIiL5pEvv\nRIqD3Zvhq2egTmvoOCjoNBIllx57KQNaDyA1LZWE+ISg44iIiEgE1CiJBG3lFHjtYvC00PSMx+AP\n84LNJFETZ3HExev
kvYiISEmj6l1C9O3bl+zDygK8/PLLDB1a8Oc8STEw/o5DTRLA1hWwbEJweURE\nCkB1SkRKG51RCkvzNMavGs+ri19l456N1K9cn6vbXM05Lc4hzoLtJ1NTUwM9vsTYwaSc83auLfoc\nIhK4fzz0MNt37Ira/mrWqMZA645JAAAgAElEQVTddw2L2v7yojolIqWRGiVCTdKtU27lqw1fsS9l\nHwDb9m/jwdkP8sXqL/h3v38XuFl65JFHSExM5JZbbuFPf/oT33//PZMnT2bSpEmMGjWKc889l3/8\n4x+4O+eeey4PP/wwAFWqVOG2227j888/5/HHH8+yz1GjRvHQQw/RoEEDWrduTYUKFQr3BkiwOg+B\naf88NB1fATpH56FqIlKybN+xixbtzo7a/lYt/PSI66hOiYjkTpfeAeNXjc/SJKXbl7KP2Rtm8+mq\nIxeavJxyyinMmDEDgLlz57J7926Sk5OZOXMmxxxzDMOGDWPy5MnMnz+fOXPmMG7cOAD27NlD27Zt\n+frrrzn55JMz9rdhwwbuv/9+Zs2axRdffMHixYsLnE2KiX53wekPQq1W0LQn/H4GxOt3GCJSNFSn\nRERyp0YJeHXxqzmapHT7UvYxevHoAu+7S5cuzJs3j6SkJCpUqEDPnj2ZO3cuM2bMoEaNGvTt25e6\ndetSrlw5rrzySqZPnw5AfHw8l1xySY79ff311xnblC9fnssuu6zA2aQYqdoQarcKjXpXLjHoNCJS\nhqhOiYjkTo0SsHHPxsMu37RnU4H3nZCQQPPmzRk1ahQnnXQSvXv3ZsqUKaxcuZKmTZvmuV1iYiLx\n8fG5LjOzXOdLCbXwHXjvBlg+Ab59BV45D1KTg04lImWE6pSISO7UKAH1K9c/7PJ6lesVav+nnHIK\njz32GKeccgq9e/fmueeeo2PHjvTo0YNp06axZcsWUlNTGTNmDH369Dnsvrp3787UqVPZunUrycnJ\njB07tlDZpBhY9F7W6R0/wy/fBJNFRMok1SkRkZzUKAFXt7maiuUq5rqsYrmKDG5TuBvre/fuzYYN\nG+jZsyf16tUjMTGR3r1706BBAx566CH69etHhw4d6Ny5MxdeeOFh99WgQQMeeOABevbsyWmnnUbn\nzp0LlU2KgWoNs82wXOaJSFDMbKSZ/WpmizLNq2VmX5jZ8vDXmkFmLCzVKRGRnAK/Y9zMRgLnAb+6\ne9vwvFrAW0BzYDVwqbtvj1WGc1qcw4TVE3IM6FCxXEV6NujJ2S0KNwJR//79SU4+dCnVsmXLMr6/\n4ooruOKKK3Jss3v37izTU6dOzfj+2muv5dprry1UJilGTv4TrJwM21YCFpqu1SLoVCJyyMvAf4HM\nN6zeCUxy93+a2Z3h6UKPw12zRrV8jVQXyf7yQ3VKRCSnwBslirAA5SXO4nii3xN8uupTRi8ezaY9\nm6hXuR6D2wzm7BZnB/4cJSnlqjeCoXNg3bdQtR7UyPueABEpeu4+3cyaZ5t9IdA3/P0rwFSiUKeK\n4plHIiKSP4E3SkVZgA4nzuI4t+W5nNvy3FgeRiR3cfHQ5MSgU4hI/tVz9w0A7r7BzI4KOpCIiERX\ncT1VkqUAASpAIiJS4pjZjWY218zmbt68Oeg4IiISgeLaKOVLfgqQuxdxqrJJ77OIlDGbzKwBQPjr\nr7mt5O7Pu3tXd+9at27dXHek/z+Lht5nEYlUcW2UolKAEhMT2bp1q/5zjDF3Z+vWrSQm6kGpIlJm\nfAhcE/7+GuCDguxEdapoqE6JSEEEfo9SHtIL0D8pRAFq3Lgxa9euRZc7xF5iYiKNGzcOOoaISNSZ\n2RhC983WMbO1wP2E6tPbZnY98DMwsCD7Vp0qOqpTIhKpfDdKZtYaeJbQ/UNtzaw9cIG7/60wAWJZ\ngBISEmjRQsMsi0gwdh/czYcrPyTpYBLntDiHJtWaBB2pVItVnXL3QXks6l+Y/YLql
IhIcRbJGaUX\ngDuA/wG4+wIzewMotgVIRCQoyWnJXP3p1azYsQKAlxa9xBvnvMHRNY8OOFmpFpM6JSIiZVMk9yhV\ncvdvss1LiWYYEZHSYvb62RlNEsC+lH28s/ydABOVCapTIiISNZE0SlvMrBXgAGY2ANgQk1QiIiVc\nOct5wj7e4gNIUqaoTomISNREcundzcDzwHFmtg5YBVwZk1QiIiVc9wbdaV+3PQs2LwCgeoXqXHbs\nZQGnKvVUp0REJGry1SiZWRzQ1d1PM7PKQJy7J8U2mohIyRUfF8+oM0cx6edJ7Dywk9OanUadinWC\njlVqqU6JiEi05atRcvc0MxsKvO3ue2KcSUSkVCgfX56zW5wddIwyQXVKRESiLZJ7lL4ws9vNrImZ\n1Up/xSyZiIhIZFSnREQkaiK5R+m68NebM81zoGX04oiIiBSY6pSIiERNvhsld9cT8UREpNhSnRIR\nkWjKd6NkZgnA/wGnhGdNBf7n7skxyCUiIhIR1SkREYmmSO5RehboAjwTfnUJzxORwtqzBaY8BAvf\nDTqJSEmmOiUiIlETyT1KJ7p7h0zTk83s+2gHEilzfpoGr/4GPC00Pf0RuPnrYDOJlEyqUyKHMXr0\naNasWVPg7dO3HTFiRIG2b9asGYMHDy7w8UWKWiSNUqqZtXL3lQBm1hJIjU0skTJk/O2HmiSAzUth\n+SQ4pn9wmURKJtUpkcNYs2YNq5YtpVGVhAJtn5CSAsDB9Ssj3nbdbl0BKyVPJI3SHcAUM/sJMKAZ\ncG1MUomUJft35Zy3Y3WRxxApBVSnRI6gUZUE/tD+qCI/7lMLfi3yY4oUViSj3k0ys2OAYwkVoKXu\nfiBmyUTKis5Xw/RHD03Hl4dOVweXR6SEUp0SEZFoyvdgDmZ2M1DR3Re4+/dAJTO7KXbRRMqIU++F\nfvdCjebQ+ES4cRqUKx90KpESR3VKRESiKZJR737r7jvSJ9x9O/Db6EeSvPy0eTc9/zGJY+4Zz5n/\nnsa+g7r0vtSo3QrqnQANOkCFqkGnkSiZsXYG571/Hqe+fSrPzX8u6DhlgeqUiIhETST3KMWZmbm7\nA5hZPKBfexehs56YzsFUB+DHTbvp99gUvrr7tIBTSaH98D68k+k2iuVfwB++hfhI/nlKcbNpzyZu\nnnQzTujf7NPfP02lhEoMPkEjPsVQmalTRxq9bOPGjQDUr18/z3U0ApmIyOFFckbpc+BtM+tvZqcC\nY4DPYhNLsluxKSmjSUq3cZcuvS8VFozNOr1jDaz9JpgsEjWvLnk1o0lKN3bZ2DzWlihRnQo7cOAA\nBw6oRoiIFEYkv7IeBtxI6KnnBkwAXoxFKMmpbrUKOebFWQBBJPqqNcg2w6Bq3r8FlpKhRbUWOebV\nrVg3gCRlSpmpU0c6E5T+nJvhw4cXRRwRkVIp32eU3D3N3Z9z9wGErvme7e4xvUnGzFab2UIzm29m\nc2N5rOKuesXy9GhRK8u863vl/CAmJdDJf4KazcMTBr1ugVotg0wkUXBJ60toWrVpxnT5+PLc3/P+\nABOVfqpTIiISTfk+o2RmU4ELwtvMBzab2TR3vy1G2dL1c/ctMT5GifDm73oyc/lm3v12LX/sfwzN\n61QJOpJEQ/XGMHRe6HK7qvXVJJUin1z8CR+v/Ji1u9dyQ7sbKBen+85iSXVKRESiKZKqXd3dd5nZ\nDcAod7/fzBbEKpjk9OWKLfx57Pds2nWAH9bv4rmrutCyrpqlUiG+HDQ7KegUEmW//+L3zFo/C4C3\nf3yb9y54jxqJNQJOVaqpTomISNRE0iiVM7MGwKXAPTHKk50DE8zMgf+5+/NFdNxiJy3NuT3cJAEs\n27Sbv360mFeu6xZwMhHJzeQ1kzOaJIDN+zZz36z7eLL/kwGmKvVUp0REitiRRuE8kvRt0++tjFQs\nR/CMpFF6kNCIQjPdfY6ZtQSWxyTVIb3cfb2ZH
QV8YWZL3X16+kIzu5HQjbs0bdo0r32UCkn7U1i/\nc3+WeT9uTAoojYgcyTcbc45cuHLnygCSlCmqUyIiRWzNmjUsW76SytXqFGj7lNTQ6GTrNu2MeNs9\nu2J71XO+GyV3HwuMzTT9E3BJ+rSZ3eXuD0UznLuvD3/91czeB7oB0zMtfx54HqBr166e605KieqV\nEujYpAbzf8l4liJ9j9UIWiLF1cWtL+b1pa9nmdenSZ+A0pQNqlMiIsGoXK0O7Xv8psiPu+CrcTHd\nfyTPUTqSgVHcF2ZW2cyqpn8PnAEsiuYxSppnruzMOe3q07x2Ja7s3pTh57UJOpKI5KF1zdb8ucuf\nqZJQhfJx5enfpD9/OfEvQccq61SnREQk36I5BFO0n+pTD3jfzCCU8w13L5MPDkzXoHoiv+nYiGOO\nqsrJx9ShcgWNoFVqbF4GSz6Aqg2h7SWQkBh0IomCS4+9lEoJldh1cBdntzg76DiiOiUiIhGI5ift\nqF5SEL5kokM091nSjfh4CSNnrQLgP5OW8/Al7bjsRF3zXuL98g28fB6khgbq4PsxMOTjYDNJoSWn\nJTP408H8uP1HAF5c+CJvnPMGLWto+PcAqU6JiEi+RfPSu2j/pk4y2Xcwlde+yjqiyAszVgWURqLq\n6/8dapIAVs+A9d8Fl0ei4qv1X2U0SQB7kvcwdtnYw2whRUB1SkRE8i3fjZKZ9TrCPH0CiCGz0Cuz\nOJX80sFy+WeY2zwpUeJy+Rla9n/EElWqUyIiEk2RfBp76nDz3P0fhY8jeUlMiOfaXi0yps3g//q2\nCjCRRE2P/4NyFQ9Nt+oPDXQ1T0nXo0EP2tZumzFdtXxVLjv2sgATlQmqUyIiEjVHvEfJzHoCJwF1\nzey2TIuqAfGxCiY53Xn2cfQ6ujY/rN/FyUfXoW2j6kFHkmho1Blu/hqWfgxV68PxFwSdSKIgPi6e\nUWeN4os1X7Dr4C7OaHYGdStpSP9YUJ0SEZFYyM9gDuWBKuF1q2aavwsYEItQkrsPvlvH7e98T3Kq\nU7l8PO/ddBLH1q8WdCyJhprNoOfNQaeQKHt50cu8tOglktOSmb52Ov87/X9BRyqtVKdERCTqjtgo\nufs0YJqZvezua460vsTOrW/Nzxiyac/BVC5+5kt+ePCsQDOJSO4Wb13M098/nTH95fov+ftXf+ee\nHvcEmKp0Ko11avTo0axZU/A/Svq2I0aMKND2zZo1Y/DgwQU+vohIaRDJ8OAVzOx5oHnm7dz91GiH\nkpwWr9uZY1zbPQdTA8kiIkf24YoPc8z7cv2XASQpU0pNnVqzZg1LVywnoVbVI6+cixRC9WHlto0R\nb5u8LalAxxQRKW0iaZTGAs8BLwL6hF7Ejq5bJce8hHiNoCVSXPVq1IvXl76eZV7rmq0DSlNmlKo6\nlVCrKrXP6F7kx9064esiP6aISHEUSaOU4u7PxiyJHFb58vH89uQWvDAz9OwkM/jXQI2MJlJc9W7c\nm9OansbEnycC0LBKQ/7e6+8Bpyr1VKdERCRqImmUPjKzm4D3gYynY7r7tqinklzdc14bujSrzhvf\n/MKdZx1PG416J1Ks/bvfv/l247ds2beFfs36kRCXEHSk0k51SkREoiaSRuma8Nc7Ms1zoGX04sjh\n/ObpWcz/ZQcA05fP5MruTfj7Re0DTiUiefnLtL/w6epPAWherTkvn/UytSvWDjhVqaY6JSIiUZPv\nB866e4tcXio+RWTnvoMZTVK6Md/8ElAaETmS7379LqNJAli9azVjlo4JMFHppzolIiLRlO9Gycwq\nmdm94RGFMLNjzOy82EWTzLbuPphjXlr2YfBEpNjYti/n1V5b928NIEnZoTolIiLRlO9GCRgFHCT0\n9HOAtcDfop5IctWybhUqlMv642pUIzGgNCJyJD0b9qRuxboZ03EWx/ktzw8wUZmgOiUiIlETSaPU\nyt0fAZIB3
H0foPGpi9DkP/elWe1KVCgXR8fGNZh+R9+gI4lIHiolVGL02aO54rgrOK/lebxw+gt0\nrtc56FilneqUiIhETSSDORw0s4qEbozFzFqRaVQhib1GNSsy7Y5+QceQWFjyESx4G6o1hJNugeqN\ngk4kUbA2aS1fbfiKvcl7aVWjFd0adAs6UmmnOiUiIlETSaN0P/AZ0MTMXgd6AUNiEUqkTFn8Abw9\n+ND0ss9h6FyIj+SfpxQ3v+79lRu/uBEPfWbnP9/+h8T4RK5qc1XAyUo11SkRkSK2ceNG9iTtYcFX\n44r82Ht2bWGj74vZ/iMZ9e4L4GJCRWcM0NXdp8YmlkgZsuDtrNPbV8HaOcFkkah5bfFrGU1SureX\nvZ3H2hINqlMiIhJNkf7KuhEQH97uFDPD3d+LfiyRMqRKvVzmHVX0OSSqmlRtkmNe7UQ9Q6kIqE6J\niBSh+vXrk2o7ad/jN0V+7AVfjaN+veox238kw4OPBEYClwDnh18xHXbVzM4ysx/NbIWZ3RnLY4kE\n5uRboXrTQ9M9bobarYLLI1Ex8NiBNK7SOGM6IS6B+3reF2Ci0k91SkREoimSM0o93L1NzJJkY2bx\nwNPA6YSGeJ1jZh+6++KiylAc7TuYyoJ1O+jeQr+ZLjVqNIU/zIPJD0GznnDsGUEnkij59JJPeWPJ\nG/y862du63Ib5cuVDzpSaac6JSIiURNJozTbzNoUYQHoBqxw958AzOxN4EKgzBag+z5YxOjZa4DQ\neLePDmzPgC45L++REmbpZ/DmZaHvvwTKV4G71wUaSaKjz1t92LY/9ODZMT+O4fOLP6d+lfoBpyrV\nVKdERCRqImmUXiFUhDYSGm7VAHf39jFJFrrO/JdM02uB7jE6VrF38GBqRpMEobFv73pvoRql0uDN\nQVmnD+6GWU9Cr1uCySNR8fR3T2c0SQBpnsagTwYx5bIpAaYq9UpNndq4cSPJu5PYOuHraOwuIsnb\nkth4sMgPK0Vg48aN7NudzFMLfi3yY6/bnUzFjRuL/LgihRFJozQSuBpYCKTFJk4WuT0kMMsQUmZ2\nI3AjQNOmTXNZvfRYsXl3jnnJqZ7LmlLy5PLP6bvX1SiVcJN/mZxj3o4DOwJIUqaoTomISNRE0ij9\n7O4fxixJTmuBzKdLGgPrM6/g7s8DzwN07dq1VHcNbRpVxww805+ySoX44AJJ9JRLhJT9Weed+69g\nskjU/K7d7/jz9D9nmdeyRsuA0pQZpaZO1a9fnz3boPYZRX8hxdYJX1O/li4RLY3q16/PwbQ9/KF9\n0Y+s+tSCXylfX3+vpGTJ96h3wFIze8PMBpnZxemvmCWDOcAxZtbCzMoDlwNFWQCLnWeu6Ez5cqEf\nWdUK5Rh3U6+AE0lU3LESLNM/xUYnQgv9bEu6M1qcwUkNT8qYrpVYi3cveDfARGWC6pSIiERNJGeU\nKhK65jvzkFwOxOT5FO6eYmZDgc8JPRNjpLv/EItjlRRnt2vA0o27mLdmO+d3aMjR9aoGHUmioUIV\nuOo9mP3f0DDhp/816EQSJf/s/U8em/sYOw/s5Mb2NwYdpyxQnRIRkajJd6Pk7tfGMkgexxwPjC/q\n4xZX5z81g4XrdgEwc8VW5q3ZziMDOgScSgrt21fhw6GHpn94D+5ck/f6UiLsT9nPGe+cwf7U0GWV\n09ZO4+n+T3NK41MCTlZ6qU6JiARjz64tLPhqXIG23b9nJwCJlSN/cOyeXVsghg+cPWKjZGZ/cfdH\nzOwpst2kCuDuuuO8COzYezCjSUo37rv1apRKg2mPZJ3evwMWvA3tLw0mj0TFKz+8ktEkpXvqu6fU\nKMWA6pSISHCaNWtWqO3XrAkNdNSoIA1PveqFPv7h5OeM0pLw17kxSyEiIlJwqlMiIgEZPHhwobYf\nMWIEAMOHD49GnKg64mAO7v5R+Nu97v5K5hewN7bxJF2NSuVp3zhrp31R50Y
BpZGo6jMs63TFmjqb\nVApcc8I1JMYnZpl3Syed2IgF1SkREYmFSAZzuAsYm495EiMfDj2ZJyYuY96a7ZzXvgGXnahncpQK\nna+C6o1g9tNQoymc/mDQiSQKEsslMmHABP41919sP7CdG9vfSPu6sXruqYSpTomISNTk5x6ls4Fz\ngEZm9mSmRdWAlFgFk9zdelrroCNILLTqF3pJqbJk6xJW7lxJ0sEkvt30rRqlGFGdEhGRWMjPGaX1\nhK77vgCYl2l+EvCnWIQSESnptuzbwh8m/4GDaQcBeHze49SrXI+zW5wdcLJSSXVKRESi7oiNkrt/\nD3xvZm+4e3IRZBIRKfHmbpyb0SSl+3L9l2qUYkB1SkREYiGSe5S6mdkDQLPwdga4u7eMRTARkZKs\ndc2cl8keW/PYAJKUKapTIiISNZE0Si8RuoRhHpAamzgiIqVDyxotub3r7Twz/xn2p+7njGZncOmx\nGs0wxlSnREQkaiJplHa6+6cxSyL5smnXfpZuTKJT0xpUS0wIOo5Ey95tMOlBaNgRugwJOo1EyTUn\nXMPlx13OwdSDVC1fNeg4ZYHqlIiIRE0kjdIUM3sUeA84kD7T3b+NeirJ1dtzfuHu9xeSkuZUqVCO\nF6/pSo+WtYOOJYW16H14Z0jo+3nApBHwl5VBJpIoqhBfgQrxFYKOUVaoTomISNRE0ih1D3/tmmme\nA6dGL47kJTk1jb+PX0JKmgOw+0AKD3+2lPdv6hVwMim0j2/NOr13C3z3BnS6Ipg8IiWX6pTIEazb\nncxTC34t0LZb9oVG269TMZKPj4eO26JARxUJTr7/pru7HvISoAMpaezan3Uwp81JB/JYW0qU5H05\n521aVPQ5REo41SmRw2vWrFmhtk9eswaA8g0j30+LKBxfpKjlu1Eys3rAP4CG7n62mbUBerr7SzFL\nJxmqVCjH6cfXY8LiTRnzLu7UKMBEEjVH94cfx2ed1+/uYLKIlGCqUyKHN3jw4EJtP2LECACGDx8e\njTgixV5cBOu+DHwONAxPLwNuzXNtibonLu/In05rzRlt6jHiwhO49bScww9LCTRoDBx3PpRLhEq1\n4YqxUKFK0KlESqKXUZ0SEZEoieQi0zru/raZ3QXg7ilmpuFXi9CGHfsZOesnkvan8MP6nQw6sTFx\ncZFfJyzFUKcrIT4BqjWE+u2CTiNR8kvSL4z+YTRJyUlcdPRFdG/Q/cgbSWGoTomISNRE8il7j5nV\nJnRjLGbWA9gZk1SSq9P/PY3wWA6s27GfTiMmsvCvZwUbSgpvycfw1pWHppd9Bjd/A3HxwWWSQtub\nvJfBnw5my74tAHy66lNGnjmSLvW6BJysVCtVdSp5WxJbJ3xdoG1TkvYCUK5qpQIdl1r1C3RcEZHS\nJJJG6TbgQ6CVmc0C6gIDYpJKcli0bkdGk5Qu6YB+UVoqfD8m6/TWFfDLN9CsZzB5JCpmr5+d0SQB\npHkaH//0sRql2Co1daqwN72vSQrddN+sIA1Prfq66V5EhMgapVbA2UAT4BJCw7Dquq8iUr96YtAR\nJFaqHJW/eVKi1K6Y8xlndSrWCSBJmVJq6pRuuhcRCV4kgzkMd/ddQE3gNOB54NlYhDKzB8xsnZnN\nD7/OicVxSpI6VRJpUrNilnn9j6sbUBqJql5/hGqZRjDs9juo3Sq4PBIVHY/qyNnNz86Ybl6tOZcf\ne3mAicoE1SkREYmaSH7Tln6d17nAc+7+gZk9EP1IGf7t7o/FcP8lzoxhpzLm6zW88+06/nLGsXRv\nlfM31lIC1WwOt3wHq2eGBnM46vigE0mUPNLnEa46/iq2H9hOr0a9KKfBV2JNdUpERKImkjNK68zs\nf8ClwHgzqxDh9lJIc1Zv4+mpK5m3Zjsjxi/m5617g44k0VKuQuh5SmqSSpXXFr/GjRNv5JYpt3DP\nzHs4mHow6EilneqUiIhETSQF5FJCz6c
4y913ALWAO2KSKmSomS0ws5FmVjOGxykR0tKcP701n7Xb\n9wGwaN0uHvjoh4BTiUheftr5Ew/PeZg9yXtI8zTGrxrP2GVjg45V2qlOiYhI1OT7OhB33wu8l2l6\nA7ChoAc2s4lAbsPx3EPomvIRhIZ4HQE8DlyXyz5uBG4EaNq0aUGjlAhJ+1MymqR0i9fvCiiNiBzJ\nsm3Lcsxbum1pAEnKDtUpERGJpsAumHf30/Kznpm9AHycxz6eJ3SzLl27dvXc1iktqldKoF2j6ixc\nd+iRIL2O1ghaIsVV53qdSYhLIDktOWNejwY9AkwkkVKdEhEp24rltdtm1iDT5EXAoqCyFCfPXNmZ\n/scdRf1qiQzo0pj7L2gTdCQRycNRlY7iyVOf5ITaJ9CkahNu7Xwr57Y8N+hYEiWqUyIipV9xHYLp\nETPrSOiShtXA74KNUzw0qVWJl4acGHQMiYXtq2HJR1C1AbS5EOITgk4kUdC2dluOrXUsO/fvpGdD\nPUC4lFGdEhEp5Yplo+TuVwedQaTIrJsHo86FlPA9aPPfgKvfO/w2UuwdTDnI6e+czv7U/QBM+mUS\nz532HL0a9Qo4mUSD6pSISOlXLBslkTLlq+cONUkAKyfBhu+hQYfgMkmhvbz45YwmKd1/vv2PGiUR\nESlTRo8ezZo1a/Jcnr5sxIgRea7TrFkzBg8eHPVsR6JGSSRonpa/eVKipKXl/Bm6615+ERGRzCpU\nqBB0hDypURIJWvffh+5PSj0Qmm7eGxp2CjaTFNqQtkN4cdGLHEj/uQJDOw0NMJGIiEjRC+JMULSo\nURIJWpMT4f9mweIPoFpDOOHioBNJFCSWS+Sziz/j8XmPs33/dn7X/nd0qqcGWEREpKRQo1SCrNux\nj799vJgf1u/i5GPqcM85x1O5gn6EpUKdY+CU24NOIVFWp1IdHur9UNAxREREpAD0KbsEufn1b5n/\nyw4A3vj6Z9LSnH9e0j7gVCIiUtRK8s3RIiIlhRqlEmLn3uSMJind1B83B5RGRESKs+J8c7SISEmh\nRqmEqJpYjkY1KrJux6FhpI+tXzXARCIiEhSdCRIRib24oANI/sTFGY8ObE/9aokAHFuvKvef3ybg\nVCIiIiIipZPOKJUgJ7Wqw8xh/di25yBHhRsmERERERGJPp1RKmHKxcepSRIRERERiTE1SiIiIiIi\nItmoURIREREREclGjZKIiIiIiEg2apRERERERESy0ah3IsXB8i9g4Vio2gB63ARV6wWdSERERKRM\nU6MkErSl4+HNQYemfxwPN30FcfHBZRIREREp43TpnUjQ5r+edXrLMlg7J5gsIiIiIgKoURIJXuU6\nOedVymWeiIiIiBSZwGI+qyoAABA7SURBVBolMxtoZj+YWZqZdc227C4zW2FmP5rZmUFlFCkSvf4Y\nujcpXdfroc7RweUREUB1SkSkrAvyHqVFwMXA/zLPNLM2wOXACUBDYKKZtXb31KKPKFIEarWEW74L\nDehQoyk07Bh0IhEJUZ0SESnDAjuj5O5L3P3H/2/v3oPjKs87jn9/tqgasEHCuIT7xXZCQi5cnBBQ\nsJ3ETZpOJkbGxFASF5PgumQgNAPMdGhcau5Dkk4HOs1ASlzuBWxxMQ13kFwBvoBlbBOcBIyDCqHG\nyBCDbZD99I/zqqzXWhWrWp3V7u8z886+5z23Z3V29ez7nnN2e5k1Bbg9IrZGxFrgt8DnBzc6s0G0\ndRPM/x7cMQNuPhmeuyPviMwM5ykzs1pXid96dwDwdMF0Z2ozq07t/wQvLMzq774B93wfDv8SjBid\nb1xmVorzlFWlG2+8kXXr1pWc3zPvkksuKbnMIYccwowZMwY8NrM8lLWjJOkR4KO9zLooIu4ptVov\nbVFi+7OAWQAHH3xwv2I0y91rK3ac3vYe/PfzMGJiPvGY1RDnKbMPr76+Pu8QzAZVWTtKETG5H6t1\nAgc
VTB8IvFpi+9cB1wGMHz++1yRlVvEOnwi/efCD6fo94YBj8ovHrIY4T5l9wGeCzHZUiV8Pfi9w\nqqR6SYcB44AlOcdkVj7HzU7ffLc/HHAsnHYb1I/MOyozK815ysysBuR2j5KkZuAaYDRwv6SOiPha\nRKyWdAfwPNANfN/fJGRVbdhw+NO5WTGziuE8ZWZW23LrKEVEC9BSYt5lwGWDG5GZmdkHnKfMzGpb\nJV56Z2ZmZmZmlit3lMzMzMzMzIq4o2RmZmZmZlbEHSUzMzMzM8tFV1cXc+fOZePGjXmHshN3lMzM\nymTp75cy84GZTLt3Gre9cFve4ZiZmVWclpYW1qxZw4IFC/IOZSfuKJmZlcGGzRs4+5GzWfb6MtZ0\nreHyxZfz8LqH8w7LzMysYnR1ddHa2kpE0NbWVnFnldxRMjMrg6WvL2XLti07tLV1tuUUjZmZWeVp\naWkhIgDYvn17xZ1VckfJzKwMxuw1Zqe2sQ1jc4jEzMysMrW3t9Pd3Q1Ad3c37e3tOUe0I3eUzMzK\nYFzjOM45+hzqh9cDMOmgSUz/+PScozIzM6scTU1N1NXVAVBXV0dTU1POEe2oLu8AzMyq1azPzOL0\nT5zOlu4tjPrIqLzDMTMzqyjNzc20trYCMGzYMKZOnZpzRDvyGSUzszLaY7c93EkyMzPrRWNjIxMn\nTkQSEyZMoKGhIe+QduAzSmZmZmZmlovm5mY6Ozsr7mwSuKNkZmZmZmY5aWxsZM6cOXmH0Stfemdm\nZmZmZlbEHSUzMzMzM7Mi7iiZmZmZmZkVcUfJzMzMzMysiDtKZmZmZmZmRdxRMjMzMzMzK5JbR0nS\nKZJWS9ouaXxB+6GSNkvqSOVnecVoZmZmZma1Kc8zSquAqUBbL/NejIijUpk9yHFVvC3vb8s7BDOz\nqjeUB/S6urqYO3cuGzduzDsUM7MhK7eOUkT8KiLW5LX/oajjlY18+SdPcMSPHmDavzzJqxs35x2S\nmVk1G7IDei0tLaxZs4YFCxbkHYqZ2ZBVqfcoHSZpuaRWSSfmHUwliAh+cPtyXlr/DgDL1nUx557V\nOUdlZla9huqAXldXF62trUQEbW1tPqtkZtZPZe0oSXpE0qpeypQ+VnsNODgijgZ+CNwqac8S258l\naZmkZevXry/HU6gYb2/uZt2Gd3doW/lfTn5mZjmp2AG9lpYWIgKA7du3+6ySmVk/lbWjFBGTI+JT\nvZR7+lhna0RsSPVngBeBj5VY9rqIGB8R40ePHl2eJ1Eh9tp9Nz6x3479xeMPH5VTNGZm1aEaB/Ta\n29vp7u4GoLu7m/b29kHZr5lZtam4S+8kjZY0PNUPB8YBL+UbVWX45784mqaxo2jYfTe+8Zn9uPib\nR+YdkpnZkFaNA3pNTU3U1dUBUFdXR1NT06Ds18ys2tTltWNJzcA1wGjgfkkdEfE1YAIwV1I3sA2Y\nHRFv5hVnJTl89Ahu+d4X8g7DzKymSRoNvBkR2ypxQK+5uZnW1lYAhg0bxtSpU3OOyMxsaMrzW+9a\nIuLAiKiPiH1TJ4mImB8RR0bEZyPimIi4L68YzcysdklqltQJHE82oPdgmjUBeE7SCuAuKmxAr7Gx\nkYkTJyKJCRMm0NDQkHdIZmZDUm5nlMzMzCpZRLQALb20zwfmD35EH15zczOdnZ0+m2Rm9v/gjpKZ\nmVmVaWxsZM6cOXmHYWY2pFXclzmYmZmZmZnlzR0lMzMzMzOzIu4omZmZmZmZFVHPr3cPdZLWA+vy\njmOQ7AO8kXcQVhY+ttWplo7rIRFR3b8A3k81lqegtl73tcTHtTrV0nH90HmqajpKtUTSsogYn3cc\nNvB8bKuTj6vVIr/uq5OPa3Xyce2dL70zMzMzMzMr4o6SmZmZmZlZEXeUhqbr8g7AysbHtjr5uFot\n8uu+Ovm4Vicf1174HiUzMzMzM7MiPqNkZmZmZmZWxB2lASKpQdLZ/
Vx3nqRpAxTHE5L8rSVVRtIk\nSSfkHYcNnFLvVUlnSLo2j5isujlPWTk5T1Uf5yl3lAZSA9CvBGTWF0l1wCTACahKSBqedwxWk5yn\nrCycp6qP81TGHaWBcyUwRlKHpKslXSBpqaTnJP1Dz0KSZqS2FZJuKlh/gqQnJb3UM2qXRmeekHSX\npBck3SJJad5XJC2XtFLSDZLqiwOSdFqav0rSVQXt35X067Tt6yVdK2mkpLWSdkvL7Cnp5Z5p+/Ak\n7SHp/nSMV0manv6WV0laksrYtOwhkh5Nr4lHJR2c2udJ+qmkx4F/B2YDf5NeXydKOiVte4Wkthyf\nbs2RdKGkc1P9HyU9lupfkXRzH++7TZLmSloMHF+0zZnpPdkKNA3m87Ga4jxlgPNUtXOeGkAR4TIA\nBTgUWJXqXyX79hCRdUYXAhOAI4E1wD5pub3T4zzgzrTsJ4HfpvZJwFvAgWneU8AXgT8GXgE+lpa7\nETgv1Z8AxgP7A78DRgN1wGPASan9ZWBvYDdgEXBtWvcXwEmpPgv4Sd5/16FYgJOB6wum90p/84vS\n9AxgYarfB/xlqp8J3F3wmlgIDE/TFwPnF2xzJXBAqjfk/ZxrqQBfAO5M9UXAkvRe+vtUdnrfpWUD\n+FbBdnreq/sVrPNHQHvPe9LFZSCL85RLwWvBeaqKi/PUwBWfUSqPr6ayHHgWOAIYB3wZuCsi3gCI\niDcL1rk7IrZHxPPAvgXtSyKiMyK2Ax1kie7jwNqI+HVa5t/IElyhzwFPRMT6iOgGbknLfB5ojYg3\nI+J9ssTX4+fAzFSfSZaQbNetBCankbkTI+Kt1H5bwWPPSM3xwK2pfhPZB4wed0bEthL7aAfmSToL\n8OnxwfUMcKykkcBWsg+G44ETgY30/r4D2AbM72V7xxWs8x7ZyKxZuTlP1TbnqermPDVA3FEqDwFX\nRMRRqYyNiH9N7aW+j31r0fq9tW8j6/0Xzu8rhl1pJyLagUMlTSQbIVr1IfZjRdIHg2PJEtEVkub0\nzCpcrNTqBfV3+tjHbODvgIOADkmj+h+x7Yr0we1lsg9pT5KN1n0JGEM24lbKlj4+UPh3GmywOU/V\nMOep6uY8NXDcURo4fwBGpvqDwJmSRgBIOkDSnwCPAt/q+Wchae9+7usFskQxNk1/B2gtWmYxMFHS\nPspuyDstLbMktTcqu/ny5KL1biQbSfIoXT9J2h94NyJuBn4MHJNmTS94fCrVnwROTfXTgf8ssdnC\n1xeSxkTE4oiYA7xBlohs8LQB56fHRWTX5ncAT9P7+64vi4FJkkaley1OKV/YVuOcpwxwnqoRzlMD\noC7vAKpFRGyQ1C5pFfBLstPUT6V7WjcB346I1ZIuA1olbSO75OGMfuxri6SZwJ0piSwFfla0zGuS\n/hZ4nGx07j8i4h4ASZeTvehfBZ4nu768xy3ApXxw+t123aeBqyVtB94H/hq4C6hPN0gOI/vHBHAu\ncIOkC4D1fHBJSbH7gLskTQHOIbthdhzZsX0UWFGuJ2O9WgRcBDwVEe9I2gIs6ut9V0pa52KyDyWv\nkV0G5ctUbMA5T1kB56nq5zw1ABRRk2fSapqkERGxKSWvFuCGiGhJ86YBUyLiO7kGWWUkvQyM77nu\n38zMSnOeGnzOU2Y78xml2nSxpMlk30r0EHA3gKRrgK8Df55jbGZmZs5TZpY7n1EyMzMzMzMr4i9z\nMDMzMzMzK+KOkpmZmZmZWRF3lMzMzMzMzIq4o2RW4SSdJOmTecdhZmbWG+cpq1buKJlVvpOAsiag\n9KNzZmZm/eE8ZVXJHSWrWZIOlfQrSddLWi3pIUkfkTRG0gOSnpG0SNIRkoZLekmZBknbJU1I21kk\naaykiZI6UlkuaWQf+75Q0kpJKyRdmdrOkrQ0tc2XtLukE4Bvkv0wYEeKbaf40vpjJD2dtjFX0qbU\nLklXS1qV9jk9tU+S9LikW4GVk
i6R9IOCGC+TdG7ZDoCZmfXJecp5ynIWES4uNVmAQ4Fu4Kg0fQfw\nbbJfEB+X2o4DHkv1B4AjgW+Q/cr8RUA9sDbNvw9oSvURQF2J/X4deBLYPU3vnR5HFSxzKXBOqs8D\nphXMKxXfQuC0VJ8NbEr1k4GHyX5Fe1/gd8B+wCTgHeCwgr/Hs6k+DHixMCYXFxcXl8EtzlPOUy75\nFv/grNW6tRHRkerPkP0TPgG4U1LPMvXpcREwATgMuAI4C2glS0YA7cBPJd0CLIiIzhL7nAz8IiLe\nBYiIN1P7pyRdCjSQJbAHi1eUNKKP+I4nu/wB4Fbgx6n+ReC2iNgGvC6pFfgc8DawJCLWpjhelrRB\n0tFkiWp5RGwo8RzMzGxwOE85T1lO3FGyWre1oL6N7B/vxog4qpdlF5GNgO0PzAEuIBvtagOIiCsl\n3U/2i/FPS5ocES/0sh0Bvf3S8zzgpIhYIemMtO1iw/qIrxT1Me+doumfA2cAHwVu2IV9mJlZeThP\n7ch5ygaN71Ey29HbwFpJp8D/Xjf92TRvMdko2faI2AJ0AH9FlpiQNCYiVkbEVcAy4IgS+3gIOFPS\n7mm9vVP7SOA1SbsBpxcs/4c0j4joK76nyS5fADi1YP02YHq6fn002WjjkhKxtQB/RjaSt9NIoZmZ\n5c55ynnKBok7SmY7Ox34rqQVwGpgCkBEbAVeIftHD1niGQmsTNPnpRtRVwCbgV/2tvGIeAC4F1gm\nqQM4P836EVmSexgoHOG7Hbgg3Xg7plR8wHnADyUtIbu2+63U3gI8B6wAHgMujIjfl4jtPeBx4I50\nCYSZmVUe5ynnKRsEiujtzKqZDTVp5G9zRISkU8lumJ3yf61XtI1hwLPAKRHxm3LEaWZmtcl5yoYa\n36NkVj2OBa5VdvfsRuDMXVlZ2Y8FLgRanHzMzKwMnKdsSPEZJbMykfRp4Kai5q0RcVwe8ZiZmRVy\nnjLrmztKZmZmZmZmRfxlDmZmZmZmZkXcUTIzMzMzMyvijpKZmZmZmVkRd5TMzMzMzMyKuKNkZmZm\nZmZWxB0lMzMzMzOzIv8DrlIfiVZmjS8AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))\n", "sp = sns.stripplot(x='news_category', y=\"sentiment_score\", \n", " hue='news_category', data=df, ax=ax1)\n", "bp = sns.boxplot(x='news_category', y=\"sentiment_score\", \n", " hue='news_category', data=df, palette=\"Set2\", ax=ax2)\n", "t = f.suptitle('Visualizing News Sentiment', fontsize=14)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEYCAYAAABPzsEfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XtYVXW+P/D3BxAUwQtCXkDFCxvY\nQKgQHjUmtZrBmUlnUkYzM3PM1GdqbCabRj2dqSZHT3X6jZWjZuqYZp608tZ4pquStwYFEpBLGuZd\nFMU7suHz+2Ot7RCBLpC9N+j79Tz7Ya2111rf795uefNda6/PElUFERHR9Xh5ugNERNQ0MDCIiMgS\nBgYREVnCwCAiIksYGEREZAkDg4iILGFgEBGRJQwMIiKyhIFBRESW+Hi6A1akpKTopk2bPN0NIrox\n4ukO0I1pEiOMkydPeroLRES3vCYRGERE5HkMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhE\nRGQJA4OIiCxhYBARkSVNojQI0a1qVVqPBt/nyOR9Db5PujVwhEFERJYwMIiIyBIGBhERWcLAICIi\nSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkicsCQ0QW\ni8gJEcmusuwlEckTka9F5AMRaeOq9omIqGG5coSxFEBKtWUfA4hV1dsBFAD4owvbJyKiBuSywFDV\nLQBKqi37p6o6zNkdAMJc1T4RETUsT57DGA/gH7U9KSITRSRdRNKLi4vd2C0iIqqJRwJDRGYAcABY\nUds6qrpQVRNVNTEkJMR9nSMiohq5/RatIvIwgJ8DuFtV1d3tExFR/bg1MEQkBcAfANylqhfd2TYR\nEd0YV36tdiWA7QAiReSQiPwawOsAAgF8LCKZIjLfVe0TEVHDctkIQ1UfqGHxW65qj4iIXItXehMR\nkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSUMDCIisoSBQURE\nlri9vHlTsiqtR4Pvc2TyvgbfJxGRO3CEQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxh\nYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGSJywJDRBaLyAkRya6yLEhEPhaR\nQvNnW1e1T0REDcuVI4ylAFKqLXsGwKeqGgHgU3OeiIiaAJcFhqpuAVBSbfEwAH83p/8O4Beuap+I\niBqWu2+g1F5VjwKAqh4VkdtqW1FEJgKYCABdunS55k4PJic3ZB//bZZrdnurcMUNqADehIrIUxrt\nSW9VXaiqiaqaGBIS4unuEBHd8twdGMdFpCMAmD9PuLl9IiKqJ3cHxjoAD5vTDwNY6
+b2iYionlz5\ntdqVALYDiBSRQyLyawCzAdwrIoUA7jXniYioCXDZSW9VfaCWp+52VZtEROQ6jfakNxERNS4MDCIi\nsoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSUMDCIisoSBQURElrj7BkrU\nSLnkJlS8ARXRTYUjDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSUM\nDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLPBIYIvKkiOSISLaIrBSR5p7oBxERWef2wBCRUABP\nAEhU1VgA3gBGubsfRERUN546JOUDoIWI+ADwB3DEQ/0gIiKL3H4DJVU9LCIvA/gOwCUA/1TVf1Zf\nT0QmApgIAF26dHFvJ4nqyCU3oAJ4EypqVDxxSKotgGEAugHoBKCliIypvp6qLlTVRFVNDAkJcXc3\niYioGk/covUeAN+qajEAiMj7APoDWO6BvhCRG+3ates2Hx+fRQBiwW9pNmaVALIdDseEhISEE86F\nngiM7wD8h4j4wzgkdTeAdA/0g4jczMfHZ1GHDh2iQ0JCTnt5eamn+0M1q6yslOLiYvuxY8cWARjq\nXO72hFfVnQBWA9gNYI/Zh4Xu7gcReURsSEjIWYZF4+bl5aUhISGlMEaCV3lihAFV/S8A/+WJtonI\no7wYFk2D+e/0vUEFjyESEZElDAwiavK2bdvWYtWqVa2d8ytWrGg9ffr0Dq5sc8OGDYEff/xxy4bc\n5zPPPOPSPt8oBgYRNXnp6en+GzduvBoYDz74YOmsWbOOubLNzz77LDAtLS2gIfc5d+7cjg25v5qU\nl5fXe1tLgSEin1pZRkRUV2fPnvUaOHBgz8jISHtERETMm2++2TYtLc3/jjvuiIyJiYm+8847Iw4c\nONAMAJKSkiInT54cGhcXFx0eHh67adOmgMuXL8tf/vKXTuvXr28bFRVlf/PNN9vOnTu33dixY7sA\nwPDhw8MffPDBLn379rWFhYXFbdy4MSA1NTW8e/fuMcOHDw939uP9999v1atXryi73R49ZMiQ7qWl\npV4AEBoaGvfkk092stvt0TabzZ6RkdE8Pz/fd9myZSHz589vHxUVZd+0aVONwXHw4EGfe++9t0dk\nZKQ9MjLS7hyR3HPPPT1iYmKie/bsGfPyyy8HA8CUKVNCy8rKvKKiouxDhw7tBgDz5s0LiouLi46K\nirKPHj26q8PhAAC8+uqrweHh4bFJSUmRo0aN6up8rQUFBb79+vWz2Ww2e79+/WyFhYW+zvdgwoQJ\nYX379rVNmjSpc9euXWOPHDniAwAVFRXo0qVL7NGjR697TvuagSEizUUkCECwiLQVkSDzEQ7jojsi\nohvy/vvvt+rQoUN5fn5+bmFhYc79999/9oknnuiydu3afTk5OXsffvjhk0899VSoc32HwyF79uzZ\nO2fOnIPPP/98p+bNm+sf//jHI/fdd9/pvLy83EcfffR09TZKS0t9tm/fXjB79uyDI0eOjJg2bdrx\nwsLCnLy8vBbbtm1rcfToUZ9Zs2Z13LJlS0Fubu7ePn36XHzhhRfaO7cPDg525Obm7h0/fnzx7Nmz\n20dGRl4ZO3Zs8aRJk47n5eXlpqSknK/ptU2aNKlLcnLyufz8/NycnJzcPn36XAaAFStWFOXk5OzN\nzMzMXbBgQftjx455z5s377Cfn19lXl5e7rp1677dvXt389WrVwelp6fn5eXl5Xp5een8+fPbFRUV\nNXv55Zc77ty5c29aWlpBYWFh86rtjR49+lRBQUHuyJEjT02ePLmz87l9+/Y137p1a8Fbb711cMSI\nEacWLVoUBABr165tFR0dfaljx46O6/1bXS9RHgMwFUY47AIg5vKzAN643s6JiK6nT58+l2bMmNF5\n8uTJocOGDStt166do7CwsMXgwYNtAFBZWYmQk
JCrx1FSU1NPA0D//v0vTJs2zddKGz/72c/OeHl5\noU+fPhfbtWtXnpSUdAkAbDbbpX379vkdOHDAd9++fc2TkpKiAKC8vFwSEhKuhsDo0aNPA0BSUtLF\ndevWtbX62rZt2xa4evXqbwHAx8cH7dq1qwCAOXPmtN+4cWMbADh27FiznJyc5h06dLhQddtNmzYF\nZmdn+8fHx0cDwOXLl71uu+02R1paWsu+ffuea9++fQUA/PKXvzxdUFDQHAAyMjJa/uMf/9gHAJMn\nTy557rnnwpz7u//++0/7+Bi/8idPnnxy6NChPZ999tkTixcvDh43btxJK6/nmoGhqn8F8FcReVxV\nX7OyQyKiurj99tvLdu/enbtmzZrWM2bMCB04cODZnj17XsrMzMyraf3mzZsrYPwCrqiokJrWqW0b\nb29v+Pr6Xv1ar5eXFxwOh3h7e+udd955dv369d9ep011OByW2qzNhg0bAjdv3hyYnp6eFxgYWJmU\nlBR56dKlHxztUVVJTU099cYbbxyuunzZsmVt6tNuQEBApXO6Z8+e5cHBwY5169YFZmRktPzwww/3\nW9mHpXMYqvqaiPQXkdEiMtb5qE+niYiqKioqahYYGFg5ZcqUkqlTpx5PT09vWVJS4vPJJ5+0BICy\nsjJJT0+/5j1zWrVqVXH+/Pl6f4ln4MCBF9LT0wOys7P9AODcuXNeX3/9td+1tgkMDKw4d+6c97XW\nGTBgwLmXXnopBAAcDgdKSkq8zpw54926deuKwMDAyoyMjOZZWVlXv2nl4+OjZWVlAgApKSlnN2zY\n0Pbw4cM+AHD8+HHvgoIC3+Tk5As7d+4MLC4u9i4vL8fatWuvjnh69+59YdGiRW0BYMGCBUGJiYk1\nHioDgPHjxxdPmDCh29ChQ0ucI4/rsXrS+20ALwO4E8Ad5iPRUgtERNewa9euFr169YqOioqyz5kz\np+MLL7xw5N133933zDPPhEVGRtpjYmLsmzdvvua3kYYMGXKuoKCghfOkd1370KlTJ8eCBQuKRo0a\n1d1ms9kTEhKi9uzZc82QGj58+JmNGze2udZJ77/97W/fbd68OdBms9ljY2Ptu3fvbjF8+PBSh8Mh\nNpvNPn369E7x8fFXD0U9+OCDxdHR0fahQ4d2S0hIuDxz5szDd999t81ms9kHDx5sO3jwYLNu3bqV\nP/nkk0fvuOOO6AEDBkTabLZLrVu3rnC29/bbbwfbbDb7ypUr282bN+9gbf1/4IEHSi9evOg9ceLE\nU1bfJ1G9/kWXIrIXgF2trOwCiYmJmp5ee7kpV5WW3jar4W/TMTJ5X4PvsyG44j10xfsHNM73kJ9B\nSyQrK6soPj7e0vFyql1paalX69atK8vLy/GTn/yk57hx406OHTv2TF32sWXLFv8nn3yy865du/Jr\nWycrKys4Pj4+3DlvtTRINoAOAI7WpUNERNTwpk2b1mnLli2tysrK5K677jo7ZsyYOoXF9OnTOyxd\nujRkyZIlNZ6zqY3VwAgGkCsiXwEocy5U1aG1b0JEdGv4wx/+0GHt2rVBVZcNGzasZM6cOS65eHDh\nwoWHbmT7WbNmHavPhY1WA+NPdd0xEdGtYs6cOcdcFQ6NiaXAUNXNru4IERE1bpYCQ0TOAXCe8PYF\n0AzABVVt5aqOERFR42J1hBFYdV5EfgEgySU9IiKiRqleF7qo6ocABjdwX4iIblknT570nj17dohz\nvqioqFlKSkp3T/apOquHpO6vMusF46I93jWLiDziYHJyQkPur3Na2q6G3F99nDp1yvutt9667Zln\nnikGgPDw8PJNmzZZKtnhLlZHGPdVefwEwDkAw1zVKSKixiY/P9+3e/fuMaNGjeras2fPmAEDBkSc\nP39ecnJy/JKTkyNiYmKiExISIjMyMpoDQE5Ojl98fHxUbGxs9NSpUzv5+/v3BoyL7vr162dzlktf\nvnx5GwD4/
e9/H3bw4EG/qKgo+2OPPRaWn5/vGxEREQMAt99+e1TV8ihJSUmRaWlp/mfPnvVKTU0N\nj42NjY6Ojr66L1exWkvqkSqPR1X1RVU94cqOERE1Nt99913zJ5544sQ333yT07p164ply5a1nTBh\nQtd58+Z9l5OTs/ell146NHny5C4A8Jvf/KbzlClTTmRnZ+/t1KnT1Wq7/v7+lRs3bvwmNzd37+bN\nmwumT58eVllZiVdeeeVQ586dy/Ly8nIXLFjwvesshg8fXrJixYogADhw4ECzEydONEtOTr44ffr0\njoMGDTqbnZ29Ny0tLX/mzJlhZ8+eddmN8azWkgoTkQ9E5ISIHBeRNSISdv0tiYhuHqGhoWX9+/e/\nBAC9e/e+WFRU5JeRkRGQmpraIyoqyj5lypSuJ06caAYAGRkZAePHjy8BgAkTJlyt11RZWSlTp04N\ns9ls9kGDBtlOnDjhe+jQoWueHhg7duxpZ1n1ZcuWtb3vvvtOA8AXX3zR6tVXX+0YFRVlv/POOyPL\nysrkm2++sVTyvT6sXri3BMA7AFLN+THmsntd0Skiosaoaml0b29vPX78uE9gYKAjLy8v1+o+FixY\nEHTq1CmfPXv27PXz89PQ0NC4msqbV9WtW7fyNm3aOHbu3Nni/fffD1qwYMEBAFBVrF69+pv4+Piy\na23fUKwOXUJUdYmqOszHUgAh19uIiOhm1qpVq8qwsLArixcvbgsYN3vavn17CwDo1avX+aVLl7YF\ngMWLF18tG1JaWuodHBxc7ufnp+vXrw88cuSILwC0bt264sKFC7X+Th4xYkTJrFmzOpw7d87beQOo\nQYMGnX3llVfaV1Yat7rYunVrC5e9WFgPjJMiMkZEvM3HGACWS+ISEd2sVq5cuX/JkiXBznuSr1mz\npg0AvPbaawdfe+219nFxcdFHjx5tFhAQUAEAEyZMKMnKymoZGxsbvXz58qBu3bpdBoAOHTpUJCQk\nnI+IiIh57LHHfnDIf8yYMac3btwYNGzYsBLnstmzZx9xOBwSFRVlj4iIiJk5c2Zo9e0aktVDUuMB\nvA7gVRhfp90G4JH6NioibQAsAhBr7m+8qm6v7/6I6Nbiia/BRkZGXiksLMxxzj///PPHndNpaWmF\n1dcPDw8vz8zMzPPy8sLChQvbxsXFXQCAjh07Omq7m2D1O/5Vba9z584Oh8PxvdcdEBCg77zzzoH6\nv6q6sRoYLwB4WFVPA4CIBMG4odL4erb7VwCbVHWEiPgC8K/nfoiIGqWtW7f6//a3v+2iqmjVqlXF\n0qVLizzdpxtlNTBud4YFAKhqiYj0rk+DItIKwI8AjDP3dQXAlfrsi4iosUpJSTmfn59v+WR4U2D1\nHIaXiFy97aE5wrAaNtV1B1AMYImIZIjIIhFpeb2NiIjIs6z+0n8FwDYRWQ3jnMOvALx4A232AfC4\nqu4Ukb8CeAbAf1ZdSUQmApgIAF26dKlnU3QzmvHJDd07pkYv3sPLioiux+qV3ssADAdwHMbo4H5V\nfbuebR4CcEhVd5rzq2EESPU2F6pqoqomhoTwG7xERJ5m+bCSquYCuOHjcap6TEQOikikquYDuLsh\n9ktERK7lspoj1/E4gBUi8jWAXgBmeagfRERu89///d8hr7/+ejsAmDt3bruioqJmzudGjhzZddeu\nXc1r39rz6nvi+oaoaiaMEulERHW2Kq1Hg5Y3H5m8zy3XdTz99NPFzunly5cH9+rV61J4eHg5AKxa\ntcpt11PUl6dGGERETUp+fr5vt27dYu6///5wm81mT0lJ6X7u3DmvtWvXBkZHR9ttNps9NTU1/NKl\nSwIAU6ZMCe3Ro0eMzWazT5w4MQwAfve733V69tln2y9ZsqRtdna2/9ixY7tHRUXZz58/L0lJSZFb\ntmzxnzNnTsikSZOufgtj7ty57R5++OHOADBv3ryguLi46KioKPvo0aO7Ohw
Ot74HDAwiIouKioqa\nT5o0qbigoCA3MDCw8oUXXmj/2GOPdVu1atW+goKCXIfDgZdeeink+PHj3h999FHbwsLCnIKCgtxZ\ns2YdrbqfRx555HRsbOzFZcuW7c/Ly8sNCAi4WtTwoYceOv3RRx9dva/F6tWrg0aPHn169+7dzVev\nXh2Unp6el5eXl+vl5aXz589v587Xz8AgIrKoQ4cOV3784x9fAICHHnro1ObNmwPDwsLKbr/99jIA\nGDdu3Kkvv/wyMCgoqMLPz69y1KhRXf/+97+3CQgIqLTaRqdOnRydO3cu+/TTT1seO3bMe//+/c3v\nvffe85s2bQrMzs72j4+Pj46KirJ/+eWXrfbv3+/nqtdaE4+cwyAiaopExNJ6zZo1Q2Zm5t5169a1\nevfdd9v+7W9/u23Hjh0FVtsZMWLE6ZUrV7aNioq6PGTIkNNeXl5QVUlNTT31xhtvHK73C7hBHGEQ\nEVl09OhR308++aQlALzzzjtBAwcOPHv48GHf7OxsPwBYtmxZu+Tk5HOlpaVeJSUl3iNHjiydP3/+\nwb179/6gXl5AQEBFaWmpd03tjBkz5vSmTZvavvfee0GjR48uAYCUlJSzGzZsaHv48GEfADh+/Lh3\nQUGBy26WVBMGBhGRRd27d7+8ePHidjabzX769GmfmTNnnpg/f35RampqD5vNZvfy8sJTTz1VfObM\nGe+UlJQIm81mT05Ojvzzn/98sPq+xo4de/Lxxx/v6jzpXfW5kJCQioiIiEuHDx/2GzRo0EUASEhI\nuDxz5szDd999t81ms9kHDx5sO3jwYLPq+3UlUdXrr+VhiYmJmp6eXuvzB5OTXdLutllHGnyfI5P3\nNfg+G4Ir3kNXvH8A8HXZ5gbf542WBuFn0BLJysoqio+PP+mpDtyI/Px835///OcRVUuO3+yysrKC\n4+Pjw53zHGEQEZElDAwiIguq30DpVsTAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIjfKz8/3nT9/\nflB9tvX39+/d0P2pC5YGIaImZ8Ynhxq0vPmL94S5pbw5ABQWFvqtWrUqaNKkSSXVnysvL0ezZm69\nFq9OOMIgIrIgPz/ft3v37jGjRo3q2rNnz5gBAwZEnD9/XnJycvySk5MjYmJiohMSEiIzMjKaA8Dw\n4cPDlyxZ0ta5vXN0MGPGjND09PSAqKgo+3PPPXfb3Llz2w0ZMqT74MGDeyYnJ9tKS0u9+vXrZ7Pb\n7dE2m82+fPnyNrX1yd04wiAisui7775rvnz58v39+/c/8NOf/rT7smXL2r799tvBCxcuPBAXF1f2\n2WeftZw8eXKXaxUafPHFFw+/8sor7T///PNvAON+F7t37w74+uuvc9q3b19RXl6OjRs3fhMUFFR5\n9OhRn759+0aNHj36jJeX5/++Z2AQEVkUGhpa1r9//0sA0Lt374tFRUV+GRkZAampqT2c61y5csVa\nSdsqkpOTz7Zv374CACorK2Xq1KlhO3bsCPDy8sKJEyd8Dx065NOlSxf33i2pBgwMIiKLfH19rxbf\n8/b21uPHj/sEBgY68vLycquv6+PjoxUVFQCAyspKlJeX1xok/v7+V++XsWDBgqBTp0757NmzZ6+f\nn5+GhobGXbp0yfPDC/AcBhFRvbVq1aoyLCzsyuLFi9sCRjBs3769BQB07dr1yq5du/wBYMWKFW0c\nDocAQOvWrSvOnz9fY1lzACgtLfUODg4u9/Pz0/Xr1wceOXLErSXMr4WBQUR0A1auXLl/yZIlwZGR\nkfaIiIiYNWvWtAGAxx9/vHjbtm2BcXFx0Tt27GjZokWLSgBISkq65OPjo5GRkfbnnnvutur7mzBh\nQklWVlbL2NjY6OXLlwd169btsrtfU214SIqImhx3fg3WqXrxweeff/64czotLa2w+vqdO3d2ZGVl\n5TnnnXfK8/Pz0+3bt1c/KX7KOdGxY0d
HZmZmHmpw8eLFjBt4CTeMIwwiIrKEgUFERJYwMIiIyBKP\nBYaIeItIhohs8FQfiMjtKisrK+t8nQK5n/nvVFl1mSdHGL8FsNeD7ROR+2UXFxe3Zmg0bpWVlVJc\nXNwaQHbV5R75lpSIhAH4GYAXAfzOE30gIvdzOBwTjh07tujYsWOx4CHxxqwSQLbD4ZhQdaGnvlb7\n/wA8DSDQQ+0TkQckJCScADDU0/2g+nF7YIjIzwGcUNVdIjLwGutNBDARALp06eKm3hHd/GZ8csgl\n+33xnjCX7JcaD08MCQcAGCoiRQDeBTBYRJZXX0lVF6pqoqomhoSEuLuPRERUjdsDQ1X/qKphqhoO\nYBSAz1R1jLv7QUREdcOTTkREZIlHa0mp6hcAvvBkH4iIyBqOMIiIyBIGBhERWcLAICIiSxgYRERk\nCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkiUeLD96KePMaImqq\nOMIgIiJLGBhERGQJA4OIiCxhYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJ\nA4OIiCxhYBARkSVuDwwR6Swin4vIXhHJEZHfursPRERUd56oVusA8HtV3S0igQB2icjHqprrgb4Q\nEZFFbh9hqOpRVd1tTp8DsBdAqLv7QUREdePRcxgiEg6gN4CdNTw3UUTSRSS9uLjY3V0jIqJqPBYY\nIhIAYA2Aqap6tvrzqrpQVRNVNTEkJMT9HSQiou/xSGCISDMYYbFCVd/3RB+IiKhuPPEtKQHwFoC9\nqvo/7m6fiIjqxxMjjAEAHgIwWEQyzcdPPdAPIiKqA7d/rVZVvwQg7m6XiIhuDK/0JiIiSxgYRERk\nCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZEl\nDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYw\nMIiIyBIGBhERWcLAICIiSzwSGCKSIiL5IvKNiDzjiT4QEVHduD0wRMQbwBsAhgCwA3hAROzu7gcR\nEdWNJ0YYSQC+UdX9qnoFwLsAhnmgH0REVAeiqu5tUGQEgBRVnWDOPwSgr6r+ptp6EwFMNGcjAeS7\ntaN1EwzgpKc70cTxPbxxjf09PKmqKZ7uBNWfjwfalBqW/SC1VHUhgIWu786NE5F0VU30dD+aMr6H\nN47vIbmaJw5JHQLQucp8GIAjHugHERHVgScC418AIkSkm4j4AhgFYJ0H+kFERHXg9kNSquoQkd8A\n+D8A3gAWq2qOu/vRwJrEobNGju/hjeN7SC7l9pPeRETUNPFKbyIisoSBQURElty0gSEibURkSj23\nXWpeL9IQ/fhCRPhVx3oSkYEi0t/T/WiKavvsicg4EXndE32ipu2mDQwAbQDUKzCocRARHwADATAw\n6sgswUPUoG7mwJgNoIeIZIrISyIyTUT+JSJfi8hzzpVEZKy5LEtE3q6y/Y9EZJuI7HeONsy/dr8Q\nkdUikiciK0REzOfuFpEMEdkjIotFxK96h0TkAfP5bBGZU2X5r0WkwNz3myLyuogEisi3ItLMXKeV\niBQ55xsjEWkpIhvN9zJbREaafZ4jIl+Zj57mul1F5FPzvf9URLqYy5eKyP+IyOcAVgGYBOBJ898x\nWURSzX1nicgWD75clxGRp0XkCXP6VRH5zJy+W0SWX+NzdF5EnheRnQD6VdvnI+ZnbDOAAe58PXQT\nUdWb8gEgHEC2Of1jGF85FBghuQHAjwDEwCg5EmyuF2T+XArgPXNdO4zaV4Dx124pjIsNvQBsB3An\ngOYADgKwmestAzDVnP4CQCJlq2BiAAAG8ElEQVSATgC+AxAC4+vMnwH4hbm8CEAQgGYA0gC8bm67\nBMA
vzOmJAF7x9Pt6nfd8OIA3q8y3Nl/bDHN+LIAN5vR6AA+b0+MBfFjlvd8AwNuc/xOAp6rscw+A\nUHO6jadfs4vex/8A8J45nQbgK/Oz8V/m4wefI3NdBfCrKvtxfvY6VtnGF8BW52eMDz7q8riZRxhV\n/dh8ZADYDSAKQASAwQBWq+pJAFDVkirbfKiqlaqaC6B9leVfqeohVa0EkAkjmCIBfKuqBeY6f4cR\nSFXdAeALVS1WVQeAFeY6SQA2q2qJqpbDCCqnRQAeMacfgREgjdkeAPeYI4pkVS01l6+s8tP5l28/\nAO+Y02/DCF6n91S1opY2tgJYKiKPwriO52a0C0CCiAQCKIPxh0kigGQAZ1Dz5wgAKgCsqWF/fats\ncwXGyI2ozm6VwBAAf1HVXuajp6q+ZS6v7UKUsmrb17S8AsZfeTXVx6qpD3VZDlXdCiBcRO6C8Rd3\ntoV2PMYMzAQYwfEXEXnW+VTV1WrbvMr0hWu0MQnATBjlZTJFpF39e9w4mX84FMH4I2EbjFHGIAA9\nYIwUanP5GkHLC67oht3MgXEOQKA5/X8AxotIAACISKiI3AbgUwC/cv7SEZGgeraVB+MXe09z/iEA\nm6utsxPAXSISbJ6QfMBc5ytzeVvzJO/watstg/GXeWMfXUBEOgG4qKrLAbwMoI/51MgqP7eb09tg\nlIUBgAcBfFnLbqv+O0JEeqjqTlV9FkZl1s61bNfUbQHwlPkzDca5nEwAO1Dz5+hadgIYKCLtzHNg\nqa7rNt3MPFGt1i1U9ZSIbBWRbAD/gHH4Y7t5jvo8gDGqmiMiLwLYLCIVMA5ZjatHW5dF5BEA75m/\n9P8FYH61dY6KyB8BfA5jVPGRqq4FABGZBeM/9REAuTDOkzitAPBn/PuwTmMWB+AlEakEUA5gMoDV\nAPzME7FeMH7BAcATABaLyDQAxfj3obfq1gNYLSLDADwO4wR4BIz38FMAWa56MR6WBmAGgO2qekFE\nLgNIu9bnqDbmNn+CEdZHYRyWvVkP55ELsTRIIyAiAap63gybD2DU1/rAfG4EgGGq+pBHO1lPIlIE\nINF5noiImq6bdoTRxPxJRO6B8W2rfwL4EABE5DUYt7L9qQf7RkQEgCMMIiKy6GY+6U1ERA2IgUFE\nRJYwMIiIyBIGBjVaIvILEbF7uh9EZGBgUGP2Cxi1vFxGWNWVyDIGxi1IRMJFZK9ZGTdHRP4pIi1E\npIeIbBKRXSKSJiJRIuItRsVeEeMeI5Ui8iNzP2ki0lNE7jKryWaKUbE38BptP21WWs0SkdnmskfF\nqCScJSJrRMRfjHtgDIVxIWCm2bcf9M/cvoeI7DD38byInDeXixiVirPNNkeayweKyOci8g6APSLy\ngoj8tkofXxSzWiwRVeHp6od8uP8Bo2CiA0Avc/5/AYyBceV0hLmsL4DPzOlNMCr7/hzGVewzAPjB\nKLgIGFdjDzCnAwD41NLuEBglQfzNeWd14HZV1vkzgMfN6aUARlR5rrb+bQDwgDk9CcB5c3o4gI9h\nXNXcHkYdpo4wqg5fANCtyvux25z2ArCvap/44IMP48EL925d36pqpjm9C8Yvzf4wyps413He0yMN\nRkXUbgD+AuBRGPWL/mU+vxXA/4jICgDvq+qhWtq8B8ASVb0IfK86cKyI/BnGTa8CYNT++h6zDlht\n/esH4/AVYJSAedmcvhPASjUK8h037wVxB4CzMKoOf2v2o0hETolIbxjBkqGqp2p5DUS3LAbGrat6\n1d32AM6oaq8a1nUWv+sE4FkA02D8lb4FAFR1tohshHFF+g4RuUdV82rYT23VgZfCuKdDloiMM/dd\nndc1+leba1URrl4RdxGMOmIdACyuQxtEtwyewyCnswC+FZFU4Orx/3jzuZ0w/rqvVNXLMKqmPgYj\nSJwVZPeo6hwA6TDuN1KTf8KoGuxvbuesDhwI4KhZSfXBKutfrVSrq
tfq3w78u8rvqCrbbwEw0jwP\nEwJjlPRVLX37AEAKjBHID0Y4RMTAoO97EMCvRSQLQA6AYQCgqmUw7ii4w1wvDcYv8j3m/FTzxHIW\ngEswqgP/gKpuArAOQLqIZMIo3w0A/wkjlD6GUSre6V0A08wT6T1q6x+AqQB+JyJfwThH4az2+wGA\nr2FUtP0MwNOqeqyWvl2BUQH2f7X2e0oQ3dJYS4qaPHPEcklVVURGwTgBPux621XbhxeMst+pqlro\nin4SNXU8h0E3gwQAr4txNvwMjHuEW2ZeHLgBwAcMC6LacYRBDU5E4mDcp7uqMlXt64n+EFHDYGAQ\nEZElPOlNRESWMDCIiMgSBgYREVnCwCAiIksYGEREZMn/B5JpsJb/6XLqAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fc = sns.factorplot(x=\"news_category\", hue=\"sentiment_category\", \n", " data=df, kind=\"count\", \n", " palette={\"negative\": \"#FE2020\", \n", " \"positive\": \"#BADD07\", \n", " \"neutral\": \"#68BFF5\"})" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Most Negative Tech News Article: The maker of world's cheapest smartphone 'Freedom 251' priced at ₹251, Ringing Bells' founder Mohit Goel was arrested along with two more people by the Delhi Police on Sunday. The three were allegedly trying to extort money in lieu of settling a rape case. Last year, Goel was arrested over allegations of fraud and an alleged non-payment of ₹16 lakh.\n", "\n", "Most Positive Tech News Article: The American Automobile Association has launched a contest to find the first couple to get married in one of its self-driving shuttles in Las Vegas. The contestants will have to write a 400-word essay describing how an autonomous vehicle would have changed their road trip experience with their partner. 
The winning couple will be married on June 30.\n" ] } ], "source": [ "# Pick the extremes programmatically instead of hard-coding scores seen in one scrape,\n", "# so this cell keeps working when the headlines are re-fetched.\n", "# astype('float') guards against the score column being stored as object dtype.\n", "tech_scores = df.loc[df.news_category == 'technology', 'sentiment_score'].astype('float')\n", "pos_idx = tech_scores.idxmax()\n", "neg_idx = tech_scores.idxmin()\n", "\n", "# Assumes df rows line up positionally with news_df (default 0..n-1 index), as the original lookup did.\n", "print('Most Negative Tech News Article:', news_df.iloc[neg_idx]['news_article'])\n", "print()\n", "print('Most Positive Tech News Article:', news_df.iloc[pos_idx]['news_article'])" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Most Negative World News Article: Slamming Canadian Prime Minister Justin Trudeau's comments on US tariffs during the G7 summit, US President Donald Trump's trade adviser Peter Navarro said, \"Trudeau deserves a special place in hell.\" Navarro also accused Trudeau of backstabbing Trump. The Canadian PM had called US tariffs \"insulting\", saying the country won't be pushed around and plans to apply retaliatory tariffs.\n", "\n", "Most Positive World News Article: Pope Francis on Sunday said he is praying that the upcoming summit between US President Donald Trump and North Korean leader Kim Jong-un succeeds in laying the groundwork for peace. 
Urging people around the world to pray for the summit, the pontiff said, \"I want to offer the beloved people of Korea an especial thought of friendship.\"\n" ] } ], "source": [ "# Pick the extremes programmatically instead of hard-coding scores seen in one scrape,\n", "# so this cell keeps working when the headlines are re-fetched.\n", "# astype('float') guards against the score column being stored as object dtype.\n", "world_scores = df.loc[df.news_category == 'world', 'sentiment_score'].astype('float')\n", "pos_idx = world_scores.idxmax()\n", "neg_idx = world_scores.idxmin()\n", "\n", "# Assumes df rows line up positionally with news_df (default 0..n-1 index), as the original lookup did.\n", "print('Most Negative World News Article:', news_df.iloc[neg_idx]['news_article'])\n", "print()\n", "print('Most Positive World News Article:', news_df.iloc[pos_idx]['news_article'])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "from textblob import TextBlob\n", "\n", "sentiment_scores_tb = [round(TextBlob(article).sentiment.polarity, 3) for article in news_df['clean_text']]\n", "sentiment_category_tb = ['positive' if score > 0 \n", " else 'negative' if score < 0 \n", " else 'neutral' \n", " for score in sentiment_scores_tb]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentiment_score
countmeanstdmin25%50%75%max
news_category
sports25.00.0840400.149114-0.200-0.017000.0750.159000.381
technology24.00.0104580.203315-0.500-0.075250.0000.059250.500
world25.00.1207600.221134-0.2960.000000.0750.211000.700
\n", "
" ], "text/plain": [ " sentiment_score \\\n", " count mean std min 25% 50% \n", "news_category \n", "sports 25.0 0.084040 0.149114 -0.200 -0.01700 0.075 \n", "technology 24.0 0.010458 0.203315 -0.500 -0.07525 0.000 \n", "world 25.0 0.120760 0.221134 -0.296 0.00000 0.075 \n", "\n", " \n", " 75% max \n", "news_category \n", "sports 0.15900 0.381 \n", "technology 0.05925 0.500 \n", "world 0.21100 0.700 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame([list(news_df['news_category']), sentiment_scores_tb, sentiment_category_tb]).T\n", "df.columns = ['news_category', 'sentiment_score', 'sentiment_category']\n", "df['sentiment_score'] = df.sentiment_score.astype('float')\n", "df.groupby(by=['news_category']).describe()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
news_categorysentiment_scoresentiment_category
0technology-0.058negative
1technology0.119positive
2technology-0.022negative
3technology0.363positive
4technology0.078positive
\n", "
" ], "text/plain": [ " news_category sentiment_score sentiment_category\n", "0 technology -0.058 negative\n", "1 technology 0.119 positive\n", "2 technology -0.022 negative\n", "3 technology 0.363 positive\n", "4 technology 0.078 positive" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEYCAYAAABPzsEfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XlcVXX+P/DXGxAQwQUhFxBXLqu5\nQPhVY1KrGZwp7Zvx1cxsGcft0WIz2TjZr5lqcrRs+n1bTK3USMecbHFrnKmclNwaFBkBWVJxV1AU\nxQW58P7+cc51iAAPyL0X9PV8PO6Dc88953w+94q87ucs7yOqCiIioqvxcHcHiIioeWBgEBGRJQwM\nIiKyhIFBRESWMDCIiMgSBgYREVnCwCAiIksYGEREZAkDg4iILPFydwesSEpK0vXr17u7G0R0bcTd\nHaBr0yxGGCdPnnR3F4iIbnjNIjCIiMj9GBhERGQJA4OIiCxhYBARkSUMDCIisoSBQUREljgtMERk\nkYgUikhmtfmPi0iuiGSJyCvOap+IiBqXM0cYSwAkVZ0hIkMBjARws6rGAJjrxPaJiKgROS0wVHUT\ngOJqs6cAmK2qZeYyhc5qn4iIGperS4PYACSKyMsALgF4WlX/VdOCIjIRwEQACAsLc10PiZqQFak9\nG32boxP3Nvo26cbg6oPeXgDaAfgvANMB/FVEaqwvo6oLVTVeVeODg4Nd2UciIqqBqwPjMIBP1fAd\ngEoAQS7uAxERNYCrA+NzAMMAQERsALwBsLIgEVEz4LRjGCKyHMAQAEEichjA7wEsArDIPNX2MoCH\nVFWd1QciImo8TgsMVb2/lpfGOatNIiJyHl7pTUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OI\niCxhYBARkSUMDCIissTV1WrpBuKMSqsAq60SuQtHGEREZAkDg4iILGFgEBGRJQwMIiKyhIFBRESW\nMDCIiMgSBgYREVnCwCAiIksYGEREZInTAkNEFolIoXn/7uqvPS0iKiJBzmqfiIgalzNHGEsAJFWf\nKSJdANwJ4KAT2yYiokbmtMBQ1U0Aimt46XUAzwBQZ7VNRESNz6XHMERkBIAjqpphYdmJIpImImlF\nRUUu6B0REdXFZYEhIn4AZgJ43sryqrpQVeNVNT44ONi5nSMioqty5QijJ4DuADJEpABAKICdItLR\nhX0gIqIGctn9MFR1N4CbHM/N0IhX1ZOu6gMRETWcM0+rXQ5gK4AIETksIr90VltEROR8ThthqOr9\nV3m9m7PaJiKixscrvYmIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYw\nMIiIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLA\nICIiS5wWGCKySEQKRSSzyrxXRSRHRP4tIp+JSFtntU9ERI3LmSOMJQCSqs37EkCsqt4
MIA/A75zY\nPhERNSKnBYaqbgJQXG3eP1TVbj7dBiDUWe0TEVHjcucxjEcB/K22F0VkooikiUhaUVGRC7tFREQ1\ncUtgiMhMAHYAy2pbRlUXqmq8qsYHBwe7rnNERFQjL1c3KCIPAbgLwO2qqq5un4iIGsalgSEiSQB+\nC+A2Vb3gyraJiOjaOPO02uUAtgKIEJHDIvJLAG8BCADwpYjsEpH5zmqfiIgal9NGGKp6fw2z33dW\ne0RE5Fy80puIiCxhYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxh\nYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSXOvKf3IhEp\nFJHMKvMCReRLEck3f7ZzVvtERNS4nDnCWAIgqdq8GQC+VtVwAF+bz4mIqBlwWmCo6iYAxdVmjwTw\ngTn9AYB7nNU+ERE1Llcfw+igqscAwPx5k4vbJyKiBvJydwdqIyITAUwEgLCwsDqXPZSY6JQ+dElN\ndcp2iYiaI1ePME6ISCcAMH8W1ragqi5U1XhVjQ8ODnZZB4mIqGauDozVAB4ypx8CsMrF7RMRUQM5\n87Ta5QC2AogQkcMi8ksAswHcKSL5AO40nxMRUTPgtGMYqnp/LS/d7qw2iahp27Fjx01eXl7vAYgF\nLxxuyioBZNrt9glxcXFXDh002YPeRHT98fLyeq9jx45RwcHBpz08PNTd/aGaVVZWSlFRUfTx48ff\nAzDCMZ8JT0SuFBscHHyWYdG0eXh4aHBwcAmMkeB/5rupP0R0Y/JgWDQP5r/TDzKCgUFERJYwMIio\n2duyZUvLFStWtHE8X7ZsWZtnn322ozPbXLt2bcCXX37ZqjG3OWPGDKf2+VoxMIio2UtLS/Nbt27d\nlcB44IEHSmbNmnXcmW1u2LAhIDU11b8xt/nGG290aszt1aS8vLzB61oKDBH52so8IqL6Onv2rMeQ\nIUN6RURERIeHh8e8++677VJTU/1uueWWiJiYmKhbb701/MCBAy0AICEhIWLKlCkhvXv3jurWrVvs\n+vXr/S9duiR/+tOfOq9Zs6ZdZGRk9LvvvtvujTfeaD9+/PgwABg1alS3Bx54IGzAgAG20NDQ3uvW\nrfNPTk7u1qNHj5hRo0Z1c/Tj008/bd23b9/I6OjoqOHDh/coKSnxAICQkJDeTz31VOfo6Ogom80W\nnZ6e7pubm+udkpISPH/+/A6RkZHR69evrzE4Dh065HXnnXf2jIiIiI6IiIh2jEjuuOOOnjExMVG9\nevWKmTt3bhAATJ06NaSsrMwjMjIyesSIEd0BYN68eYG9e/eOioyMjB47dmxXu90OAHj99deDunXr\nFpuQkBAxZsyYro73mpeX5z1w4ECbzWaLHjhwoC0/P9/b8RlMmDAhdMCAAbbJkyd36dq1a+zRo0e9\nAKCiogJhYWGxx44du+pZs3UGhoj4ikgggCARaWfezyJQRLoB6Hy1jRMRXc2nn37aumPHjuW5ubnZ\n+fn5Wffee+/ZJ554ImzVqlV7s7Ky9jz00EMnn3766RDH8na7XXbv3r1nzpw5h1588cXOvr6++rvf\n/e7o3XfffTonJyf7V7/61enqbZSUlHht3bo1b/bs2YdGjx4dPn369BP5+flZOTk5Lbds2dLy2LFj\nXrNmzeq0adOmvOzs7D39+/e/8NJLL3VwrB8UFGTPzs7e8+ijjxbNnj27Q0RExOXx48cXTZ48+URO\nTk52UlJSaU3vbfLkyWGJiYnncnNzs7OysrL79+9/CQCWLVtWkJWVtWfXrl3ZCxYs6HD8+HHPefPm\nHfHx8anMycnJXr169f6dO3f6rly5MjAtLS0nJycn28PDQ+fPn9++oKCgxdy5cztt3759T2pqal5+\nfr5v1fbGjh17Ki8vL3v06NGnpkyZ0sXx2t69e30
3b96c9/777x+67777Tr333nuBALBq1arWUVFR\nFzt16mS/2r/V1RJlEoBpMMJhBwAx558F8PbVNk5EdDX9+/e/OHPmzC5TpkwJGTlyZEn79u3t+fn5\nLYcNG2YDgMrKSgQHB1/Zj5KcnHwaAAYNGnR++vTp3lba+MUvfnHGw8MD/fv3v9C+ffvyhISEiwBg\ns9ku7t271+fAgQPee/fu9U1ISIgEgPLycomLi7sSAmPHjj0NAAkJCRdWr15t+cZvW7ZsCVi5cuV+\nAPDy8kL79u0rAGDOnDkd1q1b1xYAjh8/3iIrK8u3Y8eO56uuu379+oDMzEy/Pn36RAHApUuXPG66\n6SZ7ampqqwEDBpzr0KFDBQD893//9+m8vDxfAEhPT2/1t7/9bS8ATJkypfiFF14IdWzv3nvvPe3l\nZfzJnzJlyskRI0b0ev755wsXLVoU9PDDD5+08n7qDAxV/V8A/ysij6vqm1Y2SERUHzfffHPZzp07\nsz/55JM2M2fODBkyZMjZXr16Xdy1a1dOTcv7+voqYPwBrqiokJqWqW0dT09PeHt7Xzmt18PDA3a7\nXTw9PfXWW289u2bNmv1XaVPtdrulNmuzdu3agI0bNwakpaXlBAQEVCYkJERcvHjxR3t7VFWSk5NP\nvf3220eqzk9JSWnbkHb9/f0rHdO9evUqDwoKsq9evTogPT291eeff77PyjYsHcNQ1TdFZJCIjBWR\n8Y5HQzpNRFRVQUFBi4CAgMqpU6cWT5s27URaWlqr4uJir6+++qoVAJSVlUlaWppvXdto3bp1RWlp\naYNP4hkyZMj5tLQ0/8zMTB8AOHfunMe///1vn7rWCQgIqDh37pxnXcsMHjz43KuvvhoMAHa7HcXF\nxR5nzpzxbNOmTUVAQEBlenq6b0ZGxpUzrby8vLSsrEwAICkp6ezatWvbHTlyxAsATpw44ZmXl+ed\nmJh4fvv27QFFRUWe5eXlWLVq1ZURT79+/c6/99577QBgwYIFgfHx8TXuKgOARx99tGjChAndR4wY\nUewYeVyN1YPeHwKYC+BWALeYj3hLLRAR1WHHjh0t+/btGxUZGRk9Z86cTi+99NLRjz76aO+MGTNC\nIyIiomNiYqI3btxY59lIw4cPP5eXl9fScdC7vn3o3LmzfcGCBQVjxozpYbPZouPi4iJ3795dZ0iN\nGjXqzLp169rWddD7nXfeObhx48YAm80WHRsbG71z586Wo0aNKrHb7WKz2aKfffbZzn369LmyK+qB\nBx4oioqKih4xYkT3uLi4S88999yR22+/3Waz2aKHDRtmO3ToUIvu3buXP/XUU8duueWWqMGDB0fY\nbLaLbdq0qXC09+GHHwbZbLbo5cuXt583b96h2vp///33l1y4cMFz4sSJp6x+TqJ69YsuRWQPgGi1\nsrATxMfHa1paWq2v8wZKTdOK1J5O2e7oxL1O2W5T5IzP0I2fn2RkZBT06dPH0v5yql1JSYlHmzZt\nKsvLy/Gzn/2s18MPP3xy/PjxZ+qzjU2bNvk99dRTXXbs2JFb2zIZGRlBffr06eZ4brX4YCaAjgCO\n1adDRETU+KZPn95506ZNrcvKyuS22247O27cuHqFxbPPPttxyZIlwYsXL67xmE1trAZGEIBsEfkO\nQJljpqqOqH0VIqIbw29/+9uOq1atCqw6b+TIkcVz5sxxysWDCxcuPHwt68+aNet4Qy5stBoYf6jv\nhomIbhRz5sw57qxwaEosBYaqbnR2R4iIqGmzFBgicg6A44C3N4AWAM6ramtndYyIiJoWqyOMgKrP\nReQeAAlO6RERETVJDbrQRVU/BzCsoY2KyFMikiUimSKyXETqPN+ZiOh6d/LkSc/Zs2cHO54XFBS0\nSEpK6uHOPlVndZfUvVWeesC4aK9B12SISAiAJ2Bc13FRRP4KYAyAJQ3ZHhHdeA4lJsY15va6pKbu\naMztNcSpU6c
833///ZtmzJhRBADdunUrX79+vaWSHa5idYRxd5XHzwCcAzDyGtr1AtBSRLwA+AE4\neg3bIiJyutzcXO8ePXrEjBkzpmuvXr1iBg8eHF5aWipZWVk+iYmJ4TExMVFxcXER6enpvgCQlZXl\n06dPn8jY2NioadOmdfbz8+sHGBfdDRw40OYol7506dK2APCb3/wm9NChQz6RkZHRkyZNCs3NzfUO\nDw+PAYCbb745smp5lISEhIjU1FS/s2fPeiQnJ3eLjY2NioqKurItZ7F6DOORxmpQVY+IyFwABwFc\nBPAPVf1H9eVEZCKAiQAQFhbWWM3Xy3V2lS0RXaODBw/6Ll26dN+gQYMO/PznP++RkpLS7sMPPwxa\nuHDhgd69e5dt2LCh1ZQpU8K2bduW99hjj3WZOnVq4aRJk4pfeeWVK7ua/Pz8KtetW/d9YGBg5bFj\nx7wGDBgQOXbs2DOvvfba4bvuuqtlTk5ONmAElGOdUaNGFS9btiwwPj7+6IEDB1oUFha2SExMvPDY\nY4+FDB069OzHH39ccPLkSc/4+PioESNGnG3dunVlTf2/VlZrSYWKyGciUigiJ0TkExEJvfqaNW6r\nHYzRSXcYZdNbici46sup6kJVjVfV+ODg4OovExG5XEhISNmgQYMuAkC/fv0uFBQU+KSnp/snJyf3\njIyMjJ46dWrXwsLCFgCQnp7u/+ijjxYDwIQJE67Ua6qsrJRp06aF2my26KFDh9oKCwu9Dx8+XOeX\n9/Hjx592lFVPSUlpd/fdd58GgG+++ab166+/3ikyMjL61ltvjSgrK5Pvv//eUsn3hrB64d5iAH8B\nkGw+H2fOu7MBbd4BYL+qFgGAiHwKYBCApQ3YFhGRy1Qtje7p6aknTpzwCggIsDtGBVYsWLAg8NSp\nU167d+/e4+PjoyEhIb1rKm9eVffu3cvbtm1r3759e8tPP/00cMGCBQcAQFWxcuXK7/v06VNW1/qN\nxeoxjGBVXayqdvOxBEBDv/YfBPBfIuInIgLgdgB7GrgtIiK3ad26dWVoaOjlRYsWtQOMmz1t3bq1\nJQD07du3dMmSJe0AYNGiRVfKhpSUlHgGBQWV+/j46Jo1awKOHj3qDQBt2rSpOH/+fK1/k++7777i\nWbNmdTx37pyn4wZQQ4cOPfvaa691qKw09kBt3ry5pdPeLKwHxkkRGScinuZjHADLJXGrUtXtAFYC\n2Algt9mHhQ3ZFhGRuy1fvnzf4sWLgxz3JP/kk0/aAsCbb7556M033+zQu3fvqGPHjrXw9/evAIAJ\nEyYUZ2RktIqNjY1aunRpYPfu3S8BQMeOHSvi4uJKw8PDYyZNmvSjXf7jxo07vW7dusCRI0cWO+bN\nnj37qN1ul8jIyOjw8PCY5557LqT6eo3JannzMABvARgI43TaLQCeUNWDzuycg7vKm2+Z1fgnb91I\nB71Z3vzaXWcnXtxQ5c3PnTvn0apVq0oPDw8sXLiw3YoVKwK//vrrZvXL29Dy5i8BeEhVTwOAiATC\nuKHSo43eQyKi68DmzZv9nnzyyTBVRevWrSuWLFlS4O4+XSurgXGzIywAQFWLRaSfk/pERNTsJSUl\nlebm5lo+GN4cWD2G4WGeDgvgygjDatgQEdF1wOof/dcAbBGRlTCOYfwPgJed1isiImpyrF7pnSIi\naTAKDgqAe1X1uhpqERFR3SzvVjIDgiFBRHSDalB5cyIiqr9XXnkl+K233moPAG+88Ub7goKCFo7X\nRo8e3XXHjh1N+lYPPHBNRM3OitSejVrefHTiXpeUN3/mmWeKHNNLly4N6tu378Vu3bqVA8CKFSsO\nuKIP14KBQQCcdPHjrMbfJJG75ObmeiclJYX369fvfGZmpl+PHj0uffzxxwUbNmxoNWPGjC4VFRXo\n06fPhZSUlAMtW7bUqVOnhvz9739v6+npqUOGDDm7cOHCw7/+9a87+/v7V3Tv3
v1yZmam3/jx43v4\n+vpWpqWl7Rk2bJht7ty5h7Zu3dpq//79PvPnzz8MGCORHTt2+H3wwQeH5s2bF/jOO+90KC8vl/79\n+59PSUk54OXluj/j3CVFRGRRQUGB7+TJk4vy8vKyAwICKl966aUOkyZN6r5ixYq9eXl52Xa7Ha++\n+mrwiRMnPL/44ot2+fn5WXl5edmzZs06VnU7jzzyyOnY2NgLKSkp+3JycrL9/f2vlNx48MEHT3/x\nxRdX7muxcuXKwLFjx57euXOn78qVKwPT0tJycnJysj08PHT+/PntXfn+GRhERBZ17Njx8k9/+tPz\nAPDggw+e2rhxY0BoaGjZzTffXAYADz/88Klvv/02IDAwsMLHx6dyzJgxXT/44IO2/v7+lu9P0blz\nZ3uXLl3Kvv7661bHjx/33Ldvn++dd95Zun79+oDMzEy/Pn36REVGRkZ/++23rfft2+fjrPdaE+6S\nIiKyyCiwfXUtWrTArl279qxevbr1Rx991O6dd965adu2bXlW27nvvvtOL1++vF1kZOSl4cOHn/bw\n8ICqSnJy8qm33377SIPfwDXiCIOIyKJjx455f/XVV60A4C9/+UvgkCFDzh45csQ7MzPTBwBSUlLa\nJyYmnispKfEoLi72HD16dMn8+fMP7dmzx6/6tvz9/StKSko8a2pn3Lhxp9evX9/u448/Dhw7dmwx\nACQlJZ1du3ZtuyNHjngBwIkTJzzz8vKcdrOkmjAwiIgs6tGjx6VFixa1t9ls0adPn/Z67rnnCufP\nn1+QnJzc02azRXt4eODpp58uOnPmjGdSUlK4zWaLTkxMjPjjH/94qPq2xo8ff/Lxxx/vGhkZGV1a\nWvqDoUtwcHBFeHj4xSNHjvgMHTr0AgDExcVdeu65547cfvvtNpvNFj1s2DDboUOHWlTfrjNZKm/u\nbixv7nzO+Ayd8fkBTfczdAaWN286cnNzve+6667w/Pz8LHf3xVWqlzfnCIOIiCxhYBARWRAREXH5\nRhpd1ISBQUREljAwiIjIErcEhoi0FZGVIpIjIntEZKA7+kFERNa568K9/wWwXlXvExFvAD86R5mI\niJoWl48wRKQ1gJ8AeB8AVPWyqp5xdT+IiNwhNzfXe/78+YENWdfPz69fY/enPtwxwugBoAjAYhHp\nA2AHgCdV9XzVhURkIoCJABAWFubyTjrLzK8OO2W7L98R6pTtkjXOuhaIFX9rNvOrw41a3vzlO0Jd\nUt4cAPLz831WrFgROHny5OLqr5WXl6NFC5dei1cv7jiG4QWgP4B3VLUfgPMAZlRfSFUXqmq8qsYH\nBwe7uo9ERD+Qm5vr3aNHj5gxY8Z07dWrV8zgwYPDS0tLJSsryycxMTE8JiYmKi4uLiI9Pd0XAEaN\nGtVt8eLF7RzrO0YHM2fODElLS/OPjIyMfuGFF25644032g8fPrzHsGHDeiUmJtpKSko8Bg4caIuO\njo6y2WzRS5cubVtbn1zNHYFxGMBhVd1uPl8JI0CIiJq0gwcP+j7xxBOF33//fVabNm0qUlJS2k2Y\nMKHrvHnzDmZlZe159dVXD0+ZMqXOXSIvv/zykfj4+NKcnJzs3//+94UAsHPnTv/ly5fv37ZtW56f\nn1/lunXrvs/Ozt6zcePGvGeffTa0stJysVuncvkuKVU9LiKHRCRCVXMB3A7eK5yImoGQkJCyQYMG\nXQSAfv36XSgoKPBJT0/3T05OvlLD5fLly9ZK2laRmJh4tkOHDhUAUFlZKdOmTQvdtm2bv4eHBwoL\nC70PHz7sFRYWZm+8d9Iw7jpL6nEAy8wzpPYBeMRN/SAisszb2/tK8T1PT089ceKEV0BAgD0nJ+dH\nX3q9vLy0oqICAFBZWYny8vJag8TPz+/KEGLBggWBp06d8tq9e/ceHx8fDQkJ6X3x4sUmcc2cWzqh\nqrvM4xM3q+o9qnraHf0gIroWrVu3rgwND
b28aNGidoARDFu3bm0JAF27dr28Y8cOPwBYtmxZW7vd\nLgDQpk2bitLS0hrLmgNASUmJZ1BQULmPj4+uWbMm4OjRoy4tYV6XJpFaRETN1fLly/ctXrw4KCIi\nIjo8PDzmk08+aQsAjz/+eNGWLVsCevfuHbVt27ZWLVu2rASAhISEi15eXhoRERH9wgsv3FR9exMm\nTCjOyMhoFRsbG7V06dLA7t27X3L1e6oN77hHRM2OK0+DdahefPDFF1884ZhOTU3Nr758ly5d7BkZ\nGTmO54475fn4+OjWrVur333vlGOiU6dO9l27duWgBhcuXEi/hrdwzTjCICIiSxgYRERkCQODiIgs\nYWAQkStVVlZW1vs6BXI989/pB1cMMjCIyJUyi4qK2jA0mrbKykopKipqAyCz6nyeJUVELmO32ycc\nP378vePHj8eCX1ibskoAmXa7fULVmQwMInKZuLi4QgAj3N0PahgmPBERWcLAICIiSxgYRERkCQOD\niIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWeK2wBARTxFJF5G17uoDERFZ584RxpMA9rix\nfSIiqge3BIaIhAL4BYD33NE+ERHVn7tGGP8fwDOoVmu9KhGZKCJpIpJWVFTkup4REVGNXB4YInIX\ngEJVrfMm7qq6UFXjVTU+ODjYRb0jIqLauGOEMRjACBEpAPARgGEistQN/SAionpweWCo6u9UNVRV\nuwEYA2CDqo5zdT+IiKh+eB0GERFZ4tY77qnqNwC+cWcfiIjIGo4wiIjIEgYGERFZwsAgIiJLGBhE\nRGQJA4OIiCxhYBARkSUMDCIisoSBQURElrj1wj2ihpj51eFG3+bLd4Q2+jabKmd8fsCN9RneqDjC\nICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkicsD\nQ0S6iMg/RWSPiGSJyJOu7gMREdWfO2pJ2QH8RlV3ikgAgB0i8qWqZruhL0REZJHLRxiqekxVd5rT\n5wDsARDi6n4QEVH9uPUYhoh0A9APwPYaXpsoImkiklZUVOTqrhERUTVuCwwR8QfwCYBpqnq2+uuq\nulBV41U1Pjg42PUdJCKiH3BLYIhICxhhsUxVP3VHH4iIqH7ccZaUAHgfwB5V/bOr2yciooZxxwhj\nMIAHAQwTkV3m4+du6AcREdWDy0+rVdVvAYir2yUiomvDK72JiMgSBgYREVnCwCAiIksYGEREZAkD\ng4iILGFgEBGRJQwMIiKyhIFBRESWMDCIiMgSBgYREVnCwCAiIksYGEREZAkDg4iILGFgEBGRJQwM\nIiKyhIFBRESWMDCIiMgSBgYREVnilsAQkSQRyRWR70Vkhjv6QERE9ePywBARTwBvAxgOIBrA/SIS\n7ep+EBFR/bhjhJEA4HtV3aeqlwF8BGCkG/pBRET1IKrq2gZF7gOQpKoTzOcPAhigqo9VW24igInm\n0wgAuS7taP0EATjp7k40c/wMr11T/wxPqmqSuztBDeflhjalhnk/Si1VXQhgofO7c+1EJE1V493d\nj+aMn+G142dIzuaOXVKHAXSp8jwUwFE39IOIiOrBHYHxLwDhItJdRLwBjAGw2g39ICKienD5LilV\ntYvIYwD+DsATwCJVzXJ1PxpZs9h11sTxM7x2/AzJqVx+0JuIiJonXulNRESWMDCIiMiS6zYwRKSt\niExt4LpLzOtFGqMf34gIT3VsIBEZIiKD3N2P5qi23z0ReVhE3nJHn6h5u24DA0BbAA0KDGoaRMQL\nwBAADIx6MkvwEDWq6zkwZgOEIBBNAAAIEUlEQVToKSK7RORVEZkuIv8SkX+LyAuOhURkvDkvQ0Q+\nrLL+T0Rki4jsc4w2zG+734jIShHJEZFlIiLma7eLSLqI7BaRRSLiU71DInK/+XqmiMypMv+XIpJn\nbvtdE
XlLRAJEZL+ItDCXaS0iBY7nTZGItBKRdeZnmSkio80+zxGR78xHL3PZriLytfnZfy0iYeb8\nJSLyZxH5J4AVACYDeMr8d0wUkWRz2xkissmNb9dpROQZEXnCnH5dRDaY07eLyNI6fo9KReRFEdkO\nYGC1bT5i/o5tBDDYle+HriOqel0+AHQDkGlO/xTGKYcCIyTXAvgJgBgYJUeCzOUCzZ9LAHxsLhsN\no/YVYHzbLYFxsaEHgK0AbgXgC+AQAJu5XAqAaeb0NwDiAXQGcBBAMIzTmTcAuMecXwAgEEALAKkA\n3jLXXQzgHnN6IoDX3P25XuUzHwXg3SrP25jvbab5fDyAteb0GgAPmdOPAvi8yme/FoCn+fwPAJ6u\nss3dAELM6bbufs9O+hz/C8DH5nQqgO/M343fm48f/R6ZyyqA/6myHcfvXqcq63gD2Oz4HeODj/o8\nrucRRlU/NR/pAHYCiAQQDmAYgJWqehIAVLW4yjqfq2qlqmYD6FBl/neqelhVKwHsghFMEQD2q2qe\nucwHMAKpqlsAfKOqRapqB7DMXCYBwEZVLVbVchhB5fAegEfM6UdgBEhTthvAHeaIIlFVS8z5y6v8\ndHzzHQjgL+b0hzCC1+FjVa2opY3NAJaIyK9gXMdzPdoBIE5EAgCUwfhiEg8gEcAZ1Px7BAAVAD6p\nYXsDqqxzGcbIjajebpTAEAB/UtW+5qOXqr5vzq/tQpSyauvXNL8Cxre8mupj1dSH+syHqm4G0E1E\nboPxjTvTQjtuYwZmHIzg+JOIPO94qepita1eZfp8HW1MBvAcjPIyu0SkfcN73DSZXxwKYHxJ2AJj\nlDEUQE8YI4XaXKojaHnBFV2z6zkwzgEIMKf/DuBREfEHABEJEZGbAHwN4H8cf3REJLCBbeXA+MPe\ny3z+IICN1ZbZDuA2EQkyD0jeby7znTm/nXmQd1S19VJgfDNv6qMLiEhnABdUdSmAuQD6my+NrvJz\nqzm9BUZZGAB4AMC3tWy26r8jRKSnqm5X1edhVGbtUst6zd0mAE+bP1NhHMvZBWAbav49qst2AENE\npL15DCzZed2m65k7qtW6hKqeEpHNIpIJ4G8wdn9sNY9RlwIYp6pZIvIygI0iUgFjl9XDDWjrkog8\nAuBj84/+vwDMr7bMMRH5HYB/whhVfKGqqwBARGbB+E99FEA2jOMkDssA/BH/2a3TlPUG8KqIVAIo\nBzAFwEoAPuaBWA8Yf+AA4AkAi0RkOoAi/GfXW3VrAKwUkZEAHodxADwcxmf4NYAMZ70ZN0sFMBPA\nVlU9LyKXAKTW9XtUG3OdP8AI62Mwdster7vzyIlYGqQJEBF/VS01w+YzGPW1PjNfuw/ASFV90K2d\nbCARKQAQ7zhORETN13U7wmhm/iAid8A42+ofAD4HABF5E8atbH/uxr4REQHgCIOIiCy6ng96ExFR\nI2JgEBGRJQwMIiKyhIFBTZaI3CMi0e7uBxEZGBjUlN0Do5aX0wiruhJZxsC4AYlINxHZY1bGzRKR\nf4hISxHpKSLrRWSHiKSKSKSIeIpRsVfEuMdIpYj8xNxOqoj0EpHbzGqyu8So2BtQR9vPmJVWM0Rk\ntjnvV2JUEs4QkU9ExE+Me2CMgHEh4C6zbz/qn7l+TxHZZm7jRREpNeeLGJWKM802R5vzh4jIP0Xk\nLwB2i8hLIvJklT6+LGa1WCKqwt3VD/lw/QNGwUQ7gL7m878CGAfjyulwc94AABvM6fUwKvveBeMq\n9pkAfGAUXASMq7EHm9P+ALxqaXc4jJIgfuZzR3Xg9lWW+SOAx83pJQDuq/Jabf1bC+B+c3oygFJz\nehSAL2Fc1dwBRh2mTjCqDp8H0L3K57HTnPYAsLdqn/jggw/jwQv3blz7VXWXOb0Dxh/NQTDKmziW\ncdzTIxVGRdTuAP4E4Fcw6hf9y3x9M4A/i8gyAJ+q6uFa2rwDwGJVvQD
8oDpwrIj8EcZNr/xh1P76\nAbMOWG39Gwhj9xVglICZa07fCmC5GgX5Tpj3grgFwFkYVYf3m/0oEJFTItIPRrCkq+qpWt4D0Q2L\ngXHjql51twOAM6rat4ZlHcXvOgN4HsB0GN/SNwGAqs4WkXUwrkjfJiJ3qGpODduprTrwEhj3dMgQ\nkYfNbVfnUUf/alNXFeHqFXHfg1FHrCOARfVog+iGwWMY5HAWwH4RSQau7P/vY762Hca3+0pVvQSj\nauokGEHiqCC7W1XnAEiDcb+RmvwDRtVgP3M9R3XgAADHzEqqD1RZ/kqlWlWtq3/b8J8qv2OqrL8J\nwGjzOEwwjFHSd7X07TMASTBGID8a4RARA4N+6AEAvxSRDABZAEYCgKqWwbij4DZzuVQYf8h3m8+n\nmQeWMwBchFEd+EdUdT2A1QDSRGQXjPLdAPD/YITSlzBKxTt8BGC6eSC9Z239AzANwK9F5DsYxygc\n1X4/A/BvGBVtNwB4RlWP19K3yzAqwP5Va7+nBNENjbWkqNkzRywXVVVFZAyMA+Ajr7ZetW14wCj7\nnayq+c7oJ1Fzx2MYdD2IA/CWGEfDz8C4R7hl5sWBawF8xrAgqh1HGNToRKQ3jPt0V1WmqgPc0R8i\nahwMDCIisoQHvYmIyBIGBhERWcLAICIiSxgYRERkCQODiIgs+T/LvTrWwTZq/wAAAABJRU5ErkJg\ngg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fc = sns.factorplot(x=\"news_category\", hue=\"sentiment_category\", \n", " data=df, kind=\"count\", \n", " palette={\"negative\": \"#FE2020\", \n", " \"positive\": \"#BADD07\", \n", " \"neutral\": \"#68BFF5\"})" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Most Negative World News Article: A Czech woman drowned after being trapped inside Prague's underground drainage system while participating in a global GPS-based treasure hunt, police officials said. The woman was geocaching when heavy downpours led to rapidly rising water. The body of the 27-year-old victim, who has not been identified, was found in the Vltava river. \n", "\n", "Most Positive World News Article: Pope Francis on Sunday said he is praying that the upcoming summit between US President Donald Trump and North Korean leader Kim Jong-un succeeds in laying the groundwork for peace. Urging people around the world to pray for the summit, the pontiff said, \"I want to offer the beloved people of Korea an especial thought of friendship.\"\n" ] } ], "source": [ "# Pick the extremes programmatically: comparing floats with == against polarity values\n", "# copied from one run is fragile and breaks as soon as the headlines are re-fetched.\n", "world_scores = df.loc[df.news_category == 'world', 'sentiment_score']\n", "pos_idx = world_scores.idxmax()\n", "neg_idx = world_scores.idxmin()\n", "\n", "# df was built row-for-row from news_df with a default index, so its labels double as row positions.\n", "print('Most Negative World News Article:', news_df.iloc[neg_idx]['news_article'])\n", "print()\n", "print('Most Positive World News Article:', news_df.iloc[pos_idx]['news_article'])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predicted:
negativeneutralpositive
Actual:negative1656
neutral328
positive4525
\n", "
" ], "text/plain": [ " Predicted: \n", " negative neutral positive\n", "Actual: negative 16 5 6\n", " neutral 3 2 8\n", " positive 4 5 25" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import model_evaluation_utils as meu\n", "meu.display_confusion_matrix_pretty(true_labels=sentiment_category, \n", " predicted_labels=sentiment_category_tb, \n", " classes=['negative', 'neutral', 'positive'])" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }