{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extract @mentions and #hashtags from tweets\n",
    "\n",
    "Loads the `text` field of every tweet from a line-delimited JSON file, then collects each whitespace-separated token that starts with `@` or `#`, showing a coloured `tqdm` progress bar while processing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Imports\n",
    "import json\n",
    "import re\n",
    "\n",
    "import pandas as pd\n",
    "from colorama import Fore\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# load data in memory: one JSON object per line, keep only the stripped 'text' field\n",
    "data = []\n",
    "with open('../data/positive_tweets.json', 'r') as infile:\n",
    "    for line in infile:\n",
    "        line = line.strip()\n",
    "        if not line:\n",
    "            # a blank line is not valid JSON; skip it instead of crashing in json.loads\n",
    "            continue\n",
    "        text = json.loads(line).get('text')\n",
    "        if text is None:\n",
    "            # a record without a 'text' field would raise AttributeError on .strip()\n",
    "            continue\n",
    "        data.append(text.strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_all(text):\n",
    "    \"\"\"Return the @mention and #hashtag tokens found in ``text``.\n",
    "\n",
    "    The text is split on whitespace and every token that starts with\n",
    "    '@' or '#' is returned, in order of appearance. A bare '@' or '#'\n",
    "    with nothing after it names nothing and is skipped.\n",
    "\n",
    "    Returns an empty list when ``text`` contains no such token.\n",
    "    \"\"\"\n",
    "    return [token for token in text.split()\n",
    "            if token.startswith(('@', '#')) and len(token) > 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[30mProcessing 4999: 100%|\u001b[32m██████████\u001b[31m| 5000/5000 [00:06<00:00, 764.45it/s]\n"
     ]
    }
   ],
   "source": [
    "mentions = []\n",
    "\n",
    "# bar_format lets you add color to each of the left, middle and right segments of the progress bar;\n",
    "# the trailing Fore.RESET restores the default foreground so the last color does not leak into later output\n",
    "bar_format = \"%s{l_bar}%s{bar}%s{r_bar}%s\" % (Fore.BLACK, Fore.GREEN, Fore.RED, Fore.RESET)\n",
    "pbar = tqdm(data, bar_format=bar_format)\n",
    "for i, text in enumerate(pbar):\n",
    "    mentions.append(get_all(text))\n",
    "    # show progressbar ui\n",
    "    pbar.set_description(\"Processing %s\" % i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[u'#FollowFriday', u'@France_Inte', u'@PKuchly57', u'@Milipol_Paris'],\n",
       " [u'@Lamb2ja'],\n",
       " [u'@DespiteOfficial'],\n",
       " [u'@97sides']]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mentions[:4]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
"language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }