{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "51565" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Import pymongo module\n", "from pymongo import MongoClient\n", "\n", "# Create the client, then select the database and the collection\n", "client = MongoClient('localhost', 27017)\n", "db = client['mgo_db']\n", "collection = db['time100_collection']\n", "\n", "# Show how many documents are in the collection.\n", "# Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0; count_documents({}) gives the exact count.\n", "collection.count_documents({})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NOTE: THE PREVIOUS CELL CAN BE COMMENTED OUT WHEN IT IS NOT NEEDED. IT NEED NOT BE RUN EACH TIME UNLESS YOU WANT TO GET THE LATEST TWEETS." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# # Import pandas module\n", "# import pandas as pd\n", "\n", "# # Load all documents in the collection\n", "# tweets = collection.find()\n", "\n", "# # Import json_normalize and use this to normalize or flatten the JSON into a pandas dataframe\n", "# from pandas.io.json import json_normalize\n", "# df_tweets = json_normalize(tweets)\n", "\n", "# # Leave out the noise\n", "# df_tweets = df_tweets[['created_at', 'text', 'favorite_count', 'retweet_count', 'entities.hashtags','lang','user.screen_name',\n", "# 'user.name', 'user.followers_count', 'user.friends_count', 'user.time_zone', 'user.location', \n", "# 'user.verified', 'user.profile_image_url', 'coordinates', 'coordinates.type', 'coordinates.coordinates',\n", "# 'place', 'place.place_type', 'place.name', 'place.full_name', 'place.country_code', 'place.country',\n", " \n", "# 'retweeted_status.created_at', 'retweeted_status.text', 'retweeted_status.favorite_count', \n", "# 'retweeted_status.retweet_count', 'retweeted_status.entities.hashtags', 'retweeted_status.lang',\n", "# 'retweeted_status.user.screen_name', 'retweeted_status.user.name', 
'retweeted_status.user.followers_count', \n", "# 'retweeted_status.user.friends_count', 'retweeted_status.user.time_zone', 'retweeted_status.user.location',\n", "# 'retweeted_status.coordinates', 'retweeted_status.coordinates.type', 'retweeted_status.coordinates.coordinates', \n", "# 'retweeted_status.place', 'retweeted_status.place.place_type', 'retweeted_status.place.name', \n", "# 'retweeted_status.place.full_name', 'retweeted_status.place.country_code', 'retweeted_status.place.country',\n", " \n", "# 'quoted_status.created_at', 'quoted_status.text', 'quoted_status.favorite_count', \n", "# 'quoted_status.retweet_count', 'quoted_status.entities.hashtags', 'quoted_status.lang',\n", "# 'quoted_status.user.screen_name', 'quoted_status.user.name', 'quoted_status.user.followers_count', \n", "# 'quoted_status.user.friends_count', 'quoted_status.user.time_zone', 'quoted_status.user.location',\n", "# 'quoted_status.coordinates', 'quoted_status.coordinates.type', 'quoted_status.coordinates.coordinates', \n", "# 'quoted_status.place', 'quoted_status.place.place_type', 'quoted_status.place.name', \n", "# 'quoted_status.place.full_name', 'quoted_status.place.country_code', 'quoted_status.place.country'\n", "# ]]\n", "\n", "### SAVE A COPY IN CSV, SO THAT WE CAN COMMENT OUT AND SKIP THE PREVIOUS STEPS WHEN IT IS NOT NECESSARY. \n", "# df_tweets.to_csv('data/time100_tweets_0430_730PM.csv', index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NOTE: THE PREVIOUS CELL CAN BE COMMENTED OUT WHEN IT IS NOT NEEDED. IT NEED NOT BE RUN EACH TIME UNLESS YOU RAN THE FIRST CELL." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | text | \n", "retweeted_status.text | \n", "quoted_status.text | \n", "
---|---|---|---|
0 | \n", "RT @BillGates: Melindas work on behalf of wom... | \n", "Melindas work on behalf of women and girls in... | \n", "NaN | \n", "
1 | \n", "RT @TIME: Colin Kaepernicks willingness to t... | \n", "Colin Kaepernicks willingness to take a posi... | \n", "NaN | \n", "
2 | \n", "WWVDD? What Would Viola Davis Do?! Congratulat... | \n", "NaN | \n", "Meryl Streep on @ViolaDavis: Her gifts as an ... | \n", "
3 | \n", "RT @RitaPanahi: You're honoured to write about... | \n", "You're honoured to write about an anti-Semitic... | \n", "The four exceptional organizers of the @womens... | \n", "
4 | \n", "RT @TIME: See why Thailand's new king Maha Vaj... | \n", "See why Thailand's new king Maha Vajiralongkor... | \n", "NaN | \n", "