{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from datetime import datetime\n",
    "from urllib.parse import quote_plus\n",
    "\n",
    "from pymongo import MongoClient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DBConnector():\n",
    "    \"\"\"Build a MongoDB URI and hand out collections of one database.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    db_name : str\n",
    "        Database to select; also placed in the path of the URI.\n",
    "    usr, psw : str\n",
    "        Credentials. They are percent-escaped before being put into the URI.\n",
    "    ip : str\n",
    "        Host part of the URI.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, db_name, usr, ip, psw):\n",
    "        self.db_name = db_name\n",
    "        self.usr = usr\n",
    "        self.psw = psw\n",
    "        # Reserved characters such as ':', '/' or '@' in the credentials would\n",
    "        # otherwise corrupt the URI, so escape them first.\n",
    "        self.uri = 'mongodb://{0}:{1}@{2}/{3}'.format(quote_plus(usr), quote_plus(psw), ip, db_name)\n",
    "        # Created lazily by mongoClient(); None means no connection is open.\n",
    "        self.mongo_client = None\n",
    "        # Show where we connect without echoing the password into the notebook output.\n",
    "        print('mongodb://{0}:***@{1}/{2}'.format(usr, ip, db_name))\n",
    "\n",
    "    def mongoClient(self, collection):\n",
    "        \"\"\"Return the collection named `collection` from the configured database.\n",
    "\n",
    "        The MongoClient is created on the first call and reused afterwards, so\n",
    "        repeated calls do not leave earlier clients open.\n",
    "        \"\"\"\n",
    "        if self.mongo_client is None:\n",
    "            self.mongo_client = MongoClient(self.uri)\n",
    "        db = self.mongo_client[self.db_name]\n",
    "        return db[collection]\n",
    "\n",
    "    def close_connection(self):\n",
    "        \"\"\"Close the client if one was opened; does nothing otherwise.\"\"\"\n",
    "        if self.mongo_client is not None:\n",
    "            self.mongo_client.close()\n",
    "            self.mongo_client = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connection settings. The defaults are the values this notebook originally\n",
    "# hard-coded; set the BTW17_* environment variables to use other settings\n",
    "# without writing credentials into the notebook.\n",
    "db_connector = DBConnector(\n",
    "    os.environ.get(\"BTW17_DB_NAME\", \"twitter_public\"),\n",
    "    os.environ.get(\"BTW17_DB_USER\", \"btw17_public_user\"),\n",
    "    os.environ.get(\"BTW17_DB_HOST\", \"10.6.13.55\"),\n",
    "    os.environ.get(\"BTW17_DB_PASSWORD\", \"btw17_public\"),\n",
    ")\n",
    "collection_name = \"politicians_sample\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection = db_connector.mongoClient(collection_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query the sample database\n",
    "An example query that fetches tweets created between 19:00 and 22:00 is shown below."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "query = collection.find({\"createdAt\" : {\"$gte\" : datetime(2017, 9, 23, 19, 0, 0), \"$lt\": datetime(2017, 9, 23, 22, 0 , 0)}})\n", "\n", "from pprint import pprint\n", "print(\"Print the first 5 elements\")\n", "pprint([el for el in query[0:5]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "collection.count_documents({\"createdAt\" : {\"$gte\" : datetime(2017, 9, 23, 19, 0, 0), \"$lt\": datetime(2017, 9, 23, 22, 0 , 0)}})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Import tweets into dataframe" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "query = collection.find({\"createdAt\" : {\"$gte\" : datetime(2017, 9, 23, 19, 0, 0), \"$lt\": datetime(2017, 9, 23, 22, 0 , 0)}})\n", "tweets = pd.DataFrame(list(query))\n", "tweets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualize number of tweets per hour" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "# aggregate per hour\n", "counts = tweets['createdAt'].dt.to_period('H').value_counts()\n", "\n", "# plot\n", "ax = counts.plot(kind='bar', title='Number of Tweets Per Hour')\n", "ax.tick_params(axis='x', labelsize=8)\n", "ax.tick_params(axis='y', labelsize=10)\n", "ax.set_xlabel('Time', fontsize=15)\n", "ax.set_ylabel('Number of tweets' , fontsize=15)\n", "ax.set_title('Number of Tweets Per Hour', fontsize=15, fontweight='bold')\n", "pass" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": 
"text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }