{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "# Twitter Activities During Crises\n", "\n", "Extract the number of tweets and unique users per day, along with daily counts of various Twitter activities (retweets, mentions, URLs, media, and hashtags).\n", "Then compare these frequencies with the corresponding frequencies for the relevant tweets." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting download from file:/cliphomes/cbuntain/SparkTwitterAnalytics-1.0-SNAPSHOT.jar\n", "Finished download of SparkTwitterAnalytics-1.0-SNAPSHOT.jar\n", "Starting download from file:/cliphomes/cbuntain/.m2/repository/org/twitter4j/twitter4j-core/4.0.4/twitter4j-core-4.0.4.jar\n", "Finished download of twitter4j-core-4.0.4.jar\n" ] } ], "source": [ "// Add the spark analytics jar with my code for various counting capabilities\n", "%AddJar file:/cliphomes/cbuntain/SparkTwitterAnalytics-1.0-SNAPSHOT.jar\n", "%AddJar file:/cliphomes/cbuntain/.m2/repository/org/twitter4j/twitter4j-core/4.0.4/twitter4j-core-4.0.4.jar" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Bokeh JAR files\n", "\n", "We need to load a number of JAR files (bokeh-scala and its dependencies) in order to use the bokeh-scala plotting library." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting download from http://repo1.maven.org/maven2/org/spire-math/spire-macros_2.10/0.7.4/spire-macros_2.10-0.7.4.jar\n", "Finished download of spire-macros_2.10-0.7.4.jar\n", "Starting download from http://repo1.maven.org/maven2/com/typesafe/config/1.2.1/config-1.2.1.jar\n", "Finished download of config-1.2.1.jar\n", "Starting download from http://repo1.maven.org/maven2/com/typesafe/play/play-functional_2.10/2.3.10/play-functional_2.10-2.3.10.jar\n", "Finished download of play-functional_2.10-2.3.10.jar\n", "Starting download from 
http://repo1.maven.org/maven2/org/scalanlp/breeze-macros_2.10/0.11.2/breeze-macros_2.10-0.11.2.jar\n", "Finished download of breeze-macros_2.10-0.11.2.jar\n", "Starting download from http://repo1.maven.org/maven2/com/github/scala-incubator/io/scala-io-file_2.10/0.4.3/scala-io-file_2.10-0.4.3.jar\n", "Finished download of scala-io-file_2.10-0.4.3.jar\n", "Starting download from http://repo1.maven.org/maven2/com/github/fommil/netlib/core/1.1.2/core-1.1.2.jar\n", "Finished download of core-1.1.2.jar\n", "Starting download from http://repo1.maven.org/maven2/org/slf4j/slf4j-api/1.7.5/slf4j-api-1.7.5.jar\n", "Finished download of slf4j-api-1.7.5.jar\n", "Starting download from http://repo1.maven.org/maven2/com/typesafe/play/play-json_2.10/2.3.10/play-json_2.10-2.3.10.jar\n", "Finished download of play-json_2.10-2.3.10.jar\n", "Starting download from http://repo1.maven.org/maven2/com/jsuereth/scala-arm_2.10/1.3/scala-arm_2.10-1.3.jar\n", "Finished download of scala-arm_2.10-1.3.jar\n", "Starting download from http://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.3.2/jackson-core-2.3.2.jar\n", "Finished download of jackson-core-2.3.2.jar\n", "Starting download from http://repo1.maven.org/maven2/org/joda/joda-convert/1.6/joda-convert-1.6.jar\n", "Finished download of joda-convert-1.6.jar\n", "Starting download from http://repo1.maven.org/maven2/io/continuum/bokeh/core_2.10/0.7/core_2.10-0.7.jar\n", "Finished download of core_2.10-0.7.jar\n", "Starting download from http://repo1.maven.org/maven2/joda-time/joda-time/2.8.1/joda-time-2.8.1.jar\n", "Finished download of joda-time-2.8.1.jar\n", "Starting download from http://repo1.maven.org/maven2/net/sf/opencsv/opencsv/2.3/opencsv-2.3.jar\n", "Finished download of opencsv-2.3.jar\n", "Starting download from http://repo1.maven.org/maven2/com/github/scala-incubator/io/scala-io-core_2.10/0.4.3/scala-io-core_2.10-0.4.3.jar\n", "Finished download of scala-io-core_2.10-0.4.3.jar\n", "Starting download from 
http://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.2/commons-math3-3.2.jar\n", "Finished download of commons-math3-3.2.jar\n", "Starting download from http://repo1.maven.org/maven2/junit/junit/4.8.2/junit-4.8.2.jar\n", "Finished download of junit-4.8.2.jar\n", "Starting download from http://repo1.maven.org/maven2/com/github/rwl/jtransforms/2.4.0/jtransforms-2.4.0.jar\n", "Finished download of jtransforms-2.4.0.jar\n", "Starting download from http://repo1.maven.org/maven2/com/typesafe/play/play-datacommons_2.10/2.3.10/play-datacommons_2.10-2.3.10.jar\n", "Finished download of play-datacommons_2.10-2.3.10.jar\n", "Starting download from http://repo1.maven.org/maven2/com/typesafe/play/play-iteratees_2.10/2.3.10/play-iteratees_2.10-2.3.10.jar\n", "Finished download of play-iteratees_2.10-2.3.10.jar\n", "Starting download from http://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-annotations/2.3.2/jackson-annotations-2.3.2.jar\n", "Finished download of jackson-annotations-2.3.2.jar\n", "Starting download from http://repo1.maven.org/maven2/io/continuum/bokeh/bokeh_2.10/0.7/bokeh_2.10-0.7.jar\n", "Finished download of bokeh_2.10-0.7.jar\n", "Starting download from http://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.3.2/jackson-databind-2.3.2.jar\n", "Finished download of jackson-databind-2.3.2.jar\n", "Starting download from http://repo1.maven.org/maven2/net/sourceforge/f2j/arpack_combined_all/0.1/arpack_combined_all-0.1.jar\n", "Finished download of arpack_combined_all-0.1.jar\n", "Starting download from http://repo1.maven.org/maven2/io/continuum/bokeh/bokehjs_2.10/0.7/bokehjs_2.10-0.7.jar\n", "Finished download of bokehjs_2.10-0.7.jar\n", "Starting download from http://repo1.maven.org/maven2/org/spire-math/spire_2.10/0.7.4/spire_2.10-0.7.4.jar\n", "Finished download of spire_2.10-0.7.4.jar\n", "Starting download from http://repo1.maven.org/maven2/org/scalanlp/breeze_2.10/0.11.2/breeze_2.10-0.11.2.jar\n", "Finished 
download of breeze_2.10-0.11.2.jar\n" ] } ], "source": [ "// Maven Central rejects plain-HTTP requests (HTTPS has been required since January 2020), so every JAR is fetched over https://\n", "%AddJar https://repo1.maven.org/maven2/org/spire-math/spire-macros_2.10/0.7.4/spire-macros_2.10-0.7.4.jar\n", "%AddJar https://repo1.maven.org/maven2/com/typesafe/config/1.2.1/config-1.2.1.jar\n", "%AddJar https://repo1.maven.org/maven2/com/typesafe/play/play-functional_2.10/2.3.10/play-functional_2.10-2.3.10.jar\n", "%AddJar https://repo1.maven.org/maven2/org/scalanlp/breeze-macros_2.10/0.11.2/breeze-macros_2.10-0.11.2.jar\n", "%AddJar https://repo1.maven.org/maven2/com/github/scala-incubator/io/scala-io-file_2.10/0.4.3/scala-io-file_2.10-0.4.3.jar\n", "%AddJar https://repo1.maven.org/maven2/com/github/fommil/netlib/core/1.1.2/core-1.1.2.jar\n", "%AddJar https://repo1.maven.org/maven2/org/slf4j/slf4j-api/1.7.5/slf4j-api-1.7.5.jar\n", "%AddJar https://repo1.maven.org/maven2/com/typesafe/play/play-json_2.10/2.3.10/play-json_2.10-2.3.10.jar\n", "%AddJar https://repo1.maven.org/maven2/com/jsuereth/scala-arm_2.10/1.3/scala-arm_2.10-1.3.jar\n", "%AddJar https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.3.2/jackson-core-2.3.2.jar\n", "%AddJar https://repo1.maven.org/maven2/org/joda/joda-convert/1.6/joda-convert-1.6.jar\n", "%AddJar https://repo1.maven.org/maven2/io/continuum/bokeh/core_2.10/0.7/core_2.10-0.7.jar\n", "%AddJar https://repo1.maven.org/maven2/joda-time/joda-time/2.8.1/joda-time-2.8.1.jar\n", "%AddJar https://repo1.maven.org/maven2/net/sf/opencsv/opencsv/2.3/opencsv-2.3.jar\n", "%AddJar https://repo1.maven.org/maven2/com/github/scala-incubator/io/scala-io-core_2.10/0.4.3/scala-io-core_2.10-0.4.3.jar\n", "%AddJar https://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.2/commons-math3-3.2.jar\n", "%AddJar https://repo1.maven.org/maven2/junit/junit/4.8.2/junit-4.8.2.jar\n", "%AddJar https://repo1.maven.org/maven2/com/github/rwl/jtransforms/2.4.0/jtransforms-2.4.0.jar\n", "%AddJar 
https://repo1.maven.org/maven2/com/typesafe/play/play-datacommons_2.10/2.3.10/play-datacommons_2.10-2.3.10.jar\n", "%AddJar https://repo1.maven.org/maven2/com/typesafe/play/play-iteratees_2.10/2.3.10/play-iteratees_2.10-2.3.10.jar\n", "%AddJar https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-annotations/2.3.2/jackson-annotations-2.3.2.jar\n", "%AddJar https://repo1.maven.org/maven2/io/continuum/bokeh/bokeh_2.10/0.7/bokeh_2.10-0.7.jar\n", "%AddJar https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.3.2/jackson-databind-2.3.2.jar\n", "%AddJar https://repo1.maven.org/maven2/net/sourceforge/f2j/arpack_combined_all/0.1/arpack_combined_all-0.1.jar\n", "%AddJar https://repo1.maven.org/maven2/io/continuum/bokeh/bokehjs_2.10/0.7/bokehjs_2.10-0.7.jar\n", "%AddJar https://repo1.maven.org/maven2/org/spire-math/spire_2.10/0.7.4/spire_2.10-0.7.4.jar\n", "%AddJar https://repo1.maven.org/maven2/org/scalanlp/breeze_2.10/0.11.2/breeze_2.10-0.11.2.jar" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// Packages for Bokeh plotting\n", "import io.continuum.bokeh.Plot\n", "import io.continuum.bokeh.Grid\n", "import io.continuum.bokeh.DatetimeTicker\n", "import io.continuum.bokeh.Legend\n", "import io.continuum.bokeh.Document\n", "import io.continuum.bokeh.ColumnDataSource\n", "import io.continuum.bokeh.DataRange1d\n", "import io.continuum.bokeh.Circle\n", "import io.continuum.bokeh.Line\n", "import io.continuum.bokeh.GlyphRenderer\n", "import io.continuum.bokeh.Color\n", "import io.continuum.bokeh.DatetimeAxis\n", "import io.continuum.bokeh.LinearAxis\n", "import io.continuum.bokeh.PanTool\n", "import io.continuum.bokeh.PreviewSaveTool\n", "import io.continuum.bokeh.NumeralTickFormatter" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "// Packages for RDD manipulation\n", "import org.apache.spark.rdd.RDD\n", "import 
org.apache.spark.SparkContext\n", "import org.apache.spark.SparkContext._" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// Packages for Twitter JSON parsing\n", "import twitter4j.Status\n", "import twitter4j.TwitterObjectFactory" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "// My packages for calculating activity frequencies\n", "import edu.umd.cs.hcil.spark.analytics.ActivityFrequency\n", "import edu.umd.cs.hcil.spark.analytics.UniqueUserFrequency" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "// We're looking at daily activities\n", "val timeScale : ActivityFrequency.TimeScale.Value = ActivityFrequency.TimeScale.DAILY" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "val targetFiles = \"/collections/tweets/TweetsCrawl/us-west/2015-11/statuses.log.2015*.gz\"\n", "val twitterStrings = sc.textFile(targetFiles).repartition(3000)\n", "\n", "// Convert each JSON line in the file to a status using Twitter4j\n", "// Note that not all lines are Status lines, so we catch any exception\n", "// generated during this conversion and set to null since we don't care\n", "// about non-status lines.\n", "val tweets = twitterStrings.map(line => {\n", " try {\n", " TwitterObjectFactory.createStatus(line)\n", " } catch {\n", " case e : Exception => null\n", " }\n", " }).filter(status => status != null)\n", "\n", "// Per-day activity counts; later cells read the tuple fields _1 .. _6 as tweets, retweets, mentions, URLs, media and hashtags\n", "val datedCounts = ActivityFrequency.activityCounter(tweets, timeScale)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "val datedCountsMap = datedCounts.collectAsMap()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// UniqueUserFrequency declares its own TimeScale type. It gets its own name here so that\n", "// `timeScale` keeps the ActivityFrequency value the cells above depend on when they are re-run.\n", "val userTimeScale : UniqueUserFrequency.TimeScale.Value = 
UniqueUserFrequency.TimeScale.DAILY\n", "val datedUserCounts = UniqueUserFrequency.userCounter(tweets, userTimeScale)\n", "val datedUserCountsMap = datedUserCounts.collectAsMap()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build Bokeh Plots" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "// Time of the event marker in epoch milliseconds (1447446000000 ms = 2015-11-13 20:20:00 UTC)\n", "val eventTime = 1447446000000.0\n", "\n", "// Single-point data source: the marker sits at (eventTime, 0) on every plot that renders it\n", "object eventSource extends ColumnDataSource {\n", " val x = column(List(eventTime))\n", " val y = column(List(0.0d))\n", "}" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// Days are taken from the activity counts and sorted so every column shares the same ordering\n", "val dateList = datedCountsMap.keys.toList.sorted\n", "\n", "object source extends ColumnDataSource {\n", " val dates = column(dateList.map(d => d.getTime.asInstanceOf[Double]))\n", " val tweets = column(dateList.map(d => datedCountsMap(d)._1.asInstanceOf[Double]))\n", " val rt = column(dateList.map(d => datedCountsMap(d)._2.asInstanceOf[Double]))\n", " val ments = column(dateList.map(d => datedCountsMap(d)._3.asInstanceOf[Double]))\n", " val urls = column(dateList.map(d => datedCountsMap(d)._4.asInstanceOf[Double]))\n", " val media = column(dateList.map(d => datedCountsMap(d)._5.asInstanceOf[Double]))\n", " val tags = column(dateList.map(d => datedCountsMap(d)._6.asInstanceOf[Double]))\n", " // The day keys come from datedCountsMap, so a day absent from the user counts plots as 0.0 instead of throwing NoSuchElementException\n", " val users = column(dateList.map(d => datedUserCountsMap.get(d).map(_.asInstanceOf[Double]).getOrElse(0.0)))\n", "}" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "io.continuum.bokeh.Plot@6881f4ff" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val xdr = new DataRange1d()\n", "val ydr = new DataRange1d()\n", "\n", "val plot = new Plot().x_range(xdr).y_range(ydr)\n", "plot.width(800)\n", "plot.title(\"General Activity\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ 
"// (column, line color, legend label) for every series drawn on the general activity plot\n", "val dataList = List((source.tweets, Color.Black, \"Tweets\"), (source.rt, Color.Blue, \"Retweets\"), (source.ments, Color.Red, \"Mentions\"), (source.urls, Color.Yellow, \"URLs\"), (source.media, Color.Green, \"Media\"), (source.tags, Color.Orange, \"Hashtags\"), (source.users, Color.Cyan, \"Users\"))\n", "\n", "// One line renderer per series, paired with its legend label; all series share source.dates as x\n", "val labeledLines = dataList.map { case (series, lineColor, label) =>\n", "    val glyph = new Line().x(source.dates).y(series).line_color(lineColor)\n", "    label -> new GlyphRenderer().data_source(source).glyph(glyph)\n", "}\n", "\n", "// Red marker positioned by eventSource\n", "val circle = new Circle().x(eventSource.x).y(eventSource.y).fill_color(Color.Red).size(10)\n", "val eventCircle = new GlyphRenderer().data_source(eventSource).glyph(circle)\n", "\n", "// Line renderers first, event marker last, in both the renderer list and the legend\n", "var rendList: List[io.continuum.bokeh.GlyphRenderer] = labeledLines.map(_._2) :+ eventCircle\n", "var legendList: List[(String, List[io.continuum.bokeh.GlyphRenderer])] = labeledLines.map { case (label, rend) => label -> List(rend) } :+ (\"Event\" -> List(eventCircle))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// Datetime x axis, numeric y axis\n", "val xTick = new DatetimeTicker().desired_num_ticks(10)\n", "val yForm = new NumeralTickFormatter()\n", "\n", "val xaxis = new DatetimeAxis().ticker(xTick).plot(plot)\n", "val yaxis = new LinearAxis().formatter(yForm).plot(plot)\n", "\n", "// Each grid reuses the ticker of the axis for its dimension\n", "val gridX = new Grid().plot(plot).dimension(0).ticker(xaxis.ticker.value)\n", "val gridY = new Grid().plot(plot).dimension(1).ticker(yaxis.ticker.value)\n", "\n", "plot.below <<= (xaxis :: _)\n", "plot.left <<= (yaxis :: _)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "val legend = new Legend().plot(plot).legends(legendList)\n", "\n", "val pantool = new PanTool().plot(plot)\n", "val previewTool = new 
PreviewSaveTool().plot(plot)\n", "\n", "// Register the glyph renderers, then the axes and grids, then the legend\n", "plot.renderers <<= (rendList ++ _)\n", "plot.renderers <<= (xaxis :: yaxis :: gridX :: gridY :: _)\n", "plot.renderers <<= (legend :: _)\n", "\n", "plot.tools := List(pantool, previewTool)\n", "\n", "val document = new Document(plot)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " BokehJS successfully loaded.\n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel.display.html(document.fragment.preamble.toString)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", " \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel.display.html(document.fragment.html.toString)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Distribution Plots\n", "\n", "Plot by percentage rather than volume." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [ "val dateList = datedCountsMap.keys.toList.sorted\n", "\n", "// Every activity column is that day's activity count divided by that day's tweet count (tuple field _1)\n", "object distSource extends ColumnDataSource {\n", "    val dates = column(dateList.map(d => d.getTime.asInstanceOf[Double]))\n", "    val rt = column(dateList.map { d => val c = datedCountsMap(d); c._2.asInstanceOf[Double] / c._1.asInstanceOf[Double] })\n", "    val ments = column(dateList.map { d => val c = datedCountsMap(d); c._3.asInstanceOf[Double] / c._1.asInstanceOf[Double] })\n", "    val urls = column(dateList.map { d => val c = datedCountsMap(d); c._4.asInstanceOf[Double] / c._1.asInstanceOf[Double] })\n", "    val media = column(dateList.map { d => val c = datedCountsMap(d); c._5.asInstanceOf[Double] / c._1.asInstanceOf[Double] })\n", "    val tags = column(dateList.map { d => val c = datedCountsMap(d); c._6.asInstanceOf[Double] / c._1.asInstanceOf[Double] })\n", "}" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// The y range is pinned to [0, 1] because every plotted series is a fraction\n", "val xdr = new DataRange1d()\n", "val ydr = new DataRange1d().start(0.0).end(1.0)\n", "\n", "val distPlot = new Plot().x_range(xdr).y_range(ydr)\n", "distPlot.width(800)\n", "distPlot.title(\"Distribution of Activity\")\n", "\n", "// (column, line color, legend label) for every fraction series\n", "val dataList = List((distSource.rt, Color.Blue, \"Retweets\"), (distSource.ments, Color.Red, \"Mentions\"), (distSource.urls, Color.Yellow, \"URLs\"), (distSource.media, Color.Green, \"Media\"), (distSource.tags, Color.Orange, \"Hashtags\"))\n", "\n", "// One line renderer per series, paired with its legend label\n", "val labeledLines = dataList.map { case (series, lineColor, label) =>\n", 
"    val glyph = new Line().x(distSource.dates).y(series).line_color(lineColor)\n", "    label -> new GlyphRenderer().data_source(distSource).glyph(glyph)\n", "}\n", "\n", "// Red marker positioned by eventSource\n", "val circle = new Circle().x(eventSource.x).y(eventSource.y).fill_color(Color.Red).size(10)\n", "val eventCircle = new GlyphRenderer().data_source(eventSource).glyph(circle)\n", "\n", "// Line renderers first, event marker last, in both the renderer list and the legend\n", "var rendList: List[io.continuum.bokeh.GlyphRenderer] = labeledLines.map(_._2) :+ eventCircle\n", "var legendList: List[(String, List[io.continuum.bokeh.GlyphRenderer])] = labeledLines.map { case (label, rend) => label -> List(rend) } :+ (\"Event\" -> List(eventCircle))\n", "\n", "val xTick = new DatetimeTicker().desired_num_ticks(10)\n", "\n", "val yForm = new NumeralTickFormatter().format(\"0.0\")\n", "\n", "val xaxis = new DatetimeAxis().ticker(xTick).plot(distPlot)\n", "val yaxis = new LinearAxis().formatter(yForm).plot(distPlot)\n", "\n", "// Each grid reuses the ticker of the axis for its dimension\n", "val gridX = new Grid().plot(distPlot).dimension(0).ticker(xaxis.ticker.value)\n", "val gridY = new Grid().plot(distPlot).dimension(1).ticker(yaxis.ticker.value)\n", "\n", "distPlot.below <<= (xaxis :: _)\n", "distPlot.left <<= (yaxis :: _)\n", "\n", "val legend = new Legend().plot(distPlot).legends(legendList)\n", "\n", "val pantool = new PanTool().plot(distPlot)\n", "val previewTool = new PreviewSaveTool().plot(distPlot)\n", "\n", "// Register the glyph renderers, then the axes and grids, then the legend\n", "distPlot.renderers <<= (rendList ++ _)\n", "distPlot.renderers <<= (xaxis :: yaxis :: gridX :: gridY :: _)\n", "distPlot.renderers <<= (legend :: _)\n", "\n", "distPlot.tools := List(pantool, previewTool)\n", "\n", "val document = new Document(distPlot)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " BokehJS successfully loaded.\n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel.display.html(document.fragment.preamble.toString)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", " \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel.display.html(document.fragment.html.toString)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Relevant Tweets" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "// We're looking at daily activities\n", "val timeScale : ActivityFrequency.TimeScale.Value = ActivityFrequency.TimeScale.DAILY\n", "\n", "// The relevant-tweet inputs get their own names so that the full-sample `targetFiles`,\n", "// `twitterStrings` and `tweets` stay intact if an earlier cell is re-run.\n", "val relevantFiles = \"/user/cbuntain/paris_nov\"\n", "val relevantStrings = sc.textFile(relevantFiles).repartition(3000)\n", "\n", "// Convert each JSON line in the file to a status using Twitter4j\n", "// Note that not all lines are Status lines, so we catch any exception\n", "// generated during this conversion and set to null since we don't care\n", "// about non-status lines.\n", "val relevantTweets = relevantStrings.map(line => {\n", " try {\n", " TwitterObjectFactory.createStatus(line)\n", " } catch {\n", " case e : Exception => null\n", " }\n", " }).filter(status => status != null)\n", "\n", "val relevantDatedCounts = ActivityFrequency.activityCounter(relevantTweets, timeScale)\n", "val relevantDatedCountsMap = relevantDatedCounts.collectAsMap()\n", "\n", "\n", "// UniqueUserFrequency declares its own TimeScale type, so it gets its own name instead of a second `timeScale` in the same cell\n", "val userTimeScale : UniqueUserFrequency.TimeScale.Value = UniqueUserFrequency.TimeScale.DAILY\n", "val relevantDatedUserCounts = UniqueUserFrequency.userCounter(relevantTweets, userTimeScale)\n", "val relevantDatedUserCountsMap = relevantDatedUserCounts.collectAsMap()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "val dateList = relevantDatedCountsMap.keys.toList.sorted\n", "\n", "// Named separately from the `distSource` object of the Distribution Plots section so neither definition shadows the other\n", "object relevantDistSource extends ColumnDataSource {\n", "    val dates = column(dateList.map(d => d.getTime.asInstanceOf[Double]))\n", "    // Raw number of relevant tweets per day (drawn against the right-hand axis)\n", "    val tCount= column(dateList.map(d => relevantDatedCountsMap(d)._1.asInstanceOf[Double]))\n", 
"    // Relevant tweets as a fraction of all collected tweets that day. The day keys come from the relevant set,\n", "    // so a day missing from the full-sample counts yields 0.0 instead of throwing NoSuchElementException.\n", "    val tweets= column(dateList.map(d => datedCountsMap.get(d).map(all => relevantDatedCountsMap(d)._1.asInstanceOf[Double] / all._1.asInstanceOf[Double]).getOrElse(0.0)))\n", "    // Remaining columns: activity count divided by the relevant tweet count for the same day\n", "    val rt = column(dateList.map(d => relevantDatedCountsMap(d)._2.asInstanceOf[Double] / relevantDatedCountsMap(d)._1.asInstanceOf[Double]))\n", "    val ments = column(dateList.map(d => relevantDatedCountsMap(d)._3.asInstanceOf[Double] / relevantDatedCountsMap(d)._1.asInstanceOf[Double]))\n", "    val urls = column(dateList.map(d => relevantDatedCountsMap(d)._4.asInstanceOf[Double] / relevantDatedCountsMap(d)._1.asInstanceOf[Double]))\n", "    val media = column(dateList.map(d => relevantDatedCountsMap(d)._5.asInstanceOf[Double] / relevantDatedCountsMap(d)._1.asInstanceOf[Double]))\n", "    val tags = column(dateList.map(d => relevantDatedCountsMap(d)._6.asInstanceOf[Double] / relevantDatedCountsMap(d)._1.asInstanceOf[Double]))\n", "}" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [], "source": [ "// Left axis: fractions in [0, 1]. The extra range \"foo\" (0 to 200000) backs the right-hand axis used for raw counts.\n", "val xdr = new DataRange1d()\n", "val ydr = new DataRange1d().start(0.0).end(1.0)\n", "\n", "val distPlot = new Plot().x_range(xdr).y_range(ydr).extra_y_ranges(Map(\"foo\" -> new DataRange1d().start(0).end(200000)))\n", "distPlot.width(800)\n", "distPlot.title(\"Distribution of Activity (Relevant Tweets)\")\n", "\n", "val xTick = new DatetimeTicker().desired_num_ticks(10)\n", "val yForm = new NumeralTickFormatter().format(\"0.0\")\n", "val xaxis = new DatetimeAxis().ticker(xTick).plot(distPlot)\n", "val yaxis = new LinearAxis().formatter(yForm).plot(distPlot)\n", "val rightYaxis = new LinearAxis().formatter(yForm).plot(distPlot).y_range_name(\"foo\")\n", "\n", "val gridX = new Grid().plot(distPlot).dimension(0).ticker(xaxis.ticker.value)\n", "val gridY = new Grid().plot(distPlot).dimension(1).ticker(yaxis.ticker.value)\n", "\n", "distPlot.below <<= (xaxis :: _)\n", "distPlot.left <<= (yaxis :: _)\n", "distPlot.right <<= (rightYaxis :: _)\n", "\n", "// Every series gets its own color; \"Tweets\" uses cyan so it is not confused with the blue \"Retweets\" line\n", "val 
dataList = List((relevantDistSource.tweets, Color.Cyan, \"Tweets\"), (relevantDistSource.rt, Color.Blue, \"Retweets\"), (relevantDistSource.ments, Color.Red, \"Mentions\"), (relevantDistSource.urls, Color.Yellow, \"URLs\"), (relevantDistSource.media, Color.Green, \"Media\"), (relevantDistSource.tags, Color.Orange, \"Hashtags\"))\n", "var rendList = List[io.continuum.bokeh.GlyphRenderer]()\n", "var legendList = List[(String, List[io.continuum.bokeh.GlyphRenderer])]()\n", "\n", "for (dataTuple <- dataList) {\n", " val xData = relevantDistSource.dates\n", " val yData = dataTuple._1\n", " val color = dataTuple._2\n", " val name = dataTuple._3\n", " \n", " val line = new Line().x(xData).y(yData).line_color(color)\n", " val rendLine = new GlyphRenderer().data_source(relevantDistSource).glyph(line)\n", " \n", " rendList = rendList :+ rendLine\n", " legendList = legendList :+ (name -> List(rendLine))\n", "}\n", "\n", "val circle = new Circle().x(eventSource.x).y(eventSource.y).fill_color(Color.Red).size(10)\n", "val eventCircle = new GlyphRenderer().data_source(eventSource).glyph(circle)\n", "rendList = rendList :+ eventCircle\n", "legendList = legendList :+ (\"Event\" -> List(eventCircle))\n", "\n", "// Raw relevant-tweet count, drawn against the \"foo\" range and labelled in the legend so the black line is identifiable\n", "val countLine = new Line().x(relevantDistSource.dates).y(relevantDistSource.tCount).line_color(Color.Black)\n", "val rendCountLine = new GlyphRenderer().data_source(relevantDistSource).glyph(countLine).y_range_name(\"foo\")\n", "rendList = rendList :+ rendCountLine\n", "legendList = legendList :+ (\"Relevant Tweet Count\" -> List(rendCountLine))\n", "\n", "val legend = new Legend().plot(distPlot).legends(legendList)\n", "val pantool = new PanTool().plot(distPlot)\n", "val previewTool = new PreviewSaveTool().plot(distPlot)\n", "\n", "distPlot.renderers <<= (rendList ++ _)\n", "distPlot.renderers <<= (xaxis :: yaxis :: gridX :: gridY :: _)\n", "distPlot.renderers <<= (legend :: rightYaxis :: _)\n", "\n", "distPlot.tools := List(pantool, previewTool)\n", "\n", "val document = new Document(distPlot)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " BokehJS successfully loaded.\n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel.display.html(document.fragment.preamble.toString)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", " \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "kernel.display.html(document.fragment.html.toString)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Toree", "language": "", "name": "toree" }, "language_info": { "name": "scala", "version": "2.10.4" } }, "nbformat": 4, "nbformat_minor": 0 }