<!DOCTYPE html> <html> <head> <meta name="databricks-html-version" content="1"> <title>022_TweetGenericCollector - Databricks</title> <meta charset="utf-8"> <meta name="google" content="notranslate"> <meta http-equiv="Content-Language" content="en"> <meta http-equiv="Content-Type" content="text/html; charset=UTF8"> <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700"> <link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/lib/css/bootstrap.min.css"> <link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/lib/jquery-ui-bundle/jquery-ui.min.css"> <link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/css/main.css"> <link rel="stylesheet" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/css/print.css" media="print"> <link rel="icon" type="image/png" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/img/favicon.ico"/> <script>window.settings = {"sparkDocsSearchGoogleCx":"004588677886978090460:_rj0wilqwdm","dbcForumURL":"http://forums.databricks.com/","dbfsS3Host":"https://databricks-prod-storage-sydney.s3.amazonaws.com","enableThirdPartyApplicationsUI":false,"enableClusterAcls":false,"notebookRevisionVisibilityHorizon":0,"enableTableHandler":true,"isAdmin":true,"enableLargeResultDownload":false,"nameAndEmail":"Raazesh Sainudiin 
(r.sainudiin@math.canterbury.ac.nz)","enablePresentationTimerConfig":true,"enableFullTextSearch":true,"enableElasticSparkUI":true,"clusters":true,"hideOffHeapCache":false,"applications":false,"useStaticGuide":false,"fileStoreBase":"FileStore","configurableSparkOptionsSpec":[{"keyPattern":"spark\\.kryo(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.kryo.*","valuePatternDisplay":"*","description":"Configuration options for Kryo serialization"},{"keyPattern":"spark\\.io\\.compression\\.codec","valuePattern":"(lzf|snappy|org\\.apache\\.spark\\.io\\.LZFCompressionCodec|org\\.apache\\.spark\\.io\\.SnappyCompressionCodec)","keyPatternDisplay":"spark.io.compression.codec","valuePatternDisplay":"snappy|lzf","description":"The codec used to compress internal data such as RDD partitions, broadcast variables and shuffle outputs."},{"keyPattern":"spark\\.serializer","valuePattern":"(org\\.apache\\.spark\\.serializer\\.JavaSerializer|org\\.apache\\.spark\\.serializer\\.KryoSerializer)","keyPatternDisplay":"spark.serializer","valuePatternDisplay":"org.apache.spark.serializer.JavaSerializer|org.apache.spark.serializer.KryoSerializer","description":"Class to use for serializing objects that will be sent over the network or need to be cached in serialized form."},{"keyPattern":"spark\\.rdd\\.compress","valuePattern":"(true|false)","keyPatternDisplay":"spark.rdd.compress","valuePatternDisplay":"true|false","description":"Whether to compress serialized RDD partitions (e.g. for StorageLevel.MEMORY_ONLY_SER). 
Can save substantial space at the cost of some extra CPU time."},{"keyPattern":"spark\\.speculation","valuePattern":"(true|false)","keyPatternDisplay":"spark.speculation","valuePatternDisplay":"true|false","description":"Whether to use speculation (recommended off for streaming)"},{"keyPattern":"spark\\.es(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"es(\\.([^\\.]+))+","valuePattern":".*","keyPatternDisplay":"es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"spark\\.(storage|shuffle)\\.memoryFraction","valuePattern":"0?\\.0*([1-9])([0-9])*","keyPatternDisplay":"spark.(storage|shuffle).memoryFraction","valuePatternDisplay":"(0.0,1.0)","description":"Fraction of Java heap to use for Spark's shuffle or storage"},{"keyPattern":"spark\\.streaming\\.backpressure\\.enabled","valuePattern":"(true|false)","keyPatternDisplay":"spark.streaming.backpressure.enabled","valuePatternDisplay":"true|false","description":"Enables or disables Spark Streaming's internal backpressure mechanism (since 1.5). This enables the Spark Streaming to control the receiving rate based on the current batch scheduling delays and processing times so that the system receives only as fast as the system can process. Internally, this dynamically sets the maximum receiving rate of receivers. This rate is upper bounded by the values `spark.streaming.receiver.maxRate` and `spark.streaming.kafka.maxRatePerPartition` if they are set."},{"keyPattern":"spark\\.streaming\\.receiver\\.maxRate","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.receiver.maxRate","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which each receiver will receive data. Effectively, each stream will consume at most this number of records per second. 
Setting this configuration to 0 or a negative number will put no limit on the rate. See the deployment guide in the Spark Streaming programing guide for mode details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRatePerPartition","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRatePerPartition","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which data will be read from each Kafka partition when using the Kafka direct stream API introduced in Spark 1.3. See the Kafka Integration guide for more details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRetries","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRetries","valuePatternDisplay":"numeric","description":"Maximum number of consecutive retries the driver will make in order to find the latest offsets on the leader of each partition (a default value of 1 means that the driver will make a maximum of 2 attempts). Only applies to the Kafka direct stream API introduced in Spark 1.3."},{"keyPattern":"spark\\.streaming\\.ui\\.retainedBatches","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.ui.retainedBatches","valuePatternDisplay":"numeric","description":"How many batches the Spark Streaming UI and status APIs remember before garbage collecting."}],"enableReactNotebookComments":true,"enableResetPassword":true,"enableJobsSparkUpgrade":true,"sparkVersions":[{"key":"1.3.x-ubuntu15.10","displayName":"Spark 1.3.0","packageLabel":"spark-1.3-jenkins-ip-10-30-9-162-U0c2673ac85-Sa2ee4664b2-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.4.x-ubuntu15.10","displayName":"Spark 1.4.1","packageLabel":"spark-1.4-jenkins-ip-10-30-9-162-U0c2673ac85-S33a1e4b9c6-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.5.x-ubuntu15.10","displayName":"Spark 
1.5.2","packageLabel":"spark-1.5-jenkins-ip-10-30-9-162-U0c2673ac85-S5917a1044d-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.x-ubuntu15.10","displayName":"Spark 1.6.0","packageLabel":"spark-1.6-jenkins-ip-10-30-9-162-U0c2673ac85-Scabba801f3-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"master","displayName":"Spark master (dev)","packageLabel":"","upgradable":true,"deprecated":false,"customerVisible":false}],"enableRestrictedClusterCreation":false,"enableFeedback":false,"defaultNumWorkers":8,"serverContinuationTimeoutMillis":10000,"driverStderrFilePrefix":"stderr","driverStdoutFilePrefix":"stdout","enableSparkDocsSearch":true,"prefetchSidebarNodes":true,"sparkHistoryServerEnabled":true,"sanitizeMarkdownHtml":true,"enableIPythonImportExport":true,"enableNotebookHistoryDiffing":true,"branch":"2.12.3","accountsLimit":-1,"enableNotebookGitBranching":true,"local":false,"displayDefaultContainerMemoryGB":6,"deploymentMode":"production","useSpotForWorkers":false,"enableUserInviteWorkflow":false,"enableStaticNotebooks":true,"dbcGuideURL":"#workspace/databricks_guide/00 Welcome to Databricks","enableCssTransitions":true,"pricingURL":"https://databricks.com/product/pricing","enableClusterAclsConfig":false,"orgId":0,"enableNotebookGitVersioning":true,"files":"files/","enableDriverLogsUI":true,"disableLegacyDashboards":false,"enableWorkspaceAclsConfig":true,"dropzoneMaxFileSize":4096,"enableNewDashboardViews":false,"driverLog4jFilePrefix":"log4j","enableMavenLibraries":true,"displayRowLimit":1000,"defaultSparkVersion":{"key":"1.5.x-ubuntu15.10","displayName":"Spark 
1.5.2","packageLabel":"spark-1.5-jenkins-ip-10-30-9-162-U0c2673ac85-S5917a1044d-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},"clusterPublisherRootId":5,"enableLatestJobRunResultPermalink":true,"disallowAddingAdmins":false,"enableSparkConfUI":true,"enableOrgSwitcherUI":false,"clustersLimit":-1,"enableJdbcImport":true,"logfiles":"logfiles/","enableWebappSharding":false,"enableClusterDeltaUpdates":true,"csrfToken":"3f4d8617-8d0d-47dd-a072-38dbe25947da","useFixedStaticNotebookVersionForDevelopment":false,"enableBasicReactDialogBoxes":true,"requireEmailUserName":true,"enableDashboardViews":false,"dbcFeedbackURL":"http://feedback.databricks.com/forums/263785-product-feedback","enableWorkspaceAclService":true,"someName":"Raazesh Sainudiin","enableWorkspaceAcls":true,"gitHash":"0c2673ac858e227cad536fdb45d140aeded238db","userFullname":"Raazesh Sainudiin","enableClusterCreatePage":false,"enableImportFromUrl":true,"enableMiniClusters":false,"enableWebSocketDeltaUpdates":true,"enableDebugUI":false,"showHiddenSparkVersions":false,"allowNonAdminUsers":true,"userId":100005,"dbcSupportURL":"","staticNotebookResourceUrl":"https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/","enableSparkPackages":true,"enableHybridClusterType":false,"enableNotebookHistoryUI":true,"availableWorkspaces":[{"name":"Workspace 0","orgId":0}],"enableFolderHtmlExport":true,"enableSparkVersionsUI":true,"databricksGuideStaticUrl":"","enableHybridClusters":true,"notebookLoadingBackground":"#fff","enableNewJobRunDetailsPage":true,"enableDashboardExport":true,"user":"r.sainudiin@math.canterbury.ac.nz","enableServerAutoComplete":true,"enableStaticHtmlImport":true,"defaultMemoryPerContainerMB":6000,"enablePresenceUI":true,"tablesPublisherRootId":7,"enableNewInputWidgetUI":false,"accounts":true,"enableNewProgressReportUI":true,"defaultCoresPerContainer":4};</script> <script>var 
__DATABRICKS_NOTEBOOK_MODEL = {"version":"NotebookV1","origId":119589,"name":"022_TweetGenericCollector","language":"scala","commands":[{"version":"CommandV1","origId":119591,"guid":"5503e66a-3b92-4157-b344-014e2510e3eb","subtype":"command","commandType":"auto","position":1.0,"command":"%md\n\n# [Scalable Data Science](http://www.math.canterbury.ac.nz/~r.sainudiin/courses/ScalableDataScience/)\n\n\n### prepared by [Raazesh Sainudiin](https://nz.linkedin.com/in/raazesh-sainudiin-45955845) and [Sivanand Sivaram](https://www.linkedin.com/in/sivanand)\n\n*supported by* [](https://databricks.com/)\nand \n[](https://www.awseducate.com/microsite/CommunitiesEngageHome)","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"aa953397-a25e-452b-8301-164e8b57517d"},{"version":"CommandV1","origId":119592,"guid":"eed38d0d-a21f-40e7-b4b2-58a64c587b6b","subtype":"command","commandType":"auto","position":1.5,"command":"%md\n\n# Generic Twitter Collector\n\n\nRemember that the use of twitter itself comes with various strings attached. 
\n\n- **Read:** [Twitter Rules](https://twitter.com/rules)\n\n\nCrucially, the use of the content from twitter by you (as done in this worksheet) comes with some strings.\n- **Read:** [Developer Agreement & Policy Twitter Developer Agreement](https://dev.twitter.com/overview/terms/agreement-and-policy)\n\n\nThe [html source url](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/db/week6/12_SparkStreaming/022_TweetGenericCollector.html) of this databricks notebook and the recorded Uji  of a closely related notebook:\n\n[](https://www.youtube.com/v/jqLcr2eS-Vs?rel=0&autoplay=1&modestbranding=1&start=2112&end=3535)\n\nThis notebook for collecting tweets is more robust than the notebook 022_TweetCollector.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"74f5abf9-3c34-4ed9-8504-61ce7a94b947"},{"version":"CommandV1","origId":119593,"guid":"d31d73b2-178c-4d98-9fd7-69a901d59e63","subtype":"command","commandType":"auto","position":1.75,"command":"import org.apache.spark._\nimport org.apache.spark.storage._\nimport org.apache.spark.streaming._\nimport org.apache.spark.streaming.twitter.TwitterUtils\n\nimport scala.math.Ordering\n\nimport twitter4j.auth.OAuthAuthorization\nimport twitter4j.conf.ConfigurationBuilder","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">import org.apache.spark._\nimport org.apache.spark.storage._\nimport 
org.apache.spark.streaming._\nimport org.apache.spark.streaming.twitter.TwitterUtils\nimport scala.math.Ordering\nimport twitter4j.auth.OAuthAuthorization\nimport twitter4j.conf.ConfigurationBuilder\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.463697416639E12,"submitTime":1.463697257888E12,"finishTime":1.463697416691E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"f4106e91-5022-4870-a25f-b9a2aacb3b71"},{"version":"CommandV1","origId":119594,"guid":"4f9e0c37-8146-48db-b36a-e9d13c24f58f","subtype":"command","commandType":"auto","position":1.875,"command":"%md\n\n### Step 1: Enter your Twitter API Credentials.\n* Go to https://apps.twitter.com and look up your Twitter API Credentials, or create an app to create them.\n* Run this cell for the input cells to appear.\n* Enter your credentials.\n* Run the cell again to pick up your defaults.\n\nThe cell-below is hidden to not expose the Twitter API Credentials: `consumerKey`, `consumerSecret`, `accessToken` and 
`accessTokenSecret`.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"8bf47714-2fc1-489a-bd80-020531430905"},{"version":"CommandV1","origId":119595,"guid":"4e1684a4-0c9e-4584-aea4-23bae1cbb942","subtype":"command","commandType":"auto","position":1.9375,"command":"System.setProperty(\"twitter4j.oauth.consumerKey\", getArgument(\"1. Consumer Key (API Key)\", \"\"))\nSystem.setProperty(\"twitter4j.oauth.consumerSecret\", getArgument(\"2. Consumer Secret (API Secret)\", \"\"))\nSystem.setProperty(\"twitter4j.oauth.accessToken\", getArgument(\"3. Access Token\", \"\"))\nSystem.setProperty(\"twitter4j.oauth.accessTokenSecret\", getArgument(\"4. 
Access Token Secret\", \"\"))","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.463697196842E12,"submitTime":1.463697038087E12,"finishTime":1.463697196892E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"beb85b85-1449-4abc-8c4b-e19fb913b669"},{"version":"CommandV1","origId":119596,"guid":"cccb18ba-dd35-4377-99ae-030f749f6bb4","subtype":"command","commandType":"auto","position":1.96875,"command":"%md\nIf you see warnings then ignore for 
now:\n[https://forums.databricks.com/questions/6941/change-in-getargument-for-notebook-input.html](https://forums.databricks.com/questions/6941/change-in-getargument-for-notebook-input.html).","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"e8a02822-0b7d-40a1-897f-b3d91b9aec1d"},{"version":"CommandV1","origId":119597,"guid":"cfb8034b-5913-4ebd-aa44-a25d4e87bfff","subtype":"command","commandType":"auto","position":1.97265625,"command":"%md\n\n### Step 2: Configure how long to collect and how often to write tweets to a file in s3 or dbfs\n* Run this cell for the input cells to appear.\n* Enter your settings.\n* Run the cell again to pick up your 
defaults.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"5735705c-1ec9-4961-aa37-6f859eb778c1"},{"version":"CommandV1","origId":119598,"guid":"e5dc25ed-3cc8-4184-a550-75a68c7a4a0f","subtype":"command","commandType":"auto","position":1.974609375,"command":"60*60*24 // seconds in one day","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res50: Int = 86400\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461922004064E12,"submitTime":1.461921855021E12,"finishTime":1.461922004156E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"7c6452c2-320a-4e00-8ebd-918ce1005eaf"},{"version":"CommandV1","origId":119599,"guid":"9a6f397c-9c8e-40c8-ac7a-1eeea31fd47a","subtype":"command","commandType":"auto","position":1.9765625,"command":"val outputDirectory = getArgument(\"1. Output Directory\", \"/myTwitterDir\")\nval slideInterval = new Duration(getArgument(\"2. 
Save to file every N seconds\", \"300\").toInt * 1000) // 5 minutes by default\nval timeoutJobLength = getArgument(\"4. Wait this many seconds before stopping the streaming job\", \"86400\").toInt * 1000 // default is 1 day","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"<div class=\"ansiout\">&lt;console&gt;:33: error: not found: type Duration\n val slideInterval = new Duration(getArgument(&quot;2. Save to file every N seconds&quot;, &quot;1&quot;).toInt * 1000)\n ^\n&lt;console&gt;:34: error: not found: type Duration\n val windowLength = new Duration(getArgument(&quot;3. Compute the top hashtags for the last N seconds&quot;, &quot;5&quot;).toInt * 1000) // not used now\n ^\n</div>","error":null,"startTime":1.463697594726E12,"submitTime":1.463697435954E12,"finishTime":1.463697594802E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"35961df9-c42e-41b9-9c57-461c15901f64"},{"version":"CommandV1","origId":119600,"guid":"a5aa3ace-5a5f-4f97-ae2c-e8ba820b1d8d","subtype":"command","commandType":"auto","position":1.98046875,"command":"// Replace with your AWS S3 credentials\n//\n// NOTE: Set the access to this notebook appropriately to protect the security of your keys.\n// Or you can delete this cell after you run the mount command below once successfully.\n\nval AccessKey = getArgument(\"1. ACCESS_KEY\", \"REPLACE_WITH_YOUR_ACCESS_KEY\")\nval SecretKey = getArgument(\"2. 
SECRET_KEY\", \"REPLACE_WITH_YOUR_SECRET_KEY\")\nval EncodedSecretKey = SecretKey.replace(\"/\", \"%2F\")\nval AwsBucketName = getArgument(\"3. S3_BUCKET\", \"REPLACE_WITH_YOUR_S3_BUCKET\")\nval MountName = getArgument(\"4. MNT_NAME\", \"REPLACE_WITH_YOUR_MOUNT_NAME\")\nval s3Filename = \"tweetDump\"","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.463697467635E12,"submitTime":1.463697308883E12,"finishTime":1.463697467712E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"dfc09f1f-5be5-4fed-9e29-211a66dd3b58"},{"version":"CommandV1","origId":119601,"guid":"a693ae35-37e6-4d23-9827-10296ba212e1","subtype":"command","commandType":"auto","position":1.982421875,"command":"dbutils.fs.unmount(s\"/mnt/$MountName\") // finally unmount when done","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">/mnt/s3Data has been unmounted.\nres35: Boolean = true\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"java.rmi.RemoteException: Exception during RPC Mount(DbfsMountPoint(s3a://sds-twitter, /mnt/s3Data, NoEncryption()),None); nested exception is: ","error":"<div class=\"ansiout\">\tjava.lang.IllegalArgumentException: requirement failed: Directory already mounted: /mnt/s3Data\n\tat com.databricks.backend.daemon.data.client.DbfsClient.send0(DbfsClient.scala:71)\n\tat 
com.databricks.backend.daemon.data.client.DbfsClient.sendIdempotent(DbfsClient.scala:40)\n\tat com.databricks.backend.daemon.dbutils.DBUtilsCore.mount(DBUtilsCore.scala:298)\n\tat com.databricks.dbutils_v1.package$fs$.mount(dbutils_v1.scala:277)\nCaused by: java.lang.IllegalArgumentException: requirement failed: Directory already mounted: /mnt/s3Data\n\tat scala.Predef$.require(Predef.scala:233)\n\tat com.databricks.backend.daemon.data.server.DefaultMetadataManager.insertMount(MetadataManager.scala:118)\n\tat com.databricks.backend.daemon.data.server.handler.MountHandler.receive(MountHandler.scala:38)\n\tat com.databricks.backend.daemon.data.server.session.SessionContext$$anonfun$queryHandlers$1.apply(SessionContext.scala:54)\n\tat com.databricks.backend.daemon.data.server.session.SessionContext$$anonfun$queryHandlers$1.apply(SessionContext.scala:53)\n\tat scala.collection.immutable.List.foreach(List.scala:318)\n\tat com.databricks.backend.daemon.data.server.session.SessionContext.queryHandlers(SessionContext.scala:53)\n\tat com.databricks.backend.daemon.data.server.DbfsServerBackend$$anonfun$receive$1.applyOrElse(DbfsServerBackend.scala:186)\n\tat com.databricks.backend.daemon.data.server.DbfsServerBackend$$anonfun$receive$1.applyOrElse(DbfsServerBackend.scala:170)\n\tat scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33)\n\tat com.databricks.rpc.ServerBackend$$anonfun$internalReceive$1.applyOrElse(ServerBackend.scala:44)\n\tat com.databricks.rpc.ServerBackend$$anonfun$internalReceive$1.applyOrElse(ServerBackend.scala:39)\n\tat scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33)\n\tat com.databricks.rpc.ServerBackend$$anonfun$commonReceive$1.applyOrElse(ServerBackend.scala:57)\n\tat com.databricks.rpc.ServerBackend$$anonfun$commonReceive$1.applyOrElse(ServerBackend.scala:57)\n\tat scala.PartialFunction$OrElse.apply(PartialFunction.scala:162)\n\tat 
com.databricks.rpc.JettyServer$RequestManager$$anonfun$7.apply(JettyServer.scala:250)\n\tat scala.util.Try$.apply(Try.scala:161)\n\tat com.databricks.rpc.JettyServer$RequestManager.handleRPC(JettyServer.scala:250)\n\tat com.databricks.rpc.JettyServer$RequestManager.com$databricks$rpc$JettyServer$RequestManager$$handleRequestAndRespond(JettyServer.scala:193)\n\tat com.databricks.rpc.JettyServer$RequestManager$$anonfun$handleHttp$1.apply$mcV$sp(JettyServer.scala:139)\n\tat com.databricks.rpc.JettyServer$RequestManager$$anonfun$handleHttp$1.apply(JettyServer.scala:130)\n\tat com.databricks.rpc.JettyServer$RequestManager$$anonfun$handleHttp$1.apply(JettyServer.scala:130)\n\tat com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:83)\n\tat scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)\n\tat com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:78)\n\tat com.databricks.rpc.JettyServer$.withAttributionContext(JettyServer.scala:70)\n\tat com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:116)\n\tat com.databricks.rpc.JettyServer$.withAttributionTags(JettyServer.scala:70)\n\tat com.databricks.rpc.JettyServer$RequestManager.handleHttp(JettyServer.scala:129)\n\tat com.databricks.rpc.JettyServer$RequestManager.doGet(JettyServer.scala:85)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:735)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:848)\n\tat org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:684)\n\tat org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:501)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:430)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:370)\n\tat 
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:494)\n\tat org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:982)\n\tat org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1043)\n\tat org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:865)\n\tat org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)\n\tat org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:82)\n\tat org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:667)\n\tat org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:52)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)\n\tat java.lang.Thread.run(Thread.java:745)</div>","startTime":1.46192022411E12,"submitTime":1.461920075127E12,"finishTime":1.46192022553E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"b5e55f75-9710-41d5-8c16-c932ace3b81b"},{"version":"CommandV1","origId":119602,"guid":"cb5ad81f-428c-44cb-9a37-8b8a89cff6e3","subtype":"command","commandType":"auto","position":1.9833984375,"command":"dbutils.fs.mount(s\"s3a://$AccessKey:$EncodedSecretKey@$AwsBucketName\", s\"/mnt/$MountName\")","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res36: Boolean = 
true\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461920228231E12,"submitTime":1.46192007904E12,"finishTime":1.461920229119E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"8f6618af-2bb0-4f66-8243-5d05c182ad1a"},{"version":"CommandV1","origId":119603,"guid":"53bc966f-3e45-4c2b-b5c1-c563eef5cdf1","subtype":"command","commandType":"auto","position":1.983642578125,"command":"//dbutils.fs.rm(\"/mnt/s3Data/twitterNew/\",recurse=true)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res32: Boolean = 
true\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461919988404E12,"submitTime":1.461919839457E12,"finishTime":1.461919989219E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"a121d6dc-9547-48d9-ba47-6963af0f00dd"},{"version":"CommandV1","origId":119604,"guid":"baf43b10-7893-471f-a734-7d104c58a423","subtype":"command","commandType":"auto","position":1.98388671875,"command":"display(dbutils.fs.ls(\"/mnt/\"))","commandVersion":0,"state":"finished","results":{"type":"table","data":[["dbfs:/mnt/raj/","raj/",0.0],["dbfs:/mnt/s3Data/","s3Data/",0.0]],"arguments":{},"addedWidgets":{},"removedWidgets":[],"schema":[{"name":"path","type":"\"string\""},{"name":"name","type":"\"string\""},{"name":"size","type":"\"long\""}],"overflow":false,"aggData":[],"aggSchema":[],"aggOverflow":false,"aggSeriesLimitReached":false,"aggError":"","aggType":"","plotOptions":null,"isJsonSchema":true,"dbfsResultPath":null},"errorSummary":"java.io.FileNotFoundException: /mnt/s3Data","error":"<div class=\"ansiout\">\tat com.databricks.backend.daemon.data.client.DbfsClient.send0(DbfsClient.scala:63)\n\tat com.databricks.backend.daemon.data.client.DbfsClient.sendIdempotent(DbfsClient.scala:40)\n\tat com.databricks.backend.daemon.data.client.DatabricksFileSystem.listStatus(DatabricksFileSystem.scala:174)\n\tat com.databricks.backend.daemon.dbutils.FSUtils$.ls(DBUtilsCore.scala:60)\n\tat 
com.databricks.dbutils_v1.package$fs$.ls(dbutils_v1.scala:44)</div>","startTime":1.461920237974E12,"submitTime":1.461920089023E12,"finishTime":1.461920238886E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"715e5e8d-49a7-4ce4-8b0e-5a110aa8c7ff"},{"version":"CommandV1","origId":119605,"guid":"a6e14f95-fa17-4cd7-ae3c-3d85114d5efc","subtype":"command","commandType":"auto","position":1.984375,"command":"%md\n\n### Step 3: Run the Twitter Streaming job.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"8738e247-993f-4d64-ac01-98d6990c8325"},{"version":"CommandV1","origId":119606,"guid":"4fab13ba-b4b2-4841-a0c5-4878b1ee57ff","subtype":"command","commandType":"auto","position":1.98828125,"command":"%md\nClean up any old 
files.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"5dee4fe9-af6e-472f-908e-bb30e6d60512"},{"version":"CommandV1","origId":119607,"guid":"8714a7d8-f4aa-4419-8a93-0f4855f11b98","subtype":"command","commandType":"auto","position":1.9921875,"command":"//dbutils.fs.rm(outputDirectory, true) // do only if you want to remove","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.463697865179E12,"submitTime":1.463697706407E12,"finishTime":1.463697865254E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"e690f0e6-a3b3-45b9-9a58-fa4b75d55f87"},{"version":"CommandV1","origId":119608,"guid":"6f90a568-927b-4b27-9155-2f8ff646176f","subtype":"command","commandType":"auto","position":1.994140625,"command":"//dbutils.fs.rm(\"/mnt/s3Data/twitterNew/\",recurse=true)","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,
"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"bddd3f44-2f06-4bd7-bd8f-70f6e7108e6f"},{"version":"CommandV1","origId":119609,"guid":"db3b5815-006f-49bb-ae93-44498c1f4f57","subtype":"command","commandType":"auto","position":1.99609375,"command":"%md \nCreate the function that creates the Streaming Context and sets up the streaming job.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"d761c81b-3904-445e-ab0b-64509e0c5c2b"},{"version":"CommandV1","origId":119610,"guid":"e5a27905-3c6f-4b2c-8623-be386d94847c","subtype":"command","commandType":"auto","position":1.9970703125,"command":"import com.google.gson.Gson // the Library has already been attached to this cluster (show live how to do this from scratch?)\n\nvar newContextCreated = false\nvar num = 0\nvar numTweetsCollected = 0L // track number of tweets collected\n\n// This is a helper object used for ordering (String, Int) pairs by their second (Int) element.\nobject SecondValueOrdering extends Ordering[(String, Int)] {\n def compare(a: (String, Int), b: (String, Int)) = {\n a._2 compare b._2\n 
}\n}\n\n// This is the function that creates the StreamingContext and sets up the Spark Streaming job.\ndef creatingFunc(): StreamingContext = {\n // Create a Spark Streaming Context.\n val ssc = new StreamingContext(sc, slideInterval)\n // Create a Twitter Stream for the input source. \n val auth = Some(new OAuthAuthorization(new ConfigurationBuilder().build()))\n val twitterStream = TwitterUtils.createStream(ssc, auth)\n \n val twitterStreamJson = twitterStream.map(x => { val gson = new Gson();\n val xJson = gson.toJson(x)\n xJson\n }) \n \nval partitionsEachInterval = 1 // This tells the number of partitions in each RDD of tweets in the DStream.\n\ntwitterStreamJson.foreachRDD((rdd, time) => { // for each RDD in the DStream\n val count = rdd.count()\n if (count > 0) {\n val outputRDD = rdd.repartition(partitionsEachInterval) // repartition as desired\n //outputRDD.saveAsTextFile(s\"${outputDirectory}/tweets_\" + time.milliseconds.toString) // save as textfile\n outputRDD.saveAsTextFile(s\"/mnt/$MountName/${outputDirectory}\" + \"/tweets_\" + time.milliseconds.toString) // save as textfile in s3\n numTweetsCollected += count // update with the latest count\n }\n })\n \n newContextCreated = true\n ssc\n}","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">import com.google.gson.Gson\nnewContextCreated: Boolean = false\nnum: Int = 0\nnumTweetsCollected: Long = 0\ndefined module SecondValueOrdering\ncreatingFunc: 
()org.apache.spark.streaming.StreamingContext\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461922040317E12,"submitTime":1.461921891285E12,"finishTime":1.461922040578E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"831500bf-35b2-4212-9144-e54b36bbce95"},{"version":"CommandV1","origId":119611,"guid":"a1cc21e7-a430-4d5a-bb76-e2da9e1dc442","subtype":"command","commandType":"auto","position":1.998046875,"command":"%md \nCreate the StreamingContext using getActiveOrCreate, as required when starting a streaming job in Databricks.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"45811743-91c4-44e4-9039-674098c9e028"},{"version":"CommandV1","origId":119612,"guid":"2a15330c-59a6-41aa-b77f-7a27de8d254a","subtype":"command","commandType":"auto","position":1.99853515625,"command":"val ssc = StreamingContext.getActiveOrCreate(creatingFunc)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">ssc: 
org.apache.spark.streaming.StreamingContext = org.apache.spark.streaming.StreamingContext@59cc7ab6\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461922045209E12,"submitTime":1.461921896158E12,"finishTime":1.46192204538E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"018e1db9-e3f6-46d1-884a-b286877c973b"},{"version":"CommandV1","origId":119613,"guid":"406084b8-4ac9-4fcd-9d15-e6fd4b4c955a","subtype":"command","commandType":"auto","position":1.998779296875,"command":"%md\n\nStart the Spark Streaming Context and return when the Streaming job exits or return with the specified timeout. 
","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"db8eb2be-67f7-4fac-a65e-882b37b08e6c"},{"version":"CommandV1","origId":119614,"guid":"2073dfa2-ce9f-4c7f-8577-9690f9e0bf26","subtype":"command","commandType":"auto","position":1.9989013671875,"command":"//ssc.start()","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461919844288E12,"submitTime":1.461919695338E12,"finishTime":1.461919844593E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"fd88fb9d-1788-41b0-9951-6f0e7494de53"},{"version":"CommandV1","origId":119615,"guid":"9f3a876e-6206-41d7-8cc7-238b35788631","subtype":"command","commandType":"auto","position":1.9990234375,"command":"ssc.start()\nssc.awaitTerminationOrTimeout(timeoutJobLength)\nssc.stop(stopSparkContext = false)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div 
class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461922054861E12,"submitTime":1.46192190582E12,"finishTime":1.462008455615E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"aa658479-3d39-454c-a1fa-76aad7047885"},{"version":"CommandV1","origId":119616,"guid":"812a936a-d045-4244-b4ba-c347a8c88f62","subtype":"command","commandType":"auto","position":1.999267578125,"command":"%md\nCheck out the Clusters `Streaming` UI as the job is running.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"240e877e-f149-4b5d-902c-b0a8ce5f0d60"},{"version":"CommandV1","origId":119617,"guid":"44cb4075-875d-4e2e-8099-8feb18341021","subtype":"command","commandType":"auto","position":1.99951171875,"command":"%md\n\nStop any active Streaming Contexts, but don't stop the spark contexts they are attached 
to.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"982fc11c-a250-4dcb-96b3-07f7244c8156"},{"version":"CommandV1","origId":119618,"guid":"f4c5e706-734c-48ac-b539-819045b0baee","subtype":"command","commandType":"auto","position":1.9996337890625,"command":"StreamingContext.getActive.foreach { _.stop(stopSparkContext = false) }","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461906254574E12,"submitTime":1.461906106203E12,"finishTime":1.461906254711E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"36425eff-1e35-41af-857b-0a654e4bc8fa"},{"version":"CommandV1","origId":119619,"guid":"67fcd852-e11c-4a2e-a7c7-c8cf7164da48","subtype":"command","commandType":"auto","position":1.999755859375,"command":"%md\n\n### Step 4: View the 
Results.","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"ba655694-91d4-48eb-be92-59980cca20a3"},{"version":"CommandV1","origId":119620,"guid":"08f08496-597d-43c2-9b1b-4844022a0796","subtype":"command","commandType":"auto","position":1.99981689453125,"command":"display(dbutils.fs.ls(\"/mnt/s3Data/twitterNew/\"))","commandVersion":0,"state":"finished","results":{"type":"table","data":[["dbfs:/mnt/s3Data/twitterNew/tweets_1461920400000/","tweets_1461920400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461920700000/","tweets_1461920700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461921000000/","tweets_1461921000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461921300000/","tweets_1461921300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461922200000/","tweets_1461922200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461922500000/","tweets_1461922500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461922800000/","tweets_1461922800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461923100000/","tweets_1461923100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461923400000/","tweets_1461923400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461923700000/","tweets_1461923700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461924000000/","tweets_1461924000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461924300000/","tweets_1461924300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461924600000/","tweets_1461924600000/",0.0],["d
bfs:/mnt/s3Data/twitterNew/tweets_1461924900000/","tweets_1461924900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461925200000/","tweets_1461925200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461925500000/","tweets_1461925500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461925800000/","tweets_1461925800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461926100000/","tweets_1461926100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461926400000/","tweets_1461926400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461926700000/","tweets_1461926700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461927000000/","tweets_1461927000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461927300000/","tweets_1461927300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461927600000/","tweets_1461927600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461927900000/","tweets_1461927900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461928200000/","tweets_1461928200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461928500000/","tweets_1461928500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461928800000/","tweets_1461928800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461929100000/","tweets_1461929100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461929400000/","tweets_1461929400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461929700000/","tweets_1461929700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461930000000/","tweets_1461930000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461930300000/","tweets_1461930300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461930600000/","tweets_1461930600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461930900000/","tweets_1461930900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461931200000/","tweets_1461931200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461931500000/","tweets_1461931500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461931800000/","tweets_1461931800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweet
s_1461932100000/","tweets_1461932100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461932400000/","tweets_1461932400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461932700000/","tweets_1461932700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461933000000/","tweets_1461933000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461933300000/","tweets_1461933300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461933600000/","tweets_1461933600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461933900000/","tweets_1461933900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461934200000/","tweets_1461934200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461934500000/","tweets_1461934500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461934800000/","tweets_1461934800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461935100000/","tweets_1461935100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461935400000/","tweets_1461935400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461935700000/","tweets_1461935700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461936000000/","tweets_1461936000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461936300000/","tweets_1461936300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461936600000/","tweets_1461936600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461936900000/","tweets_1461936900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461937200000/","tweets_1461937200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461937500000/","tweets_1461937500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461937800000/","tweets_1461937800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461938100000/","tweets_1461938100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461938400000/","tweets_1461938400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461938700000/","tweets_1461938700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461939000000/","tweets_1461939000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461939300000/","tweets_146193
9300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461939600000/","tweets_1461939600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461939900000/","tweets_1461939900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461940200000/","tweets_1461940200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461940500000/","tweets_1461940500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461940800000/","tweets_1461940800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461941100000/","tweets_1461941100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461941400000/","tweets_1461941400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461941700000/","tweets_1461941700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461942000000/","tweets_1461942000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461942300000/","tweets_1461942300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461942600000/","tweets_1461942600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461942900000/","tweets_1461942900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461943200000/","tweets_1461943200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461943500000/","tweets_1461943500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461943800000/","tweets_1461943800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461944100000/","tweets_1461944100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461944400000/","tweets_1461944400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461944700000/","tweets_1461944700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461945000000/","tweets_1461945000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461945300000/","tweets_1461945300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461945600000/","tweets_1461945600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461945900000/","tweets_1461945900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461946200000/","tweets_1461946200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461946500000/","tweets_1461946500000/",0.0],["dbfs:/mnt/s3Dat
a/twitterNew/tweets_1461946800000/","tweets_1461946800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461947100000/","tweets_1461947100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461947400000/","tweets_1461947400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461947700000/","tweets_1461947700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461948000000/","tweets_1461948000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461948300000/","tweets_1461948300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461948600000/","tweets_1461948600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461948900000/","tweets_1461948900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461949200000/","tweets_1461949200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461949500000/","tweets_1461949500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461949800000/","tweets_1461949800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461950100000/","tweets_1461950100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461950400000/","tweets_1461950400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461950700000/","tweets_1461950700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461951000000/","tweets_1461951000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461951300000/","tweets_1461951300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461951600000/","tweets_1461951600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461951900000/","tweets_1461951900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461952200000/","tweets_1461952200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461952500000/","tweets_1461952500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461952800000/","tweets_1461952800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461953100000/","tweets_1461953100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461953400000/","tweets_1461953400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461953700000/","tweets_1461953700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_146195400000
0/","tweets_1461954000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461954300000/","tweets_1461954300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461954600000/","tweets_1461954600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461954900000/","tweets_1461954900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461955200000/","tweets_1461955200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461955500000/","tweets_1461955500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461955800000/","tweets_1461955800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461956100000/","tweets_1461956100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461956400000/","tweets_1461956400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461956700000/","tweets_1461956700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461957000000/","tweets_1461957000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461957300000/","tweets_1461957300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461957600000/","tweets_1461957600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461957900000/","tweets_1461957900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461958200000/","tweets_1461958200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461958500000/","tweets_1461958500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461958800000/","tweets_1461958800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461959100000/","tweets_1461959100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461959400000/","tweets_1461959400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461959700000/","tweets_1461959700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461960000000/","tweets_1461960000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461960300000/","tweets_1461960300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461960600000/","tweets_1461960600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461960900000/","tweets_1461960900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461961200000/","tweets_1461961200000/",0.0]
,["dbfs:/mnt/s3Data/twitterNew/tweets_1461961500000/","tweets_1461961500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461961800000/","tweets_1461961800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461962100000/","tweets_1461962100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461962400000/","tweets_1461962400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461962700000/","tweets_1461962700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461963000000/","tweets_1461963000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461963300000/","tweets_1461963300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461963600000/","tweets_1461963600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461963900000/","tweets_1461963900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461964200000/","tweets_1461964200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461964500000/","tweets_1461964500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461964800000/","tweets_1461964800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461965100000/","tweets_1461965100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461965400000/","tweets_1461965400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461965700000/","tweets_1461965700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461966000000/","tweets_1461966000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461966300000/","tweets_1461966300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461966600000/","tweets_1461966600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461966900000/","tweets_1461966900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461967200000/","tweets_1461967200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461967500000/","tweets_1461967500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461967800000/","tweets_1461967800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461968100000/","tweets_1461968100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461968400000/","tweets_1461968400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/t
weets_1461968700000/","tweets_1461968700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461969000000/","tweets_1461969000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461969300000/","tweets_1461969300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461969600000/","tweets_1461969600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461969900000/","tweets_1461969900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461970200000/","tweets_1461970200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461970500000/","tweets_1461970500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461970800000/","tweets_1461970800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461971100000/","tweets_1461971100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461971400000/","tweets_1461971400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461971700000/","tweets_1461971700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461972000000/","tweets_1461972000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461972300000/","tweets_1461972300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461972600000/","tweets_1461972600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461972900000/","tweets_1461972900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461973200000/","tweets_1461973200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461973500000/","tweets_1461973500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461973800000/","tweets_1461973800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461974100000/","tweets_1461974100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461974400000/","tweets_1461974400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461974700000/","tweets_1461974700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461975000000/","tweets_1461975000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461975300000/","tweets_1461975300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461975600000/","tweets_1461975600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461975900000/","tweets_14
61975900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461976200000/","tweets_1461976200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461976500000/","tweets_1461976500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461976800000/","tweets_1461976800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461977100000/","tweets_1461977100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461977400000/","tweets_1461977400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461977700000/","tweets_1461977700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461978000000/","tweets_1461978000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461978300000/","tweets_1461978300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461978600000/","tweets_1461978600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461978900000/","tweets_1461978900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461979200000/","tweets_1461979200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461979500000/","tweets_1461979500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461979800000/","tweets_1461979800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461980100000/","tweets_1461980100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461980400000/","tweets_1461980400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461980700000/","tweets_1461980700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461981000000/","tweets_1461981000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461981300000/","tweets_1461981300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461981600000/","tweets_1461981600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461981900000/","tweets_1461981900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461982200000/","tweets_1461982200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461982500000/","tweets_1461982500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461982800000/","tweets_1461982800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461983100000/","tweets_1461983100000/",0.0],["dbfs:/mnt/s
3Data/twitterNew/tweets_1461983400000/","tweets_1461983400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461983700000/","tweets_1461983700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461984000000/","tweets_1461984000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461984300000/","tweets_1461984300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461984600000/","tweets_1461984600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461984900000/","tweets_1461984900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461985200000/","tweets_1461985200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461985500000/","tweets_1461985500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461985800000/","tweets_1461985800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461986100000/","tweets_1461986100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461986400000/","tweets_1461986400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461986700000/","tweets_1461986700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461987000000/","tweets_1461987000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461987300000/","tweets_1461987300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461987600000/","tweets_1461987600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461987900000/","tweets_1461987900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461988200000/","tweets_1461988200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461988500000/","tweets_1461988500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461988800000/","tweets_1461988800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461989100000/","tweets_1461989100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461989400000/","tweets_1461989400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461989700000/","tweets_1461989700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461990000000/","tweets_1461990000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461990300000/","tweets_1461990300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_14619906
00000/","tweets_1461990600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461990900000/","tweets_1461990900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461991200000/","tweets_1461991200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461991500000/","tweets_1461991500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461991800000/","tweets_1461991800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461992100000/","tweets_1461992100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461992400000/","tweets_1461992400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461992700000/","tweets_1461992700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461993000000/","tweets_1461993000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461993300000/","tweets_1461993300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461993600000/","tweets_1461993600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461993900000/","tweets_1461993900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461994200000/","tweets_1461994200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461994500000/","tweets_1461994500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461994800000/","tweets_1461994800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461995100000/","tweets_1461995100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461995400000/","tweets_1461995400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461995700000/","tweets_1461995700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461996000000/","tweets_1461996000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461996300000/","tweets_1461996300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461996600000/","tweets_1461996600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461996900000/","tweets_1461996900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461997200000/","tweets_1461997200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461997500000/","tweets_1461997500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461997800000/","tweets_1461997800000/",
0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461998100000/","tweets_1461998100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461998400000/","tweets_1461998400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461998700000/","tweets_1461998700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461999000000/","tweets_1461999000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461999300000/","tweets_1461999300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461999600000/","tweets_1461999600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1461999900000/","tweets_1461999900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462000200000/","tweets_1462000200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462000500000/","tweets_1462000500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462000800000/","tweets_1462000800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462001100000/","tweets_1462001100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462001400000/","tweets_1462001400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462001700000/","tweets_1462001700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462002000000/","tweets_1462002000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462002300000/","tweets_1462002300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462002600000/","tweets_1462002600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462002900000/","tweets_1462002900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462003200000/","tweets_1462003200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462003500000/","tweets_1462003500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462003800000/","tweets_1462003800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462004100000/","tweets_1462004100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462004400000/","tweets_1462004400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462004700000/","tweets_1462004700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462005000000/","tweets_1462005000000/",0.0],["dbfs:/mnt/s3Data/twitterN
ew/tweets_1462005300000/","tweets_1462005300000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462005600000/","tweets_1462005600000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462005900000/","tweets_1462005900000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462006200000/","tweets_1462006200000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462006500000/","tweets_1462006500000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462006800000/","tweets_1462006800000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462007100000/","tweets_1462007100000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462007400000/","tweets_1462007400000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462007700000/","tweets_1462007700000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462008000000/","tweets_1462008000000/",0.0],["dbfs:/mnt/s3Data/twitterNew/tweets_1462008300000/","tweets_1462008300000/",0.0]],"arguments":{},"addedWidgets":{},"removedWidgets":[],"schema":[{"name":"path","type":"\"string\""},{"name":"name","type":"\"string\""},{"name":"size","type":"\"long\""}],"overflow":false,"aggData":[],"aggSchema":[],"aggOverflow":false,"aggSeriesLimitReached":false,"aggError":"","aggType":"","plotOptions":null,"isJsonSchema":true,"dbfsResultPath":null},"errorSummary":null,"error":null,"startTime":1.462046910315E12,"submitTime":1.462046756012E12,"finishTime":1.462046911099E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"4f90a141-5dd4-4de2-8b6b-2d768a145fcb"},{"version":"CommandV1","origId":119621,"guid":"2f0fd108-e7ac-4566-9d64-28ed9364c3f7","subtype":"command","commandType
":"auto","position":1.999847412109375,"command":"val rdd1 = sc.textFile(\"/mnt/s3Data/twitterNew/tweets_1462008300000/part-00000\")","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">rdd1: org.apache.spark.rdd.RDD[String] = /mnt/s3Data/twitterNew/tweets_1462008300000/part-00000 MapPartitionsRDD[31485] at textFile at <console>:56\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.462046956183E12,"submitTime":1.46204680188E12,"finishTime":1.462046956296E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"e3dd2c87-2266-4246-92c1-82c33d6a7a71"},{"version":"CommandV1","origId":119622,"guid":"38cb520b-bf07-43ee-a156-1cc7bddafbd9","subtype":"command","commandType":"auto","position":1.9998626708984375,"command":"rdd1.count","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res55: Long = 
10504\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.462047021835E12,"submitTime":1.462046867522E12,"finishTime":1.462047022625E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"b270a085-cb89-427c-99e4-7df25f7a5b9b"},{"version":"CommandV1","origId":119623,"guid":"0b306f19-4d41-4d40-ad5d-a0f76ebe9216","subtype":"command","commandType":"auto","position":1.9998703002929688,"command":"//rdd1.take(1) // uncomment to see first entry","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"<div class=\"ansiout\"><console>:58: error: missing arguments for method top in class RDD;\nfollow this method with `_' if you want to treat it as a partially applied function\n rdd1.top\n 
^\n</div>","error":null,"startTime":1.463697737982E12,"submitTime":1.463697579216E12,"finishTime":1.463697738078E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"1a56e3a9-0402-447b-8b29-f64e08519f08"},{"version":"CommandV1","origId":119624,"guid":"6fc7f049-d199-4060-b4db-b3f6817db3e8","subtype":"command","commandType":"auto","position":1.9998779296875,"command":"display(dbutils.fs.ls(outputDirectory))","commandVersion":0,"state":"finished","results":{"type":"table","data":[["dbfs:/twitterNew/tweets_1461918420000/","tweets_1461918420000/",0.0],["dbfs:/twitterNew/tweets_1461918480000/","tweets_1461918480000/",0.0],["dbfs:/twitterNew/tweets_1461918540000/","tweets_1461918540000/",0.0],["dbfs:/twitterNew/tweets_1461918600000/","tweets_1461918600000/",0.0],["dbfs:/twitterNew/tweets_1461918660000/","tweets_1461918660000/",0.0]],"arguments":{},"addedWidgets":{},"removedWidgets":[],"schema":[{"name":"path","type":"\"string\""},{"name":"name","type":"\"string\""},{"name":"size","type":"\"long\""}],"overflow":false,"aggData":[],"aggSchema":[],"aggOverflow":false,"aggSeriesLimitReached":false,"aggError":"","aggType":"","plotOptions":null,"isJsonSchema":true,"dbfsResultPath":null},"errorSummary":null,"error":null,"startTime":1.461918687464E12,"submitTime":1.461918538574E12,"finishTime":1.461918687968E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false
,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"3ffd9561-f5dc-4748-a31e-d35bf7576334"},{"version":"CommandV1","origId":119625,"guid":"ff1c3106-4dbb-4204-9680-3e3eb8bd3f5c","subtype":"command","commandType":"auto","position":1.999908447265625,"command":"display(dbutils.fs.ls(s\"${outputDirectory}/tweets_1461918420000/\"))","commandVersion":0,"state":"finished","results":{"type":"table","data":[["dbfs:/twitterNew/tweets_1461918420000/_SUCCESS","_SUCCESS",0.0],["dbfs:/twitterNew/tweets_1461918420000/part-00000","part-00000",6092583.0]],"arguments":{},"addedWidgets":{},"removedWidgets":[],"schema":[{"name":"path","type":"\"string\""},{"name":"name","type":"\"string\""},{"name":"size","type":"\"long\""}],"overflow":false,"aggData":[],"aggSchema":[],"aggOverflow":false,"aggSeriesLimitReached":false,"aggError":"","aggType":"","plotOptions":null,"isJsonSchema":true,"dbfsResultPath":null},"errorSummary":null,"error":null,"startTime":1.461918850239E12,"submitTime":1.461918701341E12,"finishTime":1.46191885055E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"301a8f93-62c8-404e-b7fe-c5ad63661090"},{"version":"CommandV1","origId":119626,"guid":"4156272b-ab55-45cc-b7be-3803bf8f2315","subtype":"command","commandType":"auto","position":1.99993896484375,"command":"val rdd1 = 
sc.textFile(s\"${outputDirectory}/tweets_1461918420000/part-00000\")","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">rdd1: org.apache.spark.rdd.RDD[String] = /twitterNew/tweets_1461918420000/part-00000 MapPartitionsRDD[3835] at textFile at <console>:52\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"java.lang.IllegalArgumentException: Cannot head a directory: /twitterNew/tweets_1461917660000","error":"<div class=\"ansiout\">\tat com.databricks.backend.daemon.dbutils.FSUtils$.head(DBUtilsCore.scala:125)\n\tat com.databricks.dbutils_v1.package$fs$.head(dbutils_v1.scala:117)</div>","startTime":1.461918892494E12,"submitTime":1.461918743494E12,"finishTime":1.461918892606E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"6ed7cd64-0367-43fc-82a9-80c5fb856451"},{"version":"CommandV1","origId":119627,"guid":"d8448518-c092-4de7-9868-590a57073705","subtype":"command","commandType":"auto","position":1.999969482421875,"command":"rdd1.count","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res14: Long = 
1642\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"startTime":1.461919048396E12,"submitTime":1.461918899488E12,"finishTime":1.461919048932E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"4ed8b787-331f-48ae-bf02-745c1cc3ed00"},{"version":"CommandV1","origId":119628,"guid":"2ea15974-4f61-4207-8522-56001cc86192","subtype":"command","commandType":"auto","position":1.9999847412109375,"command":"// rdd1.top(1) // uncomment to see top first entry","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\"></div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"<div class=\"ansiout\"><console>:56: error: missing arguments for method top in class RDD;\nfollow this method with `_' if you want to treat it as a partially applied function\n rdd1.top\n 
^\n</div>","error":null,"startTime":1.463697790224E12,"submitTime":1.46369763144E12,"finishTime":1.4636977903E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"r.sainudiin@math.canterbury.ac.nz","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"099a65c4-5d0d-4d1d-ba67-e9fcb0f0dc07"},{"version":"CommandV1","origId":119629,"guid":"8d02b880-9790-4835-ab03-f81a5621491e","subtype":"command","commandType":"auto","position":2.0,"command":"%md\n\n# [Scalable Data Science](http://www.math.canterbury.ac.nz/~r.sainudiin/courses/ScalableDataScience/)\n\n\n### prepared by [Raazesh Sainudiin](https://nz.linkedin.com/in/raazesh-sainudiin-45955845) and [Sivanand Sivaram](https://www.linkedin.com/in/sivanand)\n\n*supported by* [](https://databricks.com/)\nand \n[](https://www.awseducate.com/microsite/CommunitiesEngageHome)","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"504d59a3-a8af-463f-8f12-3a03dedf7012"}],"dashboards":[],"guid":"915efe7a-0032-444c-bace-c23858923447","globalVars":{},"iPythonMetadata":null,"inputWidgets":{}};</script> <script 
src="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/js/notebook-main.js" onerror="window.mainJsLoadError = true;"></script>
</head>
<body>
  <script>
    // Fallback notice for a failed bundle load. The onerror handler on the
    // notebook-main.js script tag sets window.mainJsLoadError; when that flag
    // is set, a plain "Network Error" panel is appended to the page body.
    if (window.mainJsLoadError) {
      // URL of the resource that could not be loaded (echoed in the message).
      var u = 'https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/js/notebook-main.js';

      // Panel element that carries the message, with its inline styling.
      var c = document.createElement('div');
      c.style.margin = '30px';
      c.style.padding = '20px 50px';
      c.style.backgroundColor = '#f5f5f5';
      c.style.borderRadius = '5px';

      // Heading, advice line, and the name of the missing resource.
      c.innerHTML = [
        '<h1>Network Error</h1>',
        '<p><b>Please check your network connection and try again.</b></p>',
        '<p>Could not load a required resource: ' + u + '</p>'
      ].join('');

      // Attach the finished panel to the document body.
      var b = document.body;
      b.appendChild(c);
    }
  </script>
</body>
</html>