{"paragraphs":[{"text":"%spark.dep\nz.reset()\nz.addRepo(\"apache-snapshots\").url(\"https://repository.apache.org/content/repositories/snapshots\").snapshot()\nz.load(\"org.apache.streams:streams-core:0.5-incubating-SNAPSHOT\")\nz.load(\"org.apache.streams:streams-converters:0.5-incubating-SNAPSHOT\")\nz.load(\"org.apache.streams:streams-pojo:0.5-incubating-SNAPSHOT\")\nz.load(\"org.apache.streams:streams-provider-twitter:0.5-incubating-SNAPSHOT\")\nz.load(\"org.apache.streams:streams-provider-facebook:0.5-incubating-SNAPSHOT\")\nz.load(\"org.apache.streams:streams-provider-youtube:0.5-incubating-SNAPSHOT\")\nz.load(\"org.apache.streams:google-gplus:0.5-incubating-SNAPSHOT\")","user":"anonymous","dateUpdated":"2016-12-09T12:08:13-0600","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481223147273_-335257509","id":"20161208-125227_1249271958","result":{"code":"SUCCESS","type":"TEXT","msg":"res0: org.apache.zeppelin.dep.Dependency = org.apache.zeppelin.dep.Dependency@27f0ba10\n"},"dateCreated":"2016-12-08T12:52:27-0600","dateStarted":"2016-12-09T12:08:14-0600","dateFinished":"2016-12-09T12:08:37-0600","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:357"},{"title":"","text":"%spark\nimport org.apache.streams.config._\nimport org.apache.streams.core._\nimport org.apache.youtube.pojo._\n\nimport com.typesafe.config._\nimport com.youtube.provider._\n\nimport 
java.util.Iterator","user":"anonymous","dateUpdated":"2016-12-09T12:09:28-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050362_1742456570","id":"20161115-182904_1821687700","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport org.apache.streams.config._\n\nimport org.apache.streams.core._\n\nimport org.apache.youtube.pojo._\n\nimport com.typesafe.config._\n\nimport com.youtube.provider._\n\nimport java.util.Iterator\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T12:09:29-0600","dateFinished":"2016-12-09T12:09:50-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:358"},{"title":"Build 'hocon', the Apache Streams Config","text":"%spark\n\nval apiKey = z.input(\"apiKey\", \"\")\nval serviceAccountEmailAddress = z.input(\"serviceAccountEmailAddress\", \"\")\nval pathToP12KeyFile = z.input(\"pathToP12KeyFile\", \"\")\n\nval credentials_hocon = s\"\"\"\nyoutube {\n apiKey = $apiKey\n oauth {\n serviceAccountEmailAddress = \"$serviceAccountEmailAddress\"\n pathToP12KeyFile = \"$pathToP12KeyFile\"\n }\n}\n\"\"\"\n","user":"anonymous","dateUpdated":"2016-12-09T12:09:54-0600","config":{"tableHide":false,"colWidth":12,"editorMode":"ace/mode/scala","editorHide":false,"title":true,"graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]},"height":300},"enabled":true},"settings":{"params":{"ApiKey":"","apiKey":"","Username":"rawkintrevo","ConsumerKey":"","AccessToken":"","ConsumerSecret":"","AccessTokenSecret":"","pathToP12KeyFile":"/home/rawkintrevo/Downloads/Apache Streams 
Demo-d89eb26912ae.p12","userId":"UCLDJ_V9KUOdOFSbDvPfGBxw","serviceAccountEmailAddress":""},"forms":{"apiKey":{"name":"apiKey","displayName":"apiKey","type":"input","defaultValue":"","hidden":false,"$$hashKey":"object:2060"},"serviceAccountEmailAddress":{"name":"serviceAccountEmailAddress","displayName":"serviceAccountEmailAddress","type":"input","defaultValue":"","hidden":false,"$$hashKey":"object:2062"},"pathToP12KeyFile":{"name":"pathToP12KeyFile","displayName":"pathToP12KeyFile","type":"input","defaultValue":"","hidden":false,"$$hashKey":"object:2061"}}},"apps":[],"jobName":"paragraph_1481054050366_1740917574","id":"20161023-191342_1492908722","result":{"code":"SUCCESS","type":"TEXT","msg":"\napiKey: Object = \n\nserviceAccountEmailAddress: Object = \n\npathToP12KeyFile: Object = /home/rawkintrevo/Downloads/Apache Streams Demo-d89eb26912ae.p12\n\n\n\n\n\n\n\n\n\n\ncredentials_hocon: String = \n\"\nyoutube {\n apiKey = \n oauth {\n serviceAccountEmailAddress = \"\"\n pathToP12KeyFile = \"/home/rawkintrevo/Downloads/Apache Streams Demo-d89eb26912ae.p12\"\n }\n}\n\"\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T12:09:55-0600","dateFinished":"2016-12-09T12:09:56-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:359"},{"title":"","text":"%spark\n// https://www.youtube.com/account_advanced\nval myChannel = z.input(\"my YouTube Channel\", \"\")\nval myId = z.input(\"my ID\", \"\")\n\nval accounts_hocon = s\"\"\"\nyoutube.youtubeUsers = [\n { userId = \"${myId}\"},\n { userId = \"${myChannel}\"}\n ]\n\"\"\"","user":"anonymous","dateUpdated":"2016-12-09T15:35:33-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","tableHide":false,"graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true},"settings":{"params":{"my ID":"adJyExT6JcrA0kvKNHrXZw","my YouTube Channel":"UCadJyExT6JcrA0kvKNHrXZw"},"forms":{"my YouTube Channel":{"name":"my YouTube 
Channel","displayName":"my YouTube Channel","type":"input","defaultValue":"","hidden":false,"$$hashKey":"object:3273"},"my ID":{"name":"my ID","displayName":"my ID","type":"input","defaultValue":"","hidden":false,"$$hashKey":"object:3272"}}},"apps":[],"jobName":"paragraph_1481054050369_1725912367","id":"20161115-175550_1569899821","result":{"code":"SUCCESS","type":"TEXT","msg":"\nmyChannel: Object = UCadJyExT6JcrA0kvKNHrXZw\n\nmyId: Object = adJyExT6JcrA0kvKNHrXZw\n\n\n\n\n\n\n\naccounts_hocon: String = \n\"\nyoutube.youtubeUsers = [\n { userId = \"adJyExT6JcrA0kvKNHrXZw\"},\n { userId = \"UCadJyExT6JcrA0kvKNHrXZw\"}\n ]\n\"\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:35:34-0600","dateFinished":"2016-12-09T15:35:35-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:360"},{"text":"%spark\nval accounts_hocon = s\"\"\"\nyoutube.youtubeUsers = [\n# Apache Software Foundation - Topic\n{ userId = \"UCegQNPmRCAJvCq6JfHUKZ9A\"},\n# Apache Software Foundation\n{ userId = \"TheApacheFoundation\"},\n# Apache Spark\n{ userId = \"TheApacheSpark\" },\n# Apache Spark - Topic\n{ userId = \"UCwhtqOdWyCuqOboj-E1bpFQ\"},\n# Apache Flink Berlin\n{ userId = \"UCY8_lgiZLZErZPF47a2hXMA\"},\n# Apache Syncope\n{ userId = \"UCkrSQVb5Qzb13crS1kCOiQQ\"},\n# Apache Accumulo\n{ userId = \"apacheaccumulo\"},\n# Apache Hive - Topic\n{ userId = \"UCIjbkZAX5VlvSKoSzNUHIoQ\"},\n# Apache HBase - Topic\n{ userId = \"UCcGNHRiO9bi6BeH5OdhY2Kw\"},\n# Apache Cassandra - Topic\n{ userId = \"UC6nsS04n_wBpCDXqSAkFM-w\"},\n# Apache Hadoop - Topic\n{ userId = \"UCgRu3LbCjczooTVI9VSvstg\"},\n# Apache Avro - Topic\n{ userId = \"UCzHCk8Gl5eP85xz0HwXjkzw\"},\n# Apache Maven - Topic\n{ userId = \"UCBS2s2cwx-MW9rVeKwee_VA\"},\n# Apache Oozie - Topic\n{ userId = 
\"UCRyBrviuu3qMNliolYXvC0g\"},\n]\n\"\"\"\n","user":"anonymous","dateUpdated":"2016-12-09T12:10:02-0600","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481229454157_1361944716","id":"20161208-143734_203618660","result":{"code":"SUCCESS","type":"TEXT","msg":"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\naccounts_hocon: String = \n\"\nyoutube.youtubeUsers = [\n# Apache Software Foundation - Topic\n{ userId = \"UCegQNPmRCAJvCq6JfHUKZ9A\"},\n# Apache Software Foundation\n{ userId = \"TheApacheFoundation\"},\n# Apache Spark\n{ userId = \"TheApacheSpark\" },\n# Apache Spark - Topic\n{ userId = \"UCwhtqOdWyCuqOboj-E1bpFQ\"},\n# Apache Flink Berlin\n{ userId = \"UCY8_lgiZLZErZPF47a2hXMA\"},\n# Apache Syncope\n{ userId = \"UCkrSQVb5Qzb13crS1kCOiQQ\"},\n# Apache Accumulo\n{ userId = \"apacheaccumulo\"},\n# Apache Hive - Topic\n{ userId = \"UCIjbkZAX5VlvSKoSzNUHIoQ\"},\n# Apache HBase - Topic\n{ userId = \"UCcGNHRiO9bi6BeH5OdhY2Kw\"},\n# Apache Cassandra - Topic\n{ userId = \"UC6nsS04n_wBpCDXqSAkFM-w\"},\n# Apache Hadoop - Topic\n{ userId = \"UCgRu3LbCjczooTVI9VSvstg\"},\n# Apache Avro - Topic\n{ userId = \"UCzHCk8Gl5eP85xz0HwXjkzw\"},\n# Apache ..."},"dateCreated":"2016-12-08T14:37:34-0600","dateStarted":"2016-12-09T12:10:02-0600","dateFinished":"2016-12-09T12:10:03-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:361"},{"title":"","text":"%spark\nval reference = ConfigFactory.load()\nval credentials = ConfigFactory.parseString(credentials_hocon)\nval accounts = ConfigFactory.parseString(accounts_hocon)\nval typesafe = accounts.withFallback(credentials).withFallback(reference).resolve()\nval youtubeConfiguration = new ComponentConfigurator(classOf[YoutubeConfiguration]).detectConfiguration(typesafe, 
\"youtube\");\n","user":"anonymous","dateUpdated":"2016-12-09T15:35:43-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","tableHide":false,"graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050372_1724758120","id":"20161115-182824_268335435","result":{"code":"SUCCESS","type":"TEXT","msg":"reference: com.typesafe.config.Config = Config(SimpleConfigObject({\"akka\":{\"actor\":{\"creation-timeout\":\"20s\",\"debug\":{\"autoreceive\":\"off\",\"event-stream\":\"off\",\"fsm\":\"off\",\"lifecycle\":\"off\",\"receive\":\"off\",\"router-misconfiguration\":\"off\",\"unhandled\":\"off\"},\"default-dispatcher\":{\"attempt-teamwork\":\"on\",\"default-executor\":{\"fallback\":\"fork-join-executor\"},\"executor\":\"default-executor\",\"fork-join-executor\":{\"parallelism-factor\":3,\"parallelism-max\":64,\"parallelism-min\":8,\"task-peeking-mode\":\"FIFO\"},\"mailbox-requirement\":\"\",\"shutdown-timeout\":\"1s\",\"thread-pool-executor\":{\"allow-core-timeout\":\"on\",\"core-pool-size-factor\":3,\"core-pool-size-max\":64,\"core-pool-size-min\":8,\"keep-alive-time\":\"60s\",\"max-pool-size-factor\":3,\"max-pool-size-max\":64,\"max-pool-size-min\":8,\"task-queue-size\":-1,\"task-queue...\ncredentials: com.typesafe.config.Config = Config(SimpleConfigObject({\"youtube\":{\"apiKey\":\"\",\"oauth\":{\"pathToP12KeyFile\":\"/home/rawkintrevo/Downloads/Apache Streams Demo-d89eb26912ae.p12\",\"serviceAccountEmailAddress\":\"\"}}}))\n\naccounts: com.typesafe.config.Config = Config(SimpleConfigObject({\"youtube\":{\"youtubeUsers\":[{\"userId\":\"adJyExT6JcrA0kvKNHrXZw\"},{\"userId\":\"UCadJyExT6JcrA0kvKNHrXZw\"}]}}))\ntypesafe: com.typesafe.config.Config = 
Config(SimpleConfigObject({\"akka\":{\"actor\":{\"creation-timeout\":\"20s\",\"debug\":{\"autoreceive\":\"off\",\"event-stream\":\"off\",\"fsm\":\"off\",\"lifecycle\":\"off\",\"receive\":\"off\",\"router-misconfiguration\":\"off\",\"unhandled\":\"off\"},\"default-dispatcher\":{\"attempt-teamwork\":\"on\",\"default-executor\":{\"fallback\":\"fork-join-executor\"},\"executor\":\"default-executor\",\"fork-join-executor\":{\"parallelism-factor\":3,\"parallelism-max\":64,\"parallelism-min\":8,\"task-peeking-mode\":\"FIFO\"},\"mailbox-requirement\":\"\",\"shutdown-timeout\":\"1s\",\"thread-pool-executor\":{\"allow-core-timeout\":\"on\",\"core-pool-size-factor\":3,\"core-pool-size-max\":64,\"core-pool-size-min\":8,\"keep-alive-time\":\"60s\",\"max-pool-size-factor\":3,\"max-pool-size-max\":64,\"max-pool-size-min\":8,\"task-queue-size\":-1,\"task-queue-...youtubeConfiguration: org.apache.youtube.pojo.YoutubeConfiguration = org.apache.youtube.pojo.YoutubeConfiguration@6e257891[protocol=,host=,port=,version=,endpoint=,apiKey=,follow=[],youtubeUsers=[org.apache.streams.google.gplus.configuration.UserInfo@44922330[userId=adJyExT6JcrA0kvKNHrXZw,afterDate=,beforeDate=,additionalProperties={}], org.apache.streams.google.gplus.configuration.UserInfo@4bdaf062[userId=UCadJyExT6JcrA0kvKNHrXZw,afterDate=,beforeDate=,additionalProperties={}]],defaultAfterDate=,defaultBeforeDate=,oauth=org.apache.streams.google.gplus.GPlusOAuthConfiguration@4837d171[appName=,pathToP12KeyFile=/home/rawkintrevo/Downloads/Apache Streams Demo-d89eb26912ae.p12,se..."},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:35:43-0600","dateFinished":"2016-12-09T15:35:47-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:362"},{"title":"","text":"%spark\n// Pull info on those channels\nval youtubeChannelProvider = new YoutubeChannelProvider(youtubeConfiguration);\nyoutubeChannelProvider.prepare(null)\nyoutubeChannelProvider.startStream()\n//\n\nval channel_buf = 
scala.collection.mutable.ArrayBuffer.empty[Object]\nwhile( youtubeChannelProvider.isRunning()) {\n//while ( resultSet.size() < 10){\n val resultSet = youtubeChannelProvider.readCurrent()\n resultSet.size()\n val iterator = resultSet.iterator();\n while(iterator.hasNext()) {\n val datum = iterator.next();\n channel_buf += datum.getDocument\n }\n}\nchannel_buf.size ","user":"anonymous","dateUpdated":"2016-12-09T15:35:48-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050375_1725142869","id":"20161115-183815_254751684","result":{"code":"SUCCESS","type":"TEXT","msg":"\nyoutubeChannelProvider: com.youtube.provider.YoutubeChannelProvider = com.youtube.provider.YoutubeChannelProvider@6d4f8ba6\n\nchannel_buf: scala.collection.mutable.ArrayBuffer[Object] = ArrayBuffer()\n\nres45: Int = 1\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:35:48-0600","dateFinished":"2016-12-09T15:35:52-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:363"},{"title":"","text":"%spark\nimport com.typesafe.config._\nimport org.apache.streams.config._\nimport org.apache.streams.core._\nimport com.youtube.provider._\nimport org.apache.youtube.pojo._\nimport java.util.Iterator\n\nval buf = scala.collection.mutable.ArrayBuffer.empty[Object]\n\nval provider = new YoutubeUserActivityProvider(youtubeConfiguration);\nprovider.prepare(null)\nprovider.startStream()\nwhile(provider.isRunning()) {\n val resultSet = provider.readCurrent()\n resultSet.size()\n val iterator = resultSet.iterator();\n while(iterator.hasNext()) {\n val datum = iterator.next();\n //println(datum.getDocument)\n buf += datum.getDocument\n } 
\n}\nbuf.size","user":"anonymous","dateUpdated":"2016-12-09T15:35:53-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]},"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050376_1723219125","id":"20161025-184115_-1493939533","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport com.typesafe.config._\n\nimport org.apache.streams.config._\n\nimport org.apache.streams.core._\n\nimport com.youtube.provider._\n\nimport org.apache.youtube.pojo._\n\nimport java.util.Iterator\n\nbuf: scala.collection.mutable.ArrayBuffer[Object] = ArrayBuffer()\n\nprovider: com.youtube.provider.YoutubeUserActivityProvider = com.youtube.provider.YoutubeUserActivityProvider@8761209\n\nres49: Int = 1\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:35:54-0600","dateFinished":"2016-12-09T15:36:03-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:365"},{"text":"%spark\n","dateUpdated":"2016-12-09T16:01:09-0600","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/text","editorHide":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481230692418_-697858044","id":"20161208-145812_1796603409","dateCreated":"2016-12-08T14:58:12-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:366","user":"anonymous","dateFinished":"2016-12-09T16:01:04-0600","dateStarted":"2016-12-09T16:01:03-0600","result":{"code":"SUCCESS","type":"HTML","msg":"
\n

Processing

\n
"}},{"text":"%spark\nimport org.apache.spark.sql.functions._\nimport org.apache.spark.sql.UserDefinedFunction\nimport java.util.regex.Pattern\n\nval toLowerCase = udf {\n (text: String) => text.toLowerCase\n}\n\nval removeLineBreaks = udf {\n (text: String) =>\n val regex = \"[\\\\n\\\\r]\"\n val pattern = Pattern.compile(regex)\n val matcher = pattern.matcher(text)\n \n // Remove all matches, split at whitespace (repeated whitespace is allowed) then join again.\n val cleanedText = matcher.replaceAll(\" \").split(\"[ ]+\").mkString(\" \")\n \n cleanedText \n}\n\nval removePunctuationAndSpecialChar = udf {\n (text: String) =>\n val regex = \"[\\\\.\\\\,\\\\:\\\\-\\\\!\\\\?\\\\n\\\\t,\\\\%\\\\#\\\\*\\\\|\\\\=\\\\(\\\\)\\\\\\\"\\\\>\\\\<\\\\/\\\\'\\\\`\\\\&]\"\n val pattern = Pattern.compile(regex)\n val matcher = pattern.matcher(text)\n \n // Remove all matches, split at whitespace (repeated whitespace is allowed) then join again.\n val cleanedText = matcher.replaceAll(\" \").split(\"[ ]+\").mkString(\" \")\n \n cleanedText \n}","user":"anonymous","dateUpdated":"2016-12-09T12:22:49-0600","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481229174142_161145044","id":"20161208-143254_1633595174","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport org.apache.spark.sql.functions._\n\nimport org.apache.spark.sql.UserDefinedFunction\n\nimport java.util.regex.Pattern\n\ntoLowerCase: org.apache.spark.sql.UserDefinedFunction = UserDefinedFunction(,StringType,List(StringType))\n\nremoveLineBreaks: org.apache.spark.sql.UserDefinedFunction = UserDefinedFunction(,StringType,List(StringType))\n\nremovePunctuationAndSpecialChar: org.apache.spark.sql.UserDefinedFunction = 
UserDefinedFunction(,StringType,List(StringType))\n"},"dateCreated":"2016-12-08T14:32:54-0600","dateStarted":"2016-12-09T12:22:49-0600","dateFinished":"2016-12-09T12:22:55-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:368","focus":true},{"title":"","text":"%spark\nimport org.apache.streams.core.StreamsDatum\nimport com.youtube.processor._\nimport scala.collection.JavaConversions._\n//Normalize activities -> posts(s)\nval YoutubeTypeConverter = new YoutubeTypeConverter()\nYoutubeTypeConverter.prepare()\n\nval pages_datums = channel_buf.flatMap(x => YoutubeTypeConverter.process(new StreamsDatum(x)))","user":"anonymous","dateUpdated":"2016-12-09T15:36:04-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050377_1722834376","id":"20161115-183950_1113670086","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport org.apache.streams.core.StreamsDatum\n\nimport com.youtube.processor._\n\nimport scala.collection.JavaConversions._\n\nYoutubeTypeConverter: com.youtube.processor.YoutubeTypeConverter = com.youtube.processor.YoutubeTypeConverter@22a56592\npages_datums: scala.collection.mutable.ArrayBuffer[org.apache.streams.core.StreamsDatum] = ArrayBuffer(StreamsDatum{timestamp=null, sequenceid=null, metadata={}, document=org.apache.streams.pojo.json.Activity@3cabe98c[id=,actor=org.apache.streams.pojo.json.ActivityObject@17d46fba[id=id:youtube:UCadJyExT6JcrA0kvKNHrXZw,image=org.apache.streams.pojo.json.Image@4ad1b92b[additionalProperties={},duration=,height=,width=,url=https://yt3.ggpht.com/-QI9jUOjw9hw/AAAAAAAAAAI/AAAAAAAAAAA/MQ0f54LtVWs/s240-c-k-no-mo-rj-c0xffffff/photo.jpg,additionalProperties={}],displayName=Trevor 
Grant,summary=,content=,url=https://youtube.com/user/UCadJyExT6JcrA0kvKNHrXZw,objectType=,author=,published=,updated=,attachments=[],upstreamDuplicates=[],downstreamDu..."},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:36:04-0600","dateFinished":"2016-12-09T15:36:10-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:367"},{"title":"","text":"%spark\nimport org.apache.streams.jackson.StreamsJacksonMapper;\nimport sqlContext._\nimport sqlContext.implicits._\n\nval mapper = StreamsJacksonMapper.getInstance();\nval pages_jsons = pages_datums.map(o => mapper.writeValueAsString(o.getDocument))\nval pagesRDD = sc.parallelize(pages_jsons)\n\nval pagesDF = sqlContext.read.json(pagesRDD)\n\nval cleanDF = pagesDF.withColumn(\"summary\", removePunctuationAndSpecialChar(pagesDF(\"actor.summary\")))\ncleanDF.registerTempTable(\"youtube_pages\")\ncleanDF.printSchema","user":"anonymous","dateUpdated":"2016-12-09T15:36:14-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050378_1723988623","id":"20161115-183944_1706309463","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport org.apache.streams.jackson.StreamsJacksonMapper\n\nimport sqlContext._\n\nimport sqlContext.implicits._\n\nmapper: org.apache.streams.jackson.StreamsJacksonMapper = org.apache.streams.jackson.StreamsJacksonMapper@52e88281\npages_jsons: scala.collection.mutable.ArrayBuffer[String] = ArrayBuffer({\"actor\":{\"id\":\"id:youtube:UCadJyExT6JcrA0kvKNHrXZw\",\"image\":{\"url\":\"https://yt3.ggpht.com/-QI9jUOjw9hw/AAAAAAAAAAI/AAAAAAAAAAA/MQ0f54LtVWs/s240-c-k-no-mo-rj-c0xffffff/photo.jpg\"},\"displayName\":\"Trevor 
Grant\",\"summary\":\"\",\"url\":\"https://youtube.com/user/UCadJyExT6JcrA0kvKNHrXZw\",\"extensions\":{\"followers\":1,\"posts\":2}},\"verb\":\"post\",\"provider\":{\"id\":\"id:providers:youtube\",\"displayName\":\"YouTube\"},\"extensions\":{\"youtube\":{\"contentDetails\":{\"relatedPlaylists\":{\"likes\":\"LLadJyExT6JcrA0kvKNHrXZw\",\"uploads\":\"UUadJyExT6JcrA0kvKNHrXZw\"}},\"etag\":\"\\\"gMxXHe-zinKdE9lTnzKu8vjcmDI/zaadgos4wp9iQPFK80Ux0_Hbvas\\\"\",\"id\":\"UCadJyExT6JcrA0kvKNHrXZw\",\"kind\":\"youtube#channel\",\"snippet\":{\"description\":\"\",\"localized\":{\"description\":\"\",\"titl...\npagesRDD: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[93] at parallelize at :208\npagesDF: org.apache.spark.sql.DataFrame = [actor: struct,id:string,image:struct,summary:string,url:string>, extensions: struct>,etag:string,id:string,kind:string,snippet:struct,publishedAt:struct,thumbnails:struct,high:struct,medium:struct>,title:string>,statistics:struct,topicDetails:struct>>>, provider: struct,id:string,image:struct,summary:string,url:string>, extensions: struct>,etag:string,id:string,kind:string,snippet:struct,publishedAt:struct,thumbnails:struct,high:struct,medium:struct>,title:string>,statistics:struct,topicDetails:struct>>>, provider: struct posts(s)\nval YoutubeTypeConverter = new YoutubeTypeConverter()\nYoutubeTypeConverter.prepare()\n\nval useractivity_posts = buf.flatMap(x => YoutubeTypeConverter.process(new StreamsDatum(x)))\n","user":"anonymous","dateUpdated":"2016-12-09T15:36:30-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]},"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050379_1723603874","id":"20161023-191634_-1435819396","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport org.apache.streams.core.StreamsDatum\n\nimport 
com.youtube.processor._\n\nimport scala.collection.JavaConversions._\n\nYoutubeTypeConverter: com.youtube.processor.YoutubeTypeConverter = com.youtube.processor.YoutubeTypeConverter@2635f30a\n\n\n\nuseractivity_posts: scala.collection.mutable.ArrayBuffer[org.apache.streams.core.StreamsDatum] = \nArrayBuffer(StreamsDatum{timestamp=null, sequenceid=null, metadata={}, document=org.apache.streams.pojo.json.Activity@7864cbae[id=id:youtube:post:XAderH4bTyY,actor=org.apache.streams.pojo.json.ActivityObject@31423ca6[id=id:youtube:UCadJyExT6JcrA0kvKNHrXZw,image=,displayName=Trevor Grant,summary=Trevor Grant presents \"Apache Mahout? What's Next!\" at Chicago Hadoop User Group (CHUG) November 17, 2016\n\nhttps://github.com/rawkintrevo/presentations/blob/master/Mahout%20Whats%20Next%20CHUG%202016-11-17.pdf,content=,url=,objectType=,author=,published=,updated=,attachments=[],upstreamDuplicates=[],downstreamDuplicates=[],additionalProperties={handle=Trevor ..."},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:36:30-0600","dateFinished":"2016-12-09T15:36:37-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:370"},{"title":"","text":"%spark\nimport org.apache.streams.jackson.StreamsJacksonMapper;\nimport sqlContext._\nimport sqlContext.implicits._\n\nval mapper = StreamsJacksonMapper.getInstance();\nval jsons = useractivity_posts.map(o => mapper.writeValueAsString(o.getDocument))\nval activitiesRDD = sc.parallelize(jsons)\n\nval activitiesDF = sqlContext.read.json(activitiesRDD)\n\nval cleanDF = activitiesDF.withColumn(\"content\", 
removePunctuationAndSpecialChar(activitiesDF(\"content\")))\ncleanDF.registerTempTable(\"youtube_posts\")\ncleanDF.printSchema","user":"anonymous","dateUpdated":"2016-12-09T15:58:47-0600","config":{"colWidth":12,"editorMode":"ace/mode/scala","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]},"height":300},"enabled":true,"editorHide":false,"tableHide":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050380_1721680129","id":"20161023-191819_1405823231","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport org.apache.streams.jackson.StreamsJacksonMapper\n\nimport sqlContext._\n\nimport sqlContext.implicits._\n\nmapper: org.apache.streams.jackson.StreamsJacksonMapper = org.apache.streams.jackson.StreamsJacksonMapper@52e88281\njsons: scala.collection.mutable.ArrayBuffer[String] = ArrayBuffer({\"id\":\"id:youtube:post:XAderH4bTyY\",\"actor\":{\"id\":\"id:youtube:UCadJyExT6JcrA0kvKNHrXZw\",\"displayName\":\"Trevor Grant\",\"summary\":\"Trevor Grant presents \\\"Apache Mahout? What's Next!\\\" at Chicago Hadoop User Group (CHUG) November 17, 2016\\n\\nhttps://github.com/rawkintrevo/presentations/blob/master/Mahout%20Whats%20Next%20CHUG%202016-11-17.pdf\",\"handle\":\"Trevor Grant\"},\"verb\":\"post\",\"object\":{\"image\":{\"height\":480,\"width\":640,\"url\":\"https://i.ytimg.com/vi/XAderH4bTyY/sddefault.jpg\"},\"url\":\"https://www.youtube.com/watch?v=XAderH4bTyY\",\"objectType\":\"video\"},\"published\":\"2016-11-18T21:54:42.000Z\",\"provider\":{\"id\":\"id:providers:youtube\",\"displayName\":\"YouTube\"},\"title\":\"Apache Mahout? 
What's Next!\",\"content\":\"Trevor Grant present...\nactivitiesRDD: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[96] at parallelize at :222\n\nactivitiesDF: org.apache.spark.sql.DataFrame = [actor: struct, content: string, id: string, likes: struct, object: struct,objectType:string,url:string>, provider: struct, published: string, title: string, url: string, verb: string, youtube: struct,thumbnails:struct>,title:string>,statistics:struct>]\n\ncleanDF: org.apache.spark.sql.DataFrame = [actor: struct, content: string, id: string, likes: struct, object: struct,objectType:string,url:string>, provider: struct, published: string, title: string, url: string, verb: string, youtube: struct,thumbnails:struct>,title:string>,statistics:struct>]\nroot\n |-- actor: struct (nullable = true)\n | |-- displayName: string (nullable = true)\n | |-- handle: string (nullable = true)\n | |-- id: string (nullable = true)\n | |-- summary: string (nullable = true)\n |-- content: string (nullable = true)\n |-- id: string (nullable = true)\n |-- likes: struct (nullable = true)\n | |-- count: long (nullable = true)\n |-- object: struct (nullable = true)\n | |-- image: struct (nullable = true)\n | | |-- height: long (nullable = true)\n | | |-- url: string (nullable = true)\n | | |-- width: long (nullable = true)\n | |-- objectType: string (nullable = true)\n | |-- url: string (nullable = true)\n |-- provider: struct (nullable = true)\n | |-- displayName: string (nullable = true)\n | |-- id: string (nullable = true)\n |-- published: string (nullable = true)\n |-- title: string (nullable = true)\n |-- url: string (nullable = true)\n |-- verb: string (nullable = true)\n |-- youtube: struct (nullable = true)\n | |-- etag: string (nullable = true)\n | |-- id: string (nullable = true)\n | |-- kind: string (nullable = true)\n | |-- snippet: struct (nullable = true)\n | | |-- channelId: string (nullable = true)\n | | |-- channelTitle: string (nullable = true)\n | | |-- description: string 
(nullable = true)\n | | |-- publishedAt: struct (nullable = true)\n | | | |-- dateOnly: boolean (nullable = true)\n | | | |-- timeZoneShift: long (nullable = true)\n | | | |-- value: long (nullable = true)\n | | |-- thumbnails: struct (nullable = true)\n | | | |-- default: struct (nullable = true)\n | | | | |-- height: long (nullable = true)\n | | | | |-- url: string (nullable = true)\n | | | | |-- width: long (nullable = true)\n | | |-- title: string (nullable = true)\n | |-- statistics: struct (nullable = true)\n | | |-- commentCount: long (nullable = true)\n | | |-- dislikeCount: long (nullable = true)\n | | |-- favoriteCount: long (nullable = true)\n | | |-- likeCount: long (nullable = true)\n | | |-- viewCount: long (nullable = true)\n\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T15:58:47-0600","dateFinished":"2016-12-09T15:58:58-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:371"},{"text":"%spark.sql\n\nselect\nactor.id\n, actor.extensions.followers\n, extensions.youtube.kind\n, extensions.youtube.statistics.viewCount\n, extensions.youtube.contentDetails.relatedPlaylists.likes\n, extensions.youtube.contentDetails.relatedPlaylists.uploads\nfrom 
youtube_pages","dateUpdated":"2016-12-09T13:38:14-0600","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[{"name":"id","index":0,"aggr":"sum"}],"values":[{"name":"followers","index":1,"aggr":"sum"}],"groups":[],"scatter":{"xAxis":{"name":"id","index":0,"aggr":"sum"}}},"enabled":true,"editorMode":"ace/mode/sql"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481311603996_-1737864887","id":"20161209-132643_1460882062","dateCreated":"2016-12-09T13:26:43-0600","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:2520","user":"anonymous","dateFinished":"2016-12-09T13:38:15-0600","dateStarted":"2016-12-09T13:38:15-0600","result":{"code":"SUCCESS","type":"TABLE","msg":"id\tfollowers\tkind\tviewCount\tlikes\tuploads\nid:youtube:UCadJyExT6JcrA0kvKNHrXZw\t1\tyoutube#channel\t23\tLLadJyExT6JcrA0kvKNHrXZw\tUUadJyExT6JcrA0kvKNHrXZw\n"}},{"title":"","text":"%spark.sql\nselect actor.id, actor.displayName, summary, actor.extensions.followers, actor.extensions.posts from youtube_pages","user":"anonymous","dateUpdated":"2016-12-09T16:18:10-0600","config":{"colWidth":12,"editorMode":"ace/mode/sql","graph":{"mode":"table","optionOpen":false,"keys":[{"name":"id","index":0,"aggr":"sum"}],"values":[{"name":"displayName","index":1,"aggr":"sum"}],"scatter":{"yAxis":{"name":"displayName","index":1,"aggr":"sum"},"xAxis":{"name":"id","index":0,"aggr":"sum"}},"groups":[],"height":84},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050382_1722449627","id":"20161115-184113_720380817","result":{"code":"SUCCESS","type":"TABLE","msg":"id\tdisplayName\tsummary\tfollowers\tposts\nid:youtube:UCadJyExT6JcrA0kvKNHrXZw\tTrevor 
Grant\t\t1\t2\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T12:42:49-0600","dateFinished":"2016-12-09T12:42:49-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:372"},{"title":"","text":"%spark.sql\nselect id\n, published\n, actor.id\n, actor.displayName\n, content\n, title from youtube_posts\n","user":"anonymous","dateUpdated":"2016-12-09T16:18:43-0600","config":{"colWidth":12,"editorMode":"ace/mode/sql","graph":{"mode":"table","optionOpen":false,"keys":[{"name":"id","index":0,"aggr":"sum"}],"values":[{"name":"published","index":1,"aggr":"sum"}],"scatter":{"xAxis":{"name":"id","index":0,"aggr":"sum"}},"groups":[],"height":300},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050383_1722064878","id":"20161114-115056_161108156","result":{"code":"SUCCESS","type":"TABLE","msg":"id\tpublished\tid\tdisplayName\tcontent\ttitle\nid:youtube:post:XAderH4bTyY\t2016-11-18T21:54:42.000Z\tid:youtube:UCadJyExT6JcrA0kvKNHrXZw\tTrevor Grant\tTrevor Grant presents Apache Mahout What s Next at Chicago Hadoop User Group CHUG November 17 2016 https github com rawkintrevo presentations blob master Mahout 20Whats 20Next 20CHUG 202016 11 17 pdf\tApache Mahout? What's Next!\n"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-09T12:44:52-0600","dateFinished":"2016-12-09T12:44:52-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:373"},{"title":"","text":"%md\n\nThanks for playing!\n","user":"anonymous","dateUpdated":"2016-12-08T14:50:03-0600","config":{"colWidth":12,"editorMode":"ace/mode/markdown","graph":{"mode":"table","optionOpen":false,"keys":[],"values":[],"scatter":{},"groups":[],"height":300},"enabled":true,"editorHide":true,"tableHide":false},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050388_1730914103","id":"20161114-115156_1010107862","result":{"code":"SUCCESS","type":"HTML","msg":"
\n

Thanks for playing!

\n
"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-08T14:50:03-0600","dateFinished":"2016-12-08T14:50:03-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:374"},{"text":"%md\n","user":"anonymous","dateUpdated":"2016-12-08T14:50:07-0600","config":{"colWidth":12,"editorMode":"ace/mode/markdown","graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorHide":true,"tableHide":false},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1481054050389_1730529354","id":"20161119-004139_331088641","result":{"code":"SUCCESS","type":"HTML","msg":"
\n\n
"},"dateCreated":"2016-12-06T13:54:10-0600","dateStarted":"2016-12-08T14:50:07-0600","dateFinished":"2016-12-08T14:50:07-0600","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:375"}],"name":"/streams/YouTube","id":"2C586X63W","angularObjects":{"2C2HHCJF1:shared_process":[],"2C46M2ZVJ:shared_process":[],"2C3V68W8P:shared_process":[],"2C19JEMQ9:shared_process":[],"2C2YXQN34:shared_process":[],"2C384D1GM:shared_process":[],"2C1GHKYU4:shared_process":[],"2C2HAKXN1:shared_process":[],"2C2NH353S:shared_process":[],"2C4K2APGM:shared_process":[],"2C4D651HB:shared_process":[],"2C2MMQZ4Q:shared_process":[],"flinkMahout:shared_process":[],"2C2K8DRPS:shared_process":[],"2C24HBDDQ:shared_process":[],"2C37PCAVQ:shared_process":[],"2C1FEWZE8:shared_process":[],"2C51YBDES:shared_process":[],"2C47B99DD:shared_process":[],"2C4J5T1AG:shared_process":[],"sparkMahout:shared_process":[]},"config":{"looknfeel":"default"},"info":{}}