{"paragraphs":[
{"text":"%sh\n# Download the workshop dataset into the interpreter's working directory.\n# -f: exit non-zero on an HTTP error instead of saving the error page as abstracts.csv;\n# -sS: hide the progress meter but still print errors; -L: follow redirects.\ncurl -fsSL \"https://raw.githubusercontent.com/mmatloka/machine-learning-by-example-workshop/master/abstracts.csv\" -o abstracts.csv","user":"anonymous","dateUpdated":"2017-04-02T09:30:53+0000","config":{"editorSetting":{"language":"sh","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/sh","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283751_-1163559113","id":"20161012-204218_760048740","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:30:53+0000","dateFinished":"2017-04-02T09:30:54+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:11130"},
{"text":"import org.apache.spark.sql.functions._\nimport org.apache.spark.ml._\nimport org.apache.spark.ml.feature._\nimport org.apache.spark.ml.classification._\nimport org.apache.spark.ml.evaluation._\nimport spark.implicits._ \nimport org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}","user":"anonymous","dateUpdated":"2017-04-02T09:30:53+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283772_-1160865871","id":"20161012-204230_870345712","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:30:54+0000","dateFinished":"2017-04-02T09:30:59+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11131"},
{"text":"%md\n# Hello hello\n * spark\n * zeppelin","user":"anonymous","dateUpdated":"2017-04-02T09:30:53+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"markdown","editOnDblClick":true},"editorMode":"ace/mode/markdown","editorHide":true,"tableHide":false},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1491067295372_-695690846","id":"20170401-172135_798841180","dateCreated":"2017-04-01T17:21:35+0000","dateStarted":"2017-04-02T09:30:54+0000","dateFinished":"2017-04-02T09:30:54+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11132"},
{"text":"// Read the semicolon-delimited CSV (first row is the header) and cache it; later cells reuse `data`.\nval data = spark.read.option(\"header\",\"true\").option(\"delimiter\",\";\").csv(\"abstracts.csv\").cache()\n\nz.show(data)","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","helium":{},"results":{"1":{"graph":{"mode":"table","height":300,"optionOpen":false,"setting":{"multiBarChart":{"stacked":false}},"commonSetting":{},"keys":[{"name":"label","index":0,"aggr":"sum"}],"groups":[],"values":[{"name":"title","index":1,"aggr":"sum"}]},"helium":{}}},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283773_-1161250620","id":"20161012-204237_107838897","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:30:54+0000","dateFinished":"2017-04-02T09:31:01+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11133"},
{"text":"data.printSchema()","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1491066520873_1603877830","id":"20170401-170840_449155798","dateCreated":"2017-04-01T17:08:40+0000","dateStarted":"2017-04-02T09:31:00+0000","dateFinished":"2017-04-02T09:31:02+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11134"},
{"text":"data.show()","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1491066622060_2062268656","id":"20170401-171022_51998909","dateCreated":"2017-04-01T17:10:22+0000","dateStarted":"2017-04-02T09:31:01+0000","dateFinished":"2017-04-02T09:31:03+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11135"},
{"text":"data.select($\"label\").distinct.show()","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1491066654582_1069277718","id":"20170401-171054_362585262","dateCreated":"2017-04-01T17:10:54+0000","dateStarted":"2017-04-02T09:31:02+0000","dateFinished":"2017-04-02T09:31:06+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11136"},
{"text":"data.filter($\"label\" === \"Backend\").show()","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1491066755373_-1710639831","id":"20170401-171235_1579506098","dateCreated":"2017-04-01T17:12:35+0000","dateStarted":"2017-04-02T09:31:04+0000","dateFinished":"2017-04-02T09:31:08+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11137"},
{"text":"// Fit a label -> numeric index mapping on the whole dataset and show the indexed rows.\n// NOTE(review): this is the already-fitted model, and it is the object placed into the pipeline further down.\nval stringIndexer = new StringIndexer().setInputCol(\"label\").setOutputCol(\"indexedLabel\").fit(data)\n\nval indexed = stringIndexer.transform(data)\nz.show(indexed)","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283775_-1160481122","id":"20161013-174102_674414389","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:07+0000","dateFinished":"2017-04-02T09:31:12+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11138"},
{"text":"// Maps the numeric prediction column back to label strings, using the labels held by stringIndexer.\nval indexToString = new IndexToString()\n.setInputCol(\"prediction\").setOutputCol(\"predictionLabel\")\n.setLabels(stringIndexer.labels)\n\n// Adds a titleAndText column: title and text joined by a single space.\nval sqlTransformer = new SQLTransformer().setStatement(\"SELECT *, concat(title, ' ' , text) AS titleAndText FROM __THIS__\")\n \nval transformed = sqlTransformer.transform(indexed)\nz.show(transformed)","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283781_-1078144858","id":"20161013-174337_1823444878","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:08+0000","dateFinished":"2017-04-02T09:31:16+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11139"},
{"text":"// Tokenize titleAndText into the words column using a non-word-character regex pattern.\nval regexTokenizer = new RegexTokenizer()\n.setInputCol(\"titleAndText\")\n.setOutputCol(\"words\")\n.setPattern(\"\\\\W+\")\n\nval words = regexTokenizer.transform(transformed)\nz.show(words)","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283831_-1083531342","id":"20161013-174458_533107829","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:12+0000","dateFinished":"2017-04-02T09:31:18+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11140"},
{"text":"// Remove stop words from the tokens; no custom word list is set here, so the remover's defaults apply.\nval stopWordsRemover = new StopWordsRemover()\n.setInputCol(\"words\")\n.setOutputCol(\"filtered\")\n\nval filtered = stopWordsRemover.transform(words)\nz.show(filtered)","user":"anonymous","dateUpdated":"2017-04-02T09:30:54+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283837_-1087378831","id":"20161013-174807_1260838266","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:16+0000","dateFinished":"2017-04-02T09:31:20+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11141"},
{"text":"// Hash the filtered tokens into a term-frequency vector with 2048 features.\nval hashingTF = new HashingTF().setInputCol(\"filtered\")\n.setOutputCol(\"features\")\n.setNumFeatures(2048)\n\nval hashed = hashingTF.transform(filtered)\nz.show(hashed)","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283846_-1101614541","id":"20161013-174935_820262313","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:18+0000","dateFinished":"2017-04-02T09:31:22+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11142"},
{"text":"val naiveBayes = new NaiveBayes().setLabelCol(\"indexedLabel\")\n.setFeaturesCol(\"features\")\n\n// Chain every stage built in the cells above into a single pipeline, ending with the index-to-label mapping.\nval pipeline = new Pipeline().setStages(Array(stringIndexer, sqlTransformer, regexTokenizer, stopWordsRemover, hashingTF, naiveBayes, indexToString))\n\n// Fixed seed so the 75/25 train/test split, and every metric derived from it, is the same on each run.\nval Array(trainData, testData) = data.randomSplit(Array(0.75, 0.25), 42L)\n\nval model = pipeline.fit(trainData)","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283855_-1105077281","id":"20161013-175234_1983146891","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:21+0000","dateFinished":"2017-04-02T09:31:26+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11143"},
{"text":"// Score both splits so that train and test accuracy can be compared in the next cell.\nval trainPredictions = model.transform(trainData)\nval testPredictions = model.transform(testData)\n\nz.show(testPredictions)","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283856_-1094689060","id":"20161013-175638_63185388","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:23+0000","dateFinished":"2017-04-02T09:31:29+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11144"},
{"text":"// Accuracy of the prediction column measured against indexedLabel.\nval evaluator = new MulticlassClassificationEvaluator()\n.setLabelCol(\"indexedLabel\")\n.setPredictionCol(\"prediction\")\n.setMetricName(\"accuracy\")\n\nval trainAccuracy = evaluator.evaluate(trainPredictions)\nval testAccuracy = evaluator.evaluate(testPredictions)","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283858_-1093919563","id":"20161013-175727_265682740","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:27+0000","dateFinished":"2017-04-02T09:31:33+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11145"},
{"text":"// Unlabelled examples: only the title and text columns are supplied.\nval newObservations = spark.createDataFrame(Seq(\n (\"Machine learning by example\", \"I will present Spark...\"),\n (\"Docker in Java\", \"...\")\n )).toDF(\"title\",\"text\")\n \nz.show(model.transform(newObservations))","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283858_-1093919563","id":"20161013-175926_416704734","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:29+0000","dateFinished":"2017-04-02T09:31:35+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11146"},
{"text":"// Candidate feature-vector sizes for HashingTF.\nval paramGrid = new ParamGridBuilder()\n .addGrid(hashingTF.numFeatures, Array(256, 512, 1024, 2048, 4096))\n .build()\n \n// 5-fold cross-validation over the whole pipeline; the fixed seed keeps fold assignment reproducible.\nval cv = new CrossValidator()\n .setEstimator(pipeline)\n .setEvaluator(evaluator)\n .setEstimatorParamMaps(paramGrid)\n .setNumFolds(5)\n .setSeed(42L)\n \nval cvModel = cv.fit(trainData)\nval cvPredictions = cvModel.transform(testData)\nevaluator.evaluate(cvPredictions)","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"editorSetting":{"language":"scala","editOnDblClick":false},"colWidth":12,"editorMode":"ace/mode/scala","results":{},"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490546283859_-1094304311","id":"20161013-180111_87146992","dateCreated":"2017-03-26T16:38:03+0000","dateStarted":"2017-04-02T09:31:33+0000","dateFinished":"2017-04-02T09:31:52+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11147"},
{"text":"// Pair every parameter map that was tried with its average cross-validation metric.\ncvModel.getEstimatorParamMaps\n .zip(cvModel.avgMetrics)\n ","user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490547501152_-285260050","id":"20170326-165821_2053828107","dateCreated":"2017-03-26T16:58:21+0000","dateStarted":"2017-04-02T09:31:36+0000","dateFinished":"2017-04-02T09:31:53+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11148"},
{"user":"anonymous","dateUpdated":"2017-04-02T09:30:55+0000","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"scala","editOnDblClick":false},"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1490547782884_-1319453582","id":"20170326-170302_2084090910","dateCreated":"2017-03-26T17:03:02+0000","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:11149"}
],"name":"Workshop","id":"2CDN8YDUN","angularObjects":{"2CBPQ81Z9:shared_process":[],"2CD39PTE7:shared_process":[],"2CAG5Y7DS:shared_process":[],"2CC16D29A:shared_process":[],"2C9YQEPS1:shared_process":[],"2CCP3JGRA:shared_process":[],"2CA5DSY1Q:shared_process":[],"2C9WN2RGA:shared_process":[],"2CD2HQKST:shared_process":[]},"config":{"looknfeel":"default","personalizedMode":"false"},"info":{}}