{"paragraphs":[{"title":"Download Text Data on Apache NiFi","text":"%sh wget https://raw.githubusercontent.com/roberthryniewicz/datasets/master/About-Apache-NiFi.txt -O /tmp/nifi.txt","dateUpdated":"2016-04-11T08:55:03+0000","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/sh","title":true},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1456516185852_-361375517","id":"20160226-194945_2037625547","dateCreated":"2016-02-26T07:49:45+0000","dateStarted":"2016-04-11T08:55:03+0000","dateFinished":"2016-04-11T08:55:03+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:187"},{"title":"Move downloaded text to HDFS","text":"%sh\n\n# Remove existing copy of data from HDFS\nhadoop fs -rm -f /tmp/nifi.txt\n\n# Move latest text data to HDFS\nhadoop fs -put /tmp/nifi.txt /tmp","dateUpdated":"2016-04-11T08:56:54+0000","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/sh","title":true},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1456517163390_-1029878637","id":"20160226-200603_237571112","dateCreated":"2016-02-26T08:06:03+0000","dateStarted":"2016-04-11T08:56:27+0000","dateFinished":"2016-04-11T08:56:32+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:188"},{"title":"Count the number of non-empty lines","text":"%pyspark\n\n# Create an RDD from text file in HDFS\nmyLines = sc.textFile('hdfs:///tmp/nifi.txt')\n\n# Filter out empty lines\nmyLinesFiltered = myLines.filter( lambda x: len(x) > 0 )\n\n# Count number of non-empty lines\ncount = myLinesFiltered.count()\nprint count","dateUpdated":"2016-04-11T08:58:50+0000","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala","title":true},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1456517186199_1404858375","id":"20160226-200626_1038350828","dateCreated":"2016-02-26T08:06:26+0000","dateStarted":"2016-04-11T08:58:19+0000","dateFinished":"2016-04-11T08:58:19+0000","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:189"},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1456517209483_1653806238","id":"20160226-200649_425588199","dateCreated":"2016-02-26T08:06:49+0000","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:190"}],"name":"Getting Started / Apache Spark in 5 Minutes","id":"2BEQE47HR","angularObjects":{},"config":{"looknfeel":"default"},"info":{}}