{"paragraphs":[{"text":"%spark\n\n// http://www.itshared.org/2015/04/apache-mahout-samsara-quick-start.html\n// add interpreter vars and jars\n\nimport org.apache.mahout.math._\nimport org.apache.mahout.math.scalabindings._\nimport org.apache.mahout.math.drm._\nimport org.apache.mahout.math.scalabindings.RLikeOps._\nimport org.apache.mahout.math.drm.RLikeDrmOps._\nimport org.apache.mahout.sparkbindings._\n\nimplicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)\n\nval matrix = dense(\n (2, 2, 10.5, 10, 29.509541), // Apple Cinnamon Cheerios\n (1, 2, 12, 12, 18.042851), // Cap'n'Crunch\n (1, 1, 12, 13, 22.736446), // Cocoa Puffs\n (2, 1, 11, 13, 32.207582), // Froot Loops\n (1, 2, 12, 11, 21.871292), // Honey Graham Ohs\n (2, 1, 16, 8, 36.187559), // Wheaties Honey Gold\n (6, 2, 17, 1, 50.764999), // Cheerios\n (3, 2, 13, 7, 40.400208), // Clusters\n (3, 3, 13, 4, 45.811716)) // Great Grains Pecan\n \n \nval drmData = drmParallelize(matrix, numPartitions = 2)\n\nval drmX = drmData(::, 0 until 4)\nval y = drmData.collect(::, 4)\n\nval drmXtX = drmX.t %*% drmX\n\nval drmXty = drmX.t %*% y\n\n\nval XtX = drmXtX.collect\nval Xty = drmXty.collect(::, 0)\n\nval beta = solve(XtX, Xty)\n\nval yFitted = (drmX %*% beta).collect(::, 0)\n(y - yFitted).norm(2)\n\ndef ols(drmX: DrmLike[Int], y: Vector) = \n solve(drmX.t %*% drmX, drmX.t %*% y)(::, 0)\n \n def goodnessOfFit(drmX: DrmLike[Int], beta: Vector, y: Vector) = {\n val fittedY = (drmX %*% beta).collect(::, 0)\n (y - fittedY).norm(2)\n}\n\nval drmXwithBiasColumn = drmX cbind 1\n\nval betaWithBiasTerm = ols(drmXwithBiasColumn, y)\ngoodnessOfFit(drmXwithBiasColumn, betaWithBiasTerm, y)\n\nval cachedDrmX = drmXwithBiasColumn.checkpoint()\n\nval betaWithBiasTerm = ols(cachedDrmX, y)\nval goodness = goodnessOfFit(cachedDrmX, betaWithBiasTerm, y)\n\ncachedDrmX.uncache()\n\ngoodness","authenticationInfo":{},"dateUpdated":"May 18, 2016 8:15:46 
AM","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true,"editorMode":"ace/mode/scala"},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1463542119378_523266244","id":"20160517-222839_1660098348","result":{"code":"SUCCESS","type":"TEXT","msg":"import org.apache.mahout.math._\nimport org.apache.mahout.math.scalabindings._\nimport org.apache.mahout.math.drm._\nimport org.apache.mahout.math.scalabindings.RLikeOps._\nimport org.apache.mahout.math.drm.RLikeDrmOps._\nimport org.apache.mahout.sparkbindings._\nsdc: org.apache.mahout.sparkbindings.SparkDistributedContext = org.apache.mahout.sparkbindings.SparkDistributedContext@34ae2b88\nwarning: Class it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap not found - continuing with a stub.\nmatrix: org.apache.mahout.math.DenseMatrix = \n{\n 0 =>\t{0:2.0,1:2.0,2:10.5,3:10.0,4:29.509541}\n 1 =>\t{0:1.0,1:2.0,2:12.0,3:12.0,4:18.042851}\n 2 =>\t{0:1.0,1:1.0,2:12.0,3:13.0,4:22.736446}\n 3 =>\t{0:2.0,1:1.0,2:11.0,3:13.0,4:32.207582}\n 4 =>\t{0:1.0,1:2.0,2:12.0,3:11.0,4:21.871292}\n 5 =>\t{0:2.0,1:1.0,2:16.0,3:8.0,4:36.187559}\n 6 =>\t{0:6.0,1:2.0,2:17.0,3:1.0,4:50.764999}\n 7 =>\t{0:3.0,1:2.0,2:13.0,3:7.0,4:40.400208}\n 8 =>\t{0:3.0,1:3.0,2:13.0,3:4.0,4:45.811716}\n}\ndrmData: org.apache.mahout.math.drm.CheckpointedDrm[Int] = org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@3194741c\ndrmX: org.apache.mahout.math.drm.DrmLike[Int] = OpMapBlock(org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@3194741c,,4,-1,true)\ny: org.apache.mahout.math.Vector = {0:29.509541,1:18.042851,2:22.736446,3:32.207582,4:21.871292,5:36.187559,6:50.764999,7:40.400208,8:45.811716}\ndrmXtX: org.apache.mahout.math.drm.DrmLike[Int] = OpABAnyKey(OpAt(OpMapBlock(org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@3194741c,,4,-1,true)),OpMapBlock(org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@3194741c,,4,-1,true))\ndrmXty: 
org.apache.mahout.math.drm.DrmLike[Int] = OpAx(OpAt(OpMapBlock(org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@3194741c,,4,-1,true)),{0:29.509541,1:18.042851,2:22.736446,3:32.207582,4:21.871292,5:36.187559,6:50.764999,7:40.400208,8:45.811716})\nXtX: org.apache.mahout.math.Matrix = \n{\n 0 =>\t{0:69.0,1:40.0,2:291.0,3:137.0}\n 1 =>\t{0:40.0,1:32.0,2:207.0,3:128.0}\n 2 =>\t{0:291.0,1:207.0,2:1546.25,3:968.0}\n 3 =>\t{0:137.0,1:128.0,2:968.0,3:833.0}\n}\nXty: org.apache.mahout.math.Vector = {0:821.6857190000001,1:549.744517,2:3978.7015894999995,3:2272.779989}\nbeta: org.apache.mahout.math.Vector = {0:5.247349465378446,1:2.750794578467531,2:1.1527813010791554,3:0.10312017617608908}\nyFitted: org.apache.mahout.math.Vector = {0:29.131693510783975,1:25.819756349376444,2:23.172081947084997,3:27.266650111384287,4:25.716636173200357,5:32.514955735899626,6:56.68608824372747,7:36.95163570033205,8:39.393069750271316}\nres1: Double = 14.200396723606845\nols: (drmX: org.apache.mahout.math.drm.DrmLike[Int], y: org.apache.mahout.math.Vector)org.apache.mahout.math.Vector\ngoodnessOfFit: (drmX: org.apache.mahout.math.drm.DrmLike[Int], beta: org.apache.mahout.math.Vector, y: org.apache.mahout.math.Vector)Double\ndrmXwithBiasColumn: org.apache.mahout.math.drm.DrmLike[Int] = OpCbindScalar(OpMapBlock(org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@3194741c,,4,-1,true),1.0,false)\nbetaWithBiasTerm: org.apache.mahout.math.Vector = {0:-1.3362653883272289,1:-13.15770132067483,2:-4.152654199020216,3:-5.679908094232256,4:163.1793268784127}\nres2: Double = 7.623280714561956\ncachedDrmX: org.apache.mahout.math.drm.CheckpointedDrm[Int] = org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@4e0af0d\nbetaWithBiasTerm: org.apache.mahout.math.Vector = {0:-1.3362653883272289,1:-13.15770132067483,2:-4.152654199020216,3:-5.679908094232256,4:163.1793268784127}\ngoodness: Double = 7.623280714561956\nres3: cachedDrmX.type = 
org.apache.mahout.sparkbindings.drm.CheckpointedDrmSpark@4e0af0d\nres4: Double = 7.623280714561956\n"},"dateCreated":"May 17, 2016 10:28:39 PM","dateStarted":"May 18, 2016 8:15:46 AM","dateFinished":"May 18, 2016 8:16:21 AM","status":"ABORT","progressUpdateIntervalMs":500,"$$hashKey":"object:261","focus":true},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{}},"enabled":true},"settings":{"params":{},"forms":{}},"jobName":"paragraph_1463575858988_632923199","id":"20160518-075058_175420733","dateCreated":"May 18, 2016 7:50:58 AM","status":"READY","progressUpdateIntervalMs":500,"$$hashKey":"object:262"}],"name":"[MAHOUT][PROVING-GROUNDS]Linear Regression in Spark","id":"2BM7XY634","angularObjects":{"2BANREB9T:shared_process":[],"2B95VQPWE:shared_process":[],"2B7FY26FF:shared_process":[],"2BA9PQW6K:shared_process":[],"2BAZ6AN3R:shared_process":[],"2B89JN8NB:shared_process":[],"2BAUGSG18:shared_process":[],"2BKRP1BV8:shared_process":[],"2B88H2DWF:shared_process":[],"2B7JPXM9Z:shared_process":[],"2B91YJ1JR:shared_process":[],"2B9FGGAMR:shared_process":[],"2B9NH1VM4:shared_process":[]},"config":{"looknfeel":"default"},"info":{}}