<!DOCTYPE html>
<html>
<head>
  <meta name="databricks-html-version" content="1">
<title>031_H2O_sparkling_water - Databricks</title>

<meta charset="utf-8">
<meta name="google" content="notranslate">
<meta http-equiv="Content-Language" content="en">
<meta http-equiv="Content-Type" content="text/html; charset=UTF8">
<link rel="stylesheet"
  href="https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700">

<link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/lib/css/bootstrap.min.css">
<link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/lib/jquery-ui-bundle/jquery-ui.min.css">
<link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/css/main.css">
<link rel="stylesheet" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/css/print.css" media="print">
<link rel="icon" type="image/png" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/img/favicon.ico"/>
<script>window.settings = {"sparkDocsSearchGoogleCx":"004588677886978090460:_rj0wilqwdm","dbcForumURL":"http://forums.databricks.com/","dbfsS3Host":"https://databricks-prod-storage-sydney.s3.amazonaws.com","enableThirdPartyApplicationsUI":false,"enableClusterAcls":false,"notebookRevisionVisibilityHorizon":0,"enableTableHandler":true,"isAdmin":true,"enableLargeResultDownload":false,"nameAndEmail":"Raazesh Sainudiin (r.sainudiin@math.canterbury.ac.nz)","enablePresentationTimerConfig":true,"enableFullTextSearch":true,"enableElasticSparkUI":true,"clusters":true,"hideOffHeapCache":false,"applications":false,"useStaticGuide":false,"fileStoreBase":"FileStore","configurableSparkOptionsSpec":[{"keyPattern":"spark\\.kryo(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.kryo.*","valuePatternDisplay":"*","description":"Configuration options for Kryo serialization"},{"keyPattern":"spark\\.io\\.compression\\.codec","valuePattern":"(lzf|snappy|org\\.apache\\.spark\\.io\\.LZFCompressionCodec|org\\.apache\\.spark\\.io\\.SnappyCompressionCodec)","keyPatternDisplay":"spark.io.compression.codec","valuePatternDisplay":"snappy|lzf","description":"The codec used to compress internal data such as RDD partitions, broadcast variables and shuffle outputs."},{"keyPattern":"spark\\.serializer","valuePattern":"(org\\.apache\\.spark\\.serializer\\.JavaSerializer|org\\.apache\\.spark\\.serializer\\.KryoSerializer)","keyPatternDisplay":"spark.serializer","valuePatternDisplay":"org.apache.spark.serializer.JavaSerializer|org.apache.spark.serializer.KryoSerializer","description":"Class to use for serializing objects that will be sent over the network or need to be cached in serialized form."},{"keyPattern":"spark\\.rdd\\.compress","valuePattern":"(true|false)","keyPatternDisplay":"spark.rdd.compress","valuePatternDisplay":"true|false","description":"Whether to compress serialized RDD partitions (e.g. for StorageLevel.MEMORY_ONLY_SER). Can save substantial space at the cost of some extra CPU time."},{"keyPattern":"spark\\.speculation","valuePattern":"(true|false)","keyPatternDisplay":"spark.speculation","valuePatternDisplay":"true|false","description":"Whether to use speculation (recommended off for streaming)"},{"keyPattern":"spark\\.es(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"es(\\.([^\\.]+))+","valuePattern":".*","keyPatternDisplay":"es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"spark\\.(storage|shuffle)\\.memoryFraction","valuePattern":"0?\\.0*([1-9])([0-9])*","keyPatternDisplay":"spark.(storage|shuffle).memoryFraction","valuePatternDisplay":"(0.0,1.0)","description":"Fraction of Java heap to use for Spark's shuffle or storage"},{"keyPattern":"spark\\.streaming\\.backpressure\\.enabled","valuePattern":"(true|false)","keyPatternDisplay":"spark.streaming.backpressure.enabled","valuePatternDisplay":"true|false","description":"Enables or disables Spark Streaming's internal backpressure mechanism (since 1.5). This enables the Spark Streaming to control the receiving rate based on the current batch scheduling delays and processing times so that the system receives only as fast as the system can process. Internally, this dynamically sets the maximum receiving rate of receivers. This rate is upper bounded by the values `spark.streaming.receiver.maxRate` and `spark.streaming.kafka.maxRatePerPartition` if they are set."},{"keyPattern":"spark\\.streaming\\.receiver\\.maxRate","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.receiver.maxRate","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which each receiver will receive data. Effectively, each stream will consume at most this number of records per second. Setting this configuration to 0 or a negative number will put no limit on the rate. See the deployment guide in the Spark Streaming programing guide for mode details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRatePerPartition","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRatePerPartition","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which data will be read from each Kafka partition when using the Kafka direct stream API introduced in Spark 1.3. See the Kafka Integration guide for more details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRetries","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRetries","valuePatternDisplay":"numeric","description":"Maximum number of consecutive retries the driver will make in order to find the latest offsets on the leader of each partition (a default value of 1 means that the driver will make a maximum of 2 attempts). Only applies to the Kafka direct stream API introduced in Spark 1.3."},{"keyPattern":"spark\\.streaming\\.ui\\.retainedBatches","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.ui.retainedBatches","valuePatternDisplay":"numeric","description":"How many batches the Spark Streaming UI and status APIs remember before garbage collecting."}],"enableReactNotebookComments":true,"enableResetPassword":true,"enableJobsSparkUpgrade":true,"sparkVersions":[{"key":"1.3.x-ubuntu15.10","displayName":"Spark 1.3.0","packageLabel":"spark-1.3-jenkins-ip-10-30-9-162-U0c2673ac85-Sa2ee4664b2-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.4.x-ubuntu15.10","displayName":"Spark 1.4.1","packageLabel":"spark-1.4-jenkins-ip-10-30-9-162-U0c2673ac85-S33a1e4b9c6-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.5.x-ubuntu15.10","displayName":"Spark 1.5.2","packageLabel":"spark-1.5-jenkins-ip-10-30-9-162-U0c2673ac85-S5917a1044d-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.x-ubuntu15.10","displayName":"Spark 1.6.0","packageLabel":"spark-1.6-jenkins-ip-10-30-9-162-U0c2673ac85-Scabba801f3-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"master","displayName":"Spark master (dev)","packageLabel":"","upgradable":true,"deprecated":false,"customerVisible":false}],"enableRestrictedClusterCreation":false,"enableFeedback":false,"defaultNumWorkers":8,"serverContinuationTimeoutMillis":10000,"driverStderrFilePrefix":"stderr","driverStdoutFilePrefix":"stdout","enableSparkDocsSearch":true,"prefetchSidebarNodes":true,"sparkHistoryServerEnabled":true,"sanitizeMarkdownHtml":true,"enableIPythonImportExport":true,"enableNotebookHistoryDiffing":true,"branch":"2.12.3","accountsLimit":-1,"enableNotebookGitBranching":true,"local":false,"displayDefaultContainerMemoryGB":6,"deploymentMode":"production","useSpotForWorkers":false,"enableUserInviteWorkflow":false,"enableStaticNotebooks":true,"dbcGuideURL":"#workspace/databricks_guide/00 Welcome to Databricks","enableCssTransitions":true,"pricingURL":"https://databricks.com/product/pricing","enableClusterAclsConfig":false,"orgId":0,"enableNotebookGitVersioning":true,"files":"files/","enableDriverLogsUI":true,"disableLegacyDashboards":false,"enableWorkspaceAclsConfig":true,"dropzoneMaxFileSize":4096,"enableNewDashboardViews":false,"driverLog4jFilePrefix":"log4j","enableMavenLibraries":true,"displayRowLimit":1000,"defaultSparkVersion":{"key":"1.5.x-ubuntu15.10","displayName":"Spark 1.5.2","packageLabel":"spark-1.5-jenkins-ip-10-30-9-162-U0c2673ac85-S5917a1044d-2016-02-09-02:05:59.455061","upgradable":true,"deprecated":false,"customerVisible":true},"clusterPublisherRootId":5,"enableLatestJobRunResultPermalink":true,"disallowAddingAdmins":false,"enableSparkConfUI":true,"enableOrgSwitcherUI":false,"clustersLimit":-1,"enableJdbcImport":true,"logfiles":"logfiles/","enableWebappSharding":false,"enableClusterDeltaUpdates":true,"csrfToken":"4c37e4ff-a908-4b05-8c61-2b76819fa34c","useFixedStaticNotebookVersionForDevelopment":false,"enableBasicReactDialogBoxes":true,"requireEmailUserName":true,"enableDashboardViews":false,"dbcFeedbackURL":"http://feedback.databricks.com/forums/263785-product-feedback","enableWorkspaceAclService":true,"someName":"Raazesh Sainudiin","enableWorkspaceAcls":true,"gitHash":"0c2673ac858e227cad536fdb45d140aeded238db","userFullname":"Raazesh Sainudiin","enableClusterCreatePage":false,"enableImportFromUrl":true,"enableMiniClusters":false,"enableWebSocketDeltaUpdates":true,"enableDebugUI":false,"showHiddenSparkVersions":false,"allowNonAdminUsers":true,"userId":100005,"dbcSupportURL":"","staticNotebookResourceUrl":"https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/","enableSparkPackages":true,"enableHybridClusterType":false,"enableNotebookHistoryUI":true,"availableWorkspaces":[{"name":"Workspace 0","orgId":0}],"enableFolderHtmlExport":true,"enableSparkVersionsUI":true,"databricksGuideStaticUrl":"","enableHybridClusters":true,"notebookLoadingBackground":"#fff","enableNewJobRunDetailsPage":true,"enableDashboardExport":true,"user":"r.sainudiin@math.canterbury.ac.nz","enableServerAutoComplete":true,"enableStaticHtmlImport":true,"defaultMemoryPerContainerMB":6000,"enablePresenceUI":true,"tablesPublisherRootId":7,"enableNewInputWidgetUI":false,"accounts":true,"enableNewProgressReportUI":true,"defaultCoresPerContainer":4};</script>
<script>var __DATABRICKS_NOTEBOOK_MODEL = {"version":"NotebookV1","origId":112849,"name":"031_H2O_sparkling_water","language":"scala","commands":[{"version":"CommandV1","origId":112851,"guid":"7c16b2c2-10b3-490c-b26c-cd8d94f77959","subtype":"command","commandType":"auto","position":1.0,"command":"%md\n\n# [Scalable Data Science](http://www.math.canterbury.ac.nz/~r.sainudiin/courses/ScalableDataScience/)\n\n\n### prepared by [Raazesh Sainudiin](https://nz.linkedin.com/in/raazesh-sainudiin-45955845) and [Sivanand Sivaram](https://www.linkedin.com/in/sivanand)\n\n*supported by* [![](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/images/databricks_logoTM_200px.png)](https://databricks.com/)\nand \n[![](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/images/AWS_logoTM_200px.png)](https://www.awseducate.com/microsite/CommunitiesEngageHome)","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"40b24e94-ecc5-4902-8391-4aceb6b4917b"},{"version":"CommandV1","origId":129740,"guid":"2e68f8dd-d468-44da-a1b8-e03b3f938bab","subtype":"command","commandType":"auto","position":1.000030517578125,"command":"%md\nThe [html source url](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/db/week9/17_SparklingWater/031_H2O_sparkling_water.html) of this databricks notebook and its recorded Uji ![Image of Uji, Dogen's Time-Being](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/images/UjiTimeBeingDogen.png \"uji\"):\n\n[![sds/uji/week9/17_SparklingWater/031_H2O_sparkling_water](http://img.youtube.com/vi/iDyeK3GvFpo/0.jpg)](https://www.youtube.com/v/iDyeK3GvFpo?rel=0&autoplay=1&modestbranding=1&start=3093&end=3669)\n","commandVersion":0,"state":"error","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"bb1bf151-d1bb-4b8c-933b-8b686749796d"},{"version":"CommandV1","origId":112902,"guid":"c86569e3-f569-457d-aaad-b924c0976490","subtype":"command","commandType":"auto","position":1.00006103515625,"command":"%md\n# Deep learning with H2O.ai and Spark\n* This notebook provides an introduction to the use of Deep Learning algorithms with H2O.ai and Spark\n* It introduces H2O.ai a distributed machine learning framework\n* It shows an example deep learning application written in H2O.ai (Sparkling water) and Spark\n","commandVersion":0,"state":"error","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"0adec54f-5754-4b6e-b279-3b340d5ce64d"},{"version":"CommandV1","origId":112852,"guid":"850fb814-0e6a-4a2a-bbe0-fcbae7b0f53a","subtype":"command","commandType":"auto","position":1.0001220703125,"command":"%md\n#### H2O features - overview\n[http://www.h2o.ai/product/downloads/recommended-systems.pdf](http://www.h2o.ai/product/downloads/recommended-systems.pdf)\n***\n\n* H2O Core\n* H2O Flow\n* Algorithms\n* Sparkling Water\n* H2O Frame","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"484df467-9edf-4c29-b9f7-1e08c0c8639d"},{"version":"CommandV1","origId":112853,"guid":"597f24fc-7734-446e-8722-0c180e9ca6b8","subtype":"command","commandType":"auto","position":1.000152587890625,"command":"%md\n###H2O.ai - Architecture  \n[https://github.com/h2oai/h2o-3/blob/master/h2o-docs/src/product/architecture/Architecture.md](https://github.com/h2oai/h2o-3/blob/master/h2o-docs/src/product/architecture/Architecture.md)\n\n![H2O.ai - Architecture](https://raw.githubusercontent.com/h2oai/h2o-3/master/h2o-docs/src/product/architecture/images/h2o_stack.png)\n\n-- Image Credit: Sparkling water\n","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"39b797c6-f822-4995-a41b-f81302ef9825"},{"version":"CommandV1","origId":112854,"guid":"10b6499c-0f15-4f98-82bd-75b08bb147b6","subtype":"command","commandType":"auto","position":1.00018310546875,"command":"%md\n\n### H2O Flow\n\n**Watch later (2:28 seconds)**:\n\n[![H2O Flow](http://img.youtube.com/vi/wzeuFfbW7WE/0.jpg)](https://www.youtube.com/v/wzeuFfbW7WE?rel=0&autoplay=1&modestbranding=1)\n\n","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"003981f8-da7b-4da9-94ea-530a5a5bb2c6"},{"version":"CommandV1","origId":112855,"guid":"68563325-430f-4864-8ea7-58402aa4e0b9","subtype":"command","commandType":"auto","position":1.000213623046875,"command":"%md\n###Algorithms  \n\n####Supervised Learning\n* Generalized Linear Modeling (GLM): [Tutorial](http://learn.h2o.ai/content/tutorials/glm/glm.html?_ga=1.123084171.1438470624.1462131663) | [Reference](http://h2o-release.s3.amazonaws.com/h2o/latest_stable_GLM_booklet.html)\n* Gradient Boosting Machine (GBM):  [Tutorial](http://learn.h2o.ai/content/tutorials/gbm-randomforest/index.html?_ga=1.60642349.1438470624.1462131663) | [Reference](http://h2o-release.s3.amazonaws.com/h2o/latest_stable_GBM_booklet.html)\n* Deep Learning: [Tutorial](http://learn.h2o.ai/content/tutorials/deeplearning/index.html?_ga=1.127736589.1438470624.1462131663) | [Reference](http://h2o-release.s3.amazonaws.com/h2o/latest_stable_DeepLearning_booklet.html)\n* Ensembles (Stacking): [Tutorial](http://learn.h2o.ai/content/tutorials/ensembles-stacking/index.html?_ga=1.127736589.1438470624.1462131663) |  [Reference](https://github.com/h2oai/h2o-3/blob/master/h2o-r/ensemble/README.md)\n* Distributed Random Forest:  [Tutorial](https://github.com/h2oai/h2o-3/blob/master/h2o-docs/src/product/tutorials/rf/rf.md)\n* Naive Bayes: [Reference](http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/docs-website/h2o-docs/index.html#Data%20Science%20Algorithms-Na%C3%AFve%20Bayes)\n***\n\n####Unsupervised Learning\n* Generalized Low Ranked Modeling (GLRM):  [Tutorial](http://learn.h2o.ai/content/tutorials/glrm/glrm-tutorial.html?_ga=1.127908877.1438470624.1462131663) |  [Reference](http://arxiv.org/abs/1410.0342)\n* K-Means:  [Tutorial](https://github.com/h2oai/h2o-3/blob/master/h2o-docs/src/product/tutorials/kmeans/kmeans.md)\n* PCA:  [Tutorial](https://github.com/h2oai/h2o-3/blob/master/h2o-docs/src/product/tutorials/pca/pca.md)\n* Anomaly Detection via Deep Learning: [Tutorial](https://github.com/h2oai/h2o-training-book/blob/master/hands-on_training/anomaly_detection.md)","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"5e77c66e-0f55-4dbe-a97f-97ab514f1f26"},{"version":"CommandV1","origId":112856,"guid":"19fdf4e4-2fa2-42cc-b076-fca6bd656bc5","subtype":"command","commandType":"auto","position":1.000244140625,"command":"%md\n###H2O.ai - Sparkling water  \n[https://github.com/h2oai/sparkling-water/blob/master/DEVEL.md](https://github.com/h2oai/sparkling-water/blob/master/DEVEL.md)\n\n\n\n![H2O.ai - Sparkling water](https://raw.githubusercontent.com/h2oai/sparkling-water/master/design-doc/images/Sparkling%20Water%20cluster.png)\n\n-- Image Credit: Sparkling water","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"b6bce077-b22c-4994-baf8-286d541c9dd5"},{"version":"CommandV1","origId":112857,"guid":"e362dd66-b143-4dfa-8ba0-c3137fbda5df","subtype":"command","commandType":"auto","position":1.001953125,"command":"%md\n#####Data sharing between RDD, DataFrame and H2OFrame\n\n![H2O.ai - Sparkling water](https://raw.githubusercontent.com/h2oai/sparkling-water/master/design-doc/images/DataShare.png)\n\n-- Image Credit: Sparkling water\n","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"154fb03f-c7dc-4b40-aba5-373c1b3cbdc6"},{"version":"CommandV1","origId":112858,"guid":"408a0f84-b6a9-46a3-b74d-f674e0d6c517","subtype":"command","commandType":"auto","position":1.5009765625,"command":"%md\n### APIs\n* [Core API](http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/docs-website/h2o-core/javadoc/index.html)  \n* [Algorithms](http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/docs-website/h2o-algos/javadoc/index.html)\n* [Models](http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/docs-website/h2o-genmodel/javadoc/index.html)    \n***\n  \n[Deep learning booklet](http://h2o-release.s3.amazonaws.com/h2o/rel-turchin/3/docs-website/h2o-docs/booklets/DeepLearning_Vignette.pdf)\n  \n  ","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"c09faaa0-a5cd-48e3-80d9-d57b81bc4251"},{"version":"CommandV1","origId":112859,"guid":"a448d178-f4ed-4240-9864-96cd8aa62567","subtype":"command","commandType":"auto","position":2.0,"command":"%md\n\n# [Scalable Data Science](http://www.math.canterbury.ac.nz/~r.sainudiin/courses/ScalableDataScience/)\n\n\n### prepared by [Raazesh Sainudiin](https://nz.linkedin.com/in/raazesh-sainudiin-45955845) and [Sivanand Sivaram](https://www.linkedin.com/in/sivanand)\n\n*supported by* [![](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/images/databricks_logoTM_200px.png)](https://databricks.com/)\nand \n[![](https://raw.githubusercontent.com/raazesh-sainudiin/scalable-data-science/master/images/AWS_logoTM_200px.png)](https://www.awseducate.com/microsite/CommunitiesEngageHome)","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"9ab1ac45-05d0-4194-acd6-e9db9b1ea489"}],"dashboards":[],"guid":"d1aa1923-35ef-4efd-ac89-89318289dc0e","globalVars":{},"iPythonMetadata":null,"inputWidgets":{}};</script>
<script
 src="https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/js/notebook-main.js"
 onerror="window.mainJsLoadError = true;"></script>
</head>
<body>
  <script>
if (window.mainJsLoadError) {
  var u = 'https://databricks-prod-cloudfront.cloud.databricks.com/static/201602081754420800-0c2673ac858e227cad536fdb45d140aeded238db/js/notebook-main.js';
  var b = document.getElementsByTagName('body')[0];
  var c = document.createElement('div');
  c.innerHTML = ('<h1>Network Error</h1>' +
    '<p><b>Please check your network connection and try again.</b></p>' +
    '<p>Could not load a required resource: ' + u + '</p>');
  c.style.margin = '30px';
  c.style.padding = '20px 50px';
  c.style.backgroundColor = '#f5f5f5';
  c.style.borderRadius = '5px';
  b.appendChild(c);
}
</script>
</body>
</html>