<!DOCTYPE html>
<html>
<head>
  <meta name="databricks-html-version" content="1">
<title>4.1_SparkR-Introduction - Databricks</title>

<meta charset="utf-8">
<meta name="google" content="notranslate">
<meta name="robots" content="nofollow">
<meta http-equiv="Content-Language" content="en">
<meta http-equiv="Content-Type" content="text/html; charset=UTF8">
<link rel="stylesheet"
  href="https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700">

<link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/lib/css/bootstrap.min.css">
<link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/lib/jquery-ui-bundle/jquery-ui.min.css">
<link rel="stylesheet" type="text/css" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/css/main.css">
<link rel="stylesheet" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/css/print.css" media="print">
<link rel="icon" type="image/png" href="https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/img/favicon.ico"/>
<script>window.settings = {"enableNotebookNotifications":true,"enableSshKeyUI":false,"defaultInteractivePricePerDBU":0.4,"enableOnDemandClusterType":true,"enableAutoCompleteAsYouType":[],"devTierName":"Community Edition","enableJobsPrefetching":true,"workspaceFeaturedLinks":[{"linkURI":"https://docs.databricks.com/index.html","displayName":"Documentation","icon":"question"},{"linkURI":"https://docs.databricks.com/release-notes/product/index.html","displayName":"Release Notes","icon":"code"},{"linkURI":"https://docs.databricks.com/spark/latest/training/index.html","displayName":"Training & Tutorials","icon":"graduation-cap"}],"enableClearStateFeature":false,"dbcForumURL":"http://forums.databricks.com/","enableProtoClusterInfoDeltaPublisher":true,"serverlessDefaultSparkVersion":"2.2.x-scala2.11","maxCustomTags":45,"serverlessDefaultMaxWorkers":20,"enableInstanceProfilesUIInJobs":true,"nodeInfo":{"node_types":[{"support_ssh":false,"spark_heap_memory":4800,"instance_type_id":"r3.2xlarge","spark_core_oversubscription_factor":8.0,"node_type_id":"dev-tier-node","description":"Community Optimized","support_cluster_tags":false,"container_memory_mb":6000,"node_instance_type":{"instance_type_id":"r3.2xlarge","provider":"AWS","local_disk_size_gb":160,"compute_units":26.0,"number_of_ips":14,"local_disks":1,"reserved_compute_units":3.64,"gpus":0,"memory_mb":62464,"num_cores":8,"local_disk_type":"AHCI","max_attachable_disks":0,"supported_disk_types":[{"ebs_volume_type":"GENERAL_PURPOSE_SSD"},{"ebs_volume_type":"THROUGHPUT_OPTIMIZED_HDD"}],"reserved_memory_mb":4800},"memory_mb":6144,"is_hidden":false,"category":"Community Edition","num_cores":0.88,"support_port_forwarding":false,"support_ebs_volumes":false,"is_deprecated":false}],"default_node_type_id":"dev-tier-node"},"enableDatabaseSupportClusterChoice":true,"enableClusterAcls":true,"notebookRevisionVisibilityHorizon":999999,"serverlessClusterProductName":"Serverless Pool","enableTableHandler":false,"maxEbsVolumesPerInstance":10,"isAdmin":true,"deltaProcessingBatchSize":1000,"timerUpdateQueueLength":100,"enableLargeResultDownload":true,"maxElasticDiskCapacityGB":5000,"serverlessDefaultMinWorkers":2,"zoneInfos":[{"id":"us-west-2c","isDefault":true},{"id":"us-west-2b","isDefault":false},{"id":"us-west-2a","isDefault":false}],"enableCustomSpotPricingUIByTier":false,"serverlessClustersEnabled":false,"enableEBSVolumesUIForJobs":true,"enablePublishNotebooks":true,"enableMaxConcurrentRuns":true,"enableJobAclsConfig":false,"enableFullTextSearch":false,"enableElasticSparkUI":false,"enableNewClustersCreate":true,"clusters":true,"allowRunOnPendingClusters":true,"fileStoreBase":"FileStore","enableSshKeyUIInJobs":true,"enableDetachAndAttachSubMenu":false,"configurableSparkOptionsSpec":[{"keyPattern":"spark\\.kryo(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.kryo.*","valuePatternDisplay":"*","description":"Configuration options for Kryo serialization"},{"keyPattern":"spark\\.io\\.compression\\.codec","valuePattern":"(lzf|snappy|org\\.apache\\.spark\\.io\\.LZFCompressionCodec|org\\.apache\\.spark\\.io\\.SnappyCompressionCodec)","keyPatternDisplay":"spark.io.compression.codec","valuePatternDisplay":"snappy|lzf","description":"The codec used to compress internal data such as RDD partitions, broadcast variables and shuffle outputs."},{"keyPattern":"spark\\.serializer","valuePattern":"(org\\.apache\\.spark\\.serializer\\.JavaSerializer|org\\.apache\\.spark\\.serializer\\.KryoSerializer)","keyPatternDisplay":"spark.serializer","valuePatternDisplay":"org.apache.spark.serializer.JavaSerializer|org.apache.spark.serializer.KryoSerializer","description":"Class to use for serializing objects that will be sent over the network or need to be cached in serialized form."},{"keyPattern":"spark\\.rdd\\.compress","valuePattern":"(true|false)","keyPatternDisplay":"spark.rdd.compress","valuePatternDisplay":"true|false","description":"Whether to compress serialized RDD partitions (e.g. for StorageLevel.MEMORY_ONLY_SER). Can save substantial space at the cost of some extra CPU time."},{"keyPattern":"spark\\.speculation","valuePattern":"(true|false)","keyPatternDisplay":"spark.speculation","valuePatternDisplay":"true|false","description":"Whether to use speculation (recommended off for streaming)"},{"keyPattern":"spark\\.es(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"es(\\.([^\\.]+))+","valuePattern":".*","keyPatternDisplay":"es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"spark\\.(storage|shuffle)\\.memoryFraction","valuePattern":"0?\\.0*([1-9])([0-9])*","keyPatternDisplay":"spark.(storage|shuffle).memoryFraction","valuePatternDisplay":"(0.0,1.0)","description":"Fraction of Java heap to use for Spark's shuffle or storage"},{"keyPattern":"spark\\.streaming\\.backpressure\\.enabled","valuePattern":"(true|false)","keyPatternDisplay":"spark.streaming.backpressure.enabled","valuePatternDisplay":"true|false","description":"Enables or disables Spark Streaming's internal backpressure mechanism (since 1.5). This enables the Spark Streaming to control the receiving rate based on the current batch scheduling delays and processing times so that the system receives only as fast as the system can process. Internally, this dynamically sets the maximum receiving rate of receivers. This rate is upper bounded by the values `spark.streaming.receiver.maxRate` and `spark.streaming.kafka.maxRatePerPartition` if they are set."},{"keyPattern":"spark\\.streaming\\.receiver\\.maxRate","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.receiver.maxRate","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which each receiver will receive data. Effectively, each stream will consume at most this number of records per second. Setting this configuration to 0 or a negative number will put no limit on the rate. See the deployment guide in the Spark Streaming programing guide for mode details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRatePerPartition","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRatePerPartition","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which data will be read from each Kafka partition when using the Kafka direct stream API introduced in Spark 1.3. See the Kafka Integration guide for more details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRetries","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRetries","valuePatternDisplay":"numeric","description":"Maximum number of consecutive retries the driver will make in order to find the latest offsets on the leader of each partition (a default value of 1 means that the driver will make a maximum of 2 attempts). Only applies to the Kafka direct stream API introduced in Spark 1.3."},{"keyPattern":"spark\\.streaming\\.ui\\.retainedBatches","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.ui.retainedBatches","valuePatternDisplay":"numeric","description":"How many batches the Spark Streaming UI and status APIs remember before garbage collecting."}],"enableReactNotebookComments":true,"enableAdminPasswordReset":false,"enableResetPassword":true,"maxClusterTagValueLength":255,"enableJobsSparkUpgrade":true,"perClusterAutoterminationEnabled":false,"enableNotebookCommandNumbers":true,"sparkVersions":[{"key":"1.6.3-db2-hadoop2-scala2.10","displayName":"Spark 1.6.3-db2 (Hadoop 2, Scala 2.10)","packageLabel":"spark-image-aba860a0ffce4f3471fb14aefdcb1d768ac66a53a5ad884c48745ef98aeb9d67","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.1-db5-scala2.11","displayName":"Spark 2.1.1-db5 (Scala 2.11)","packageLabel":"spark-image-08d9fc1551087e0876236f19640c4a83116b1649f15137427d21c9056656e80e","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.x-ubuntu15.10","displayName":"Spark 1.6.x (Hadoop 1)","packageLabel":"spark-image-8cea23fb9094e174bf5815d79009f4a8e383eb86cf2909cf6c6434ed8da2a16a","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"1.4.x-ubuntu15.10","displayName":"Spark 1.4.1 (Hadoop 1)","packageLabel":"spark-image-f710650fb8aaade8e4e812368ea87c45cd8cd0b5e6894ca6c94f3354e8daa6dc","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.2.x-scala2.11","displayName":"3.0 beta (Scala 2.11)","packageLabel":"spark-image-67ab3a06d1e83d5b60df7063245eb419a2e9fe329aeeb7e7d9713332c669bb17","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"2.1.0-db2-scala2.11","displayName":"Spark 2.1.0-db2 (Scala 2.11)","packageLabel":"spark-image-267c4490a3ab8a39acdbbd9f1d36f6decdecebf013e30dd677faff50f1d9cf8b","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"2.1.x-gpu-scala2.11","displayName":"Spark 2.1 (Auto-updating, GPU, Scala 2.11 experimental)","packageLabel":"spark-image-f43ddf8ad9acfd71338f5a51345f077173d236016ad7e39ffd6f698403acd4ea","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.0-ubuntu15.10-scala2.10","displayName":"Spark 2.0.0 (Scala 2.10)","packageLabel":"spark-image-073c1b52ace74f251fae2680624a0d8d184a8b57096d1c21c5ce56c29be6a37a","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.2-db3-scala2.10","displayName":"Spark 2.0.2-db3 (Scala 2.10)","packageLabel":"spark-image-584091dedb690de20e8cf22d9e02fdcce1281edda99eedb441a418d50e28088f","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.0-db1-scala2.11","displayName":"Spark 2.1.0-db1 (Scala 2.11)","packageLabel":"spark-image-e8ad5b72cf0f899dcf2b4720c1f572ab0e87a311d6113b943b4e1d4a7edb77eb","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.1.1-db4-scala2.11","displayName":"Spark 2.1.1-db4 (Scala 2.11)","packageLabel":"spark-image-52bca0ca866e3f4243d3820a783abf3b9b3b553edf234abef14b892657ceaca9","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.0-db2-scala2.10","displayName":"Spark 2.1.0-db2 (Scala 2.10)","packageLabel":"spark-image-a2ca4f6b58c95f78dca91b1340305ab3fe32673bd894da2fa8e1dc8a9f8d0478","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"1.6.x-ubuntu15.10-hadoop1","displayName":"Spark 1.6.x (Hadoop 1)","packageLabel":"spark-image-8cea23fb9094e174bf5815d79009f4a8e383eb86cf2909cf6c6434ed8da2a16a","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"2.0.2-db4-scala2.11","displayName":"Spark 2.0.2-db4 (Scala 2.11)","packageLabel":"spark-image-7dbc7583e8271765b8a1508cb9e832768e35489bbde2c4c790bc6766aee2fd7f","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.1-ubuntu15.10-hadoop1","displayName":"Spark 1.6.1 (Hadoop 1)","packageLabel":"spark-image-21d1cac181b7b8856dd1b4214a3a734f95b5289089349db9d9c926cb87d843db","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.x-gpu-scala2.11","displayName":"Spark 2.0 (Auto-updating, GPU, Scala 2.11 experimental)","packageLabel":"spark-image-968b89f1d0ec32e1ee4dacd04838cae25ef44370a441224177a37980d539d83a","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.2-ubuntu15.10-hadoop1","displayName":"Spark 1.6.2 (Hadoop 1)","packageLabel":"spark-image-8cea23fb9094e174bf5815d79009f4a8e383eb86cf2909cf6c6434ed8da2a16a","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"1.6.3-db1-hadoop2-scala2.10","displayName":"Spark 1.6.3-db1 (Hadoop 2, Scala 2.10)","packageLabel":"spark-image-eaa8d9b990015a14e032fb2e2e15be0b8d5af9627cd01d855df728b67969d5d9","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.3-db2-hadoop1-scala2.10","displayName":"Spark 1.6.3-db2 (Hadoop 1, Scala 2.10)","packageLabel":"spark-image-14112ea0645bea94333a571a150819ce85573cf5541167d905b7e6588645cf3b","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.2-ubuntu15.10-hadoop2","displayName":"Spark 1.6.2 (Hadoop 2)","packageLabel":"spark-image-161245e66d887cd775e23286a54bab0b146143e1289f25bd1732beac454a1561","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"1.6.1-ubuntu15.10-hadoop2","displayName":"Spark 1.6.1 (Hadoop 2)","packageLabel":"spark-image-4cafdf8bc6cba8edad12f441e3b3f0a8ea27da35c896bc8290e16b41fd15496a","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.2-db2-scala2.10","displayName":"Spark 2.0.2-db2 (Scala 2.10)","packageLabel":"spark-image-36d48f22cca7a907538e07df71847dd22aaf84a852c2eeea2dcefe24c681602f","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.x-ubuntu15.10-scala2.11","displayName":"Spark 2.0 (Ubuntu 15.10, Scala 2.11, deprecated)","packageLabel":"spark-image-8e1c50d626a52eac5a6c8129e09ae206ba9890f4523775f77af4ad6d99a64c44","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.x-scala2.10","displayName":"Spark 2.0 (Auto-updating, Scala 2.10)","packageLabel":"spark-image-859e88079f97f58d50e25163b39a1943d1eeac0b6939c5a65faba986477e311a","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.1-db4-scala2.10","displayName":"Spark 2.1.1-db4 (Scala 2.10)","packageLabel":"spark-image-c7c0224de396cd1563addc1ae4bca6ba823780b6babe6c3729ddf73008f29ba4","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.2-db1-scala2.11","displayName":"Spark 2.0.2-db1 (Scala 2.11)","packageLabel":"spark-image-c2d623f03dd44097493c01aa54a941fc31978ebe6d759b36c75b716b2ff6ab9c","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.2-db4-scala2.10","displayName":"Spark 2.0.2-db4 (Scala 2.10)","packageLabel":"spark-image-859e88079f97f58d50e25163b39a1943d1eeac0b6939c5a65faba986477e311a","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.1-db5-scala2.10","displayName":"Spark 2.1.1-db5 (Scala 2.10)","packageLabel":"spark-image-74133df2c13950431298d1cab3e865c191d83ac33648a8590495c52fc644c654","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.5.x-ubuntu15.10","displayName":"Spark 1.5.2 (Hadoop 1)","packageLabel":"spark-image-c9d2a8abf41f157a4acc6d52bc721090346f6fea2de356f3a66e388f54481698","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.2.x-scala2.10","displayName":"3.0 beta (Scala 2.10)","packageLabel":"spark-image-d549f2d4a523994ecdf37e531b51d5ec7d8be51534bb0ca5322eaad28ba8f557","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"3.0.x-scala2.11","displayName":"3.0 beta (Scala 2.11)","packageLabel":"spark-image-67ab3a06d1e83d5b60df7063245eb419a2e9fe329aeeb7e7d9713332c669bb17","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.x-scala2.11","displayName":"Spark 2.0 (Auto-updating, Scala 2.11)","packageLabel":"spark-image-7dbc7583e8271765b8a1508cb9e832768e35489bbde2c4c790bc6766aee2fd7f","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.x-scala2.10","displayName":"Spark 2.1 (Auto-updating, Scala 2.10)","packageLabel":"spark-image-74133df2c13950431298d1cab3e865c191d83ac33648a8590495c52fc644c654","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.0-db3-scala2.10","displayName":"Spark 2.1.0-db3 (Scala 2.10)","packageLabel":"spark-image-25a17d070af155f10c4232dcc6248e36a2eb48c24f8d4fc00f34041b86bd1626","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.2-db2-scala2.11","displayName":"Spark 2.0.2-db2 (Scala 2.11)","packageLabel":"spark-image-4fa852ba378e97815083b96c9cada7b962a513ec23554a5fc849f7f1dd8c065a","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"1.3.x-ubuntu15.10","displayName":"Spark 1.3.0 (Hadoop 1)","packageLabel":"spark-image-40d2842670bc3dc178b14042501847d76171437ccf70613fa397a7a24c48b912","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.1-db1-scala2.11","displayName":"Spark 2.0.1-db1 (Scala 2.11)","packageLabel":"spark-image-10ab19f634bbfdb860446c326a9f76dc25bfa87de6403b980566279142a289ea","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.2-db3-scala2.11","displayName":"Spark 2.0.2-db3 (Scala 2.11)","packageLabel":"spark-image-7fd7aaa89d55692e429115ae7eac3b1a1dc4de705d50510995f34306b39c2397","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.3-db1-hadoop1-scala2.10","displayName":"Spark 1.6.3-db1 (Hadoop 1, Scala 2.10)","packageLabel":"spark-image-d50af1032799546b8ccbeeb76889a20c819ebc2a0e68ea20920cb30d3895d3ae","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.2-db1-scala2.10","displayName":"Spark 2.0.2-db1 (Scala 2.10)","packageLabel":"spark-image-654bdd6e9bad70079491987d853b4b7abf3b736fff099701501acaabe0e75c41","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.x-ubuntu15.10","displayName":"Spark 2.0 (Ubuntu 15.10, Scala 2.10, deprecated)","packageLabel":"spark-image-a659f3909d51b38d297b20532fc807ecf708cfb7440ce9b090c406ab0c1e4b7e","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.0.1-db1-scala2.10","displayName":"Spark 2.0.1-db1 (Scala 2.10)","packageLabel":"spark-image-5a13c2db3091986a4e7363006cc185c5b1108c7761ef5d0218506cf2e6643840","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.1.x-scala2.11","displayName":"Spark 2.1 (Auto-updating, Scala 2.11)","packageLabel":"spark-image-08d9fc1551087e0876236f19640c4a83116b1649f15137427d21c9056656e80e","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.1.0-db1-scala2.10","displayName":"Spark 2.1.0-db1 (Scala 2.10)","packageLabel":"spark-image-f0ab82a5deb7908e0d159e9af066ba05fb56e1edb35bdad41b7ad2fd62a9b546","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"3.0.x-scala2.10","displayName":"3.0 beta (Scala 2.10)","packageLabel":"spark-image-d549f2d4a523994ecdf37e531b51d5ec7d8be51534bb0ca5322eaad28ba8f557","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.0-ubuntu15.10","displayName":"Spark 1.6.0 (Hadoop 1)","packageLabel":"spark-image-10ef758029b8c7e19cd7f4fb52fff9180d75db92ca071bd94c47f3c1171a7cb5","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"1.6.x-ubuntu15.10-hadoop2","displayName":"Spark 1.6.x (Hadoop 2)","packageLabel":"spark-image-161245e66d887cd775e23286a54bab0b146143e1289f25bd1732beac454a1561","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"2.0.0-ubuntu15.10-scala2.11","displayName":"Spark 2.0.0 (Scala 2.11)","packageLabel":"spark-image-b4ec141e751f201399f8358a82efee202560f7ed05e1a04a2ae8778f6324b909","upgradable":true,"deprecated":true,"customerVisible":false},{"key":"2.1.0-db3-scala2.11","displayName":"Spark 2.1.0-db3 (Scala 2.11)","packageLabel":"spark-image-ccbc6b73f158e2001fc1fb8c827bfdde425d8bd6d65cb7b3269784c28bb72c16","upgradable":true,"deprecated":false,"customerVisible":true}],"enableRestrictedClusterCreation":true,"enableFeedback":true,"enableClusterAutoScaling":false,"enableUserVisibleDefaultTags":true,"defaultNumWorkers":0,"serverContinuationTimeoutMillis":10000,"driverStderrFilePrefix":"stderr","enableNotebookRefresh":false,"accountsOwnerUrl":"https://accounts.cloud.databricks.com/registration.html#login","driverStdoutFilePrefix":"stdout","defaultNodeTypeToPricingUnitsMap":{"r3.2xlarge":2,"i3.4xlarge":6,"class-node":1,"m4.2xlarge":1.5,"r4.xlarge":1,"m4.4xlarge":3,"r4.16xlarge":16,"Standard_DS11":0.5,"p2.8xlarge":16,"m4.10xlarge":8,"r3.8xlarge":8,"r4.4xlarge":4,"dev-tier-node":1,"c3.8xlarge":4,"r3.4xlarge":4,"i2.4xlarge":6,"m4.xlarge":0.75,"r4.8xlarge":8,"r4.large":0.5,"Standard_DS12":1,"development-node":1,"i2.2xlarge":3,"g2.8xlarge":6,"i3.large":0.75,"memory-optimized":1,"m4.large":0.375,"p2.16xlarge":24,"i3.8xlarge":12,"i3.16xlarge":24,"Standard_DS12_v2":1,"Standard_DS13":2,"Standard_DS11_v2":0.5,"Standard_DS13_v2":2,"c3.2xlarge":1,"c4.2xlarge":1,"i2.xlarge":1.5,"compute-optimized":1,"c4.4xlarge":2,"i3.2xlarge":3,"c3.4xlarge":2,"g2.2xlarge":1.5,"p2.xlarge":2,"m4.16xlarge":12,"c4.8xlarge":4,"i3.xlarge":1.5,"r3.xlarge":1,"r4.2xlarge":2,"i2.8xlarge":12},"enableSparkDocsSearch":true,"sparkHistoryServerEnabled":true,"enableEBSVolumesUI":false,"sanitizeMarkdownHtml":true,"metastoreServiceRowLimit":1000000,"enableIPythonImportExport":true,"enableClusterTagsUIForJobs":true,"enableClusterTagsUI":false,"enableNotebookHistoryDiffing":true,"branch":"2.48","accountsLimit":3,"enableSparkEnvironmentVariables":true,"enableX509Authentication":false,"enableStructuredStreamingNbOptimizations":true,"enableNotebookGitBranching":true,"local":false,"enableNotebookLazyRenderWrapper":false,"enableClusterAutoScalingForJobs":false,"enableStrongPassword":false,"displayDefaultContainerMemoryGB":6,"enableNotebookCommandMode":true,"disableS3TableImport":false,"deploymentMode":"production","useSpotForWorkers":true,"enableNonPollingTableCall":true,"enableUserInviteWorkflow":true,"enableStaticNotebooks":true,"enableCssTransitions":true,"serverlessEnableElasticDisk":true,"minClusterTagKeyLength":1,"showHomepageFeaturedLinks":true,"pricingURL":"https://databricks.com/product/pricing","enableClusterAclsConfig":false,"useTempS3UrlForTableUpload":false,"notifyLastLogin":false,"enableSshKeyUIByTier":false,"defaultAutomatedPricePerDBU":0.2,"enableNotebookGitVersioning":true,"files":"files/","feedbackEmail":"feedback@databricks.com","enableDriverLogsUI":true,"enableWorkspaceAclsConfig":false,"dropzoneMaxFileSize":2047,"enableNewClustersList":false,"enableNewDashboardViews":true,"driverLog4jFilePrefix":"log4j","enableSingleSignOn":true,"enableMavenLibraries":true,"displayRowLimit":1000,"deltaProcessingAsyncEnabled":true,"enableSparkEnvironmentVariablesUI":false,"defaultSparkVersion":{"key":"2.1.x-scala2.10","displayName":"Spark 2.1 (Auto-updating, Scala 2.10)","packageLabel":"spark-image-74133df2c13950431298d1cab3e865c191d83ac33648a8590495c52fc644c654","upgradable":true,"deprecated":false,"customerVisible":true},"enableCustomSpotPricing":false,"enableMountAclsConfig":false,"defaultAutoterminationMin":180,"useDevTierHomePage":true,"enableClusterClone":true,"enableNotebookLineNumbers":true,"enablePublishHub":false,"notebookHubUrl":"http://hub.dev.databricks.com/","showSqlEndpoints":false,"enableNotebookDatasetInfoView":false,"enableClusterAclsByTier":false,"databricksDocsBaseUrl":"https://docs.databricks.com/","cloud":"AWS","disallowAddingAdmins":true,"enableSparkConfUI":true,"featureTier":"DEVELOPER_BASIC_TIER","mavenCentralSearchEndpoint":"http://search.maven.org/solrsearch/select","enableOrgSwitcherUI":true,"clustersLimit":1,"enableJdbcImport":true,"enableElasticDisk":false,"logfiles":"logfiles/","enableWebappSharding":true,"enableClusterDeltaUpdates":true,"enableSingleSignOnLogin":false,"ebsVolumeSizeLimitGB":{"GENERAL_PURPOSE_SSD":[100,4096],"THROUGHPUT_OPTIMIZED_HDD":[500,4096]},"enableMountAcls":false,"requireEmailUserName":true,"dbcFeedbackURL":"mailto:feedback@databricks.com","enableMountAclService":true,"serverlessClustersByDefault":false,"enableWorkspaceAcls":false,"maxClusterTagKeyLength":127,"gitHash":"3594d0006cfff297612107c9065dcba3695eed9c","showWorkspaceFeaturedLinks":true,"signupUrl":"https://databricks.com/try-databricks","allowFeedbackForumAccess":true,"enableImportFromUrl":true,"enableMiniClusters":true,"enableNewJobList":true,"enableNewTableUI":true,"enableDebugUI":false,"enableStreamingMetricsDashboard":true,"allowNonAdminUsers":true,"enableSingleSignOnByTier":false,"enableJobsRetryOnTimeout":true,"useStandardTierUpgradeTooltips":true,"staticNotebookResourceUrl":"https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/","enableSpotClusterType":true,"enableSparkPackages":true,"dynamicSparkVersions":true,"enableClusterTagsUIByTier":false,"enableNotebookHistoryUI":true,"enableClusterLoggingUI":true,"enableDatabaseDropdownInTableUI":true,"showDebugCounters":false,"enableInstanceProfilesUI":false,"enableFolderHtmlExport":true,"homepageFeaturedLinks":[{"linkURI":"https://docs.databricks.com/_static/notebooks/gentle-introduction-to-apache-spark.html","displayName":"Introduction to Apache Spark on Databricks","icon":"img/home/Python_icon.svg"},{"linkURI":"https://docs.databricks.com/_static/notebooks/databricks-for-data-scientists.html","displayName":"Databricks for Data Scientists","icon":"img/home/Scala_icon.svg"},{"linkURI":"https://docs.databricks.com/_static/notebooks/structured-streaming-python.html","displayName":"Introduction to Structured Streaming","icon":"img/home/Python_icon.svg"}],"enableClusterStart":false,"enableEBSVolumesUIByTier":false,"singleSignOnComingSoon":false,"upgradeURL":"https://accounts.cloud.databricks.com/registration.html#login","maxAutoterminationMinutes":10000,"autoterminateClustersByDefault":true,"notebookLoadingBackground":"#fff","sshContainerForwardedPort":2200,"enableServerAutoComplete":true,"enableStaticHtmlImport":true,"enableInstanceProfilesByTier":false,"showForgotPasswordLink":true,"defaultMemoryPerContainerMB":6000,"enablePresenceUI":true,"minAutoterminationMinutes":10,"accounts":true,"useOnDemandClustersByDefault":true,"useFramedStaticNotebooks":false,"enableNewProgressReportUI":true,"enableAutoCreateUserUI":true,"defaultCoresPerContainer":4,"showTerminationReason":true,"enableNewClustersGet":true,"showPricePerDBU":false,"showSqlProxyUI":true,"enableNotebookErrorHighlighting":true};</script>
<script>var __DATABRICKS_NOTEBOOK_MODEL = {"version":"NotebookV1","origId":2831780622018207,"name":"4.1_SparkR-Introduction","language":"r","commands":[{"version":"CommandV1","origId":2395288974719308,"guid":"f83a6d2e-8aa2-4d4f-a509-eb8132a073b5","subtype":"command","commandType":"auto","position":0.5,"command":"%md\n# 4.1 SparkR Introduction\n\nThis notebook demonstrates the basics of using SparkR - the Apache Spark api for R.\n\nSparkR uses the same underlying implementation as pyspark SQL but the API is designed resemble (to some degree)\nthe functions for R data frames.\n","commandVersion":0,"state":"error","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0,"submitTime":0,"finishTime":0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"2d9e1896-6a13-4525-8c12-2b08882a9a91"},{"version":"CommandV1","origId":2831780622018209,"guid":"2797679d-cf6e-444c-87b2-6ecff8d42093","subtype":"command","commandType":"auto","position":1.0,"command":"# initialise the sparkR session\nsparkR.session()","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>Java ref type org.apache.spark.sql.SparkSession id 1 </pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1499582131374,"submitTime":1499582105339,"finishTime":1499582131424,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"fff755fe-be1e-44da-b374-a773dd31cc9e"},{"version":"CommandV1","origId":2831780622018212,"guid":"ba8457a4-2ccd-403f-9931-0770a36664e4","subtype":"command","commandType":"auto","position":2.0,"command":"# read a Spark DataFrame from a csv file\ndf = read.df('data/winequality-white.csv', 'csv', header=TRUE, sep=';', inferSchema=TRUE)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'></pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":"<code style=\"font-size:10p\"> Error : Error in loadDF : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.io.IOException: Could not read footer for file: FileStatus{path=dbfs:/data/winequality-red.csv; isDirectory=false; length=84199; replication=0; blocksize=0; modification_time=0; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false} </code>","error":null,"workflows":[],"startTime":1499685864194,"submitTime":1499685861409,"finishTime":1499685864729,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"5c89f845-30b0-4fea-a790-923f9e9a58e4"},{"version":"CommandV1","origId":2831780622018219,"guid":"3a23b7da-6d3c-4940-a98c-f815de29da7c","subtype":"command","commandType":"auto","position":3.0,"command":"# display the schema of the DataFrame\nprintSchema(df)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>root\n |-- fixed acidity: double (nullable = true)\n |-- volatile acidity: double (nullable = true)\n |-- citric acid: double (nullable = true)\n |-- residual sugar: double (nullable = true)\n |-- chlorides: double (nullable = true)\n |-- free sulfur dioxide: double (nullable = true)\n |-- total sulfur dioxide: double (nullable = true)\n |-- density: double (nullable = true)\n |-- pH: double (nullable = true)\n |-- sulphates: double (nullable = true)\n |-- alcohol: double (nullable = true)\n |-- quality: integer (nullable = true)</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1499685896020,"submitTime":1499685896015,"finishTime":1499685896027,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"fbbecf38-57e5-494c-a97a-510c32872f8a"},{"version":"CommandV1","origId":2831780622018220,"guid":"6593b897-2b43-433f-8d5c-897d41a8585d","subtype":"command","commandType":"auto","position":4.0,"command":"# display the head rows of a Spark DataFrame\nhead(df)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>  fixed acidity volatile acidity citric acid residual sugar chlorides\n1           7.0             0.27        0.36           20.7     0.045\n2           6.3             0.30        0.34            1.6     0.049\n3           8.1             0.28        0.40            6.9     0.050\n4           7.2             0.23        0.32            8.5     0.058\n5           7.2             0.23        0.32            8.5     0.058\n6           8.1             0.28        0.40            6.9     0.050\n  free sulfur dioxide total sulfur dioxide density   pH sulphates alcohol\n1                  45                  170  1.0010 3.00      0.45     8.8\n2                  14                  132  0.9940 3.30      0.49     9.5\n3                  30                   97  0.9951 3.26      0.44    10.1\n4                  47                  186  0.9956 3.19      0.40     9.9\n5                  47                  186  0.9956 3.19      0.40     9.9\n6                  30                   97  0.9951 3.26      0.44    10.1\n  quality\n1       6\n2       6\n3       6\n4       6\n5       6\n6       6</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1499686100795,"submitTime":1499686100789,"finishTime":1499686101018,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"a3d5d0bd-2193-4bb2-8385-634b76a51c18"},{"version":"CommandV1","origId":2831780622018224,"guid":"79871e6f-566d-4b06-b397-6270449b73d3","subtype":"command","commandType":"auto","position":4.5,"command":"# display the Spark DataFrame\nshowDF(df)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>+-------------+----------------+-----------+--------------+---------+-------------------+--------------------+-------+----+---------+-------+-------+\n|fixed acidity|volatile acidity|citric acid|residual sugar|chlorides|free sulfur dioxide|total sulfur dioxide|density|  pH|sulphates|alcohol|quality|\n+-------------+----------------+-----------+--------------+---------+-------------------+--------------------+-------+----+---------+-------+-------+\n|          7.0|            0.27|       0.36|          20.7|    0.045|               45.0|               170.0|  1.001| 3.0|     0.45|    8.8|      6|\n|          6.3|             0.3|       0.34|           1.6|    0.049|               14.0|               132.0|  0.994| 3.3|     0.49|    9.5|      6|\n|          8.1|            0.28|        0.4|           6.9|     0.05|               30.0|                97.0| 0.9951|3.26|     0.44|   10.1|      6|\n|          7.2|            0.23|       0.32|           8.5|    0.058|               47.0|               186.0| 0.9956|3.19|      0.4|    9.9|      6|\n|          7.2|            0.23|       0.32|           8.5|    0.058|               47.0|               186.0| 0.9956|3.19|      0.4|    9.9|      6|\n|          8.1|            0.28|        0.4|           6.9|     0.05|               30.0|                97.0| 0.9951|3.26|     0.44|   10.1|      6|\n|          6.2|            0.32|       0.16|           7.0|    0.045|               30.0|               136.0| 0.9949|3.18|     0.47|    9.6|      6|\n|          7.0|            0.27|       0.36|          20.7|    0.045|               45.0|               170.0|  1.001| 3.0|     0.45|    8.8|      6|\n|          6.3|             0.3|       0.34|           1.6|    0.049|               14.0|               132.0|  0.994| 3.3|     0.49|    9.5|      6|\n|          8.1|            0.22|       0.43|           1.5|    0.044|               28.0|               129.0| 0.9938|3.22|     0.45|   11.0|      6|\n|          8.1|            0.27|       0.41|          1.45|    0.033|               11.0|                63.0| 0.9908|2.99|     0.56|   12.0|      5|\n|          8.6|            0.23|        0.4|           4.2|    0.035|               17.0|               109.0| 0.9947|3.14|     0.53|    9.7|      5|\n|          7.9|            0.18|       0.37|           1.2|     0.04|               16.0|                75.0|  0.992|3.18|     0.63|   10.8|      5|\n|          6.6|            0.16|        0.4|           1.5|    0.044|               48.0|               143.0| 0.9912|3.54|     0.52|   12.4|      7|\n|          8.3|            0.42|       0.62|         19.25|     0.04|               41.0|               172.0| 1.0002|2.98|     0.67|    9.7|      5|\n|          6.6|            0.17|       0.38|           1.5|    0.032|               28.0|               112.0| 0.9914|3.25|     0.55|   11.4|      7|\n|          6.3|            0.48|       0.04|           1.1|    0.046|               30.0|                99.0| 0.9928|3.24|     0.36|    9.6|      6|\n|          6.2|            0.66|       0.48|           1.2|    0.029|               29.0|                75.0| 0.9892|3.33|     0.39|   12.8|      8|\n|          7.4|            0.34|       0.42|           1.1|    0.033|               17.0|               171.0| 0.9917|3.12|     0.53|   11.3|      6|\n|          6.5|            0.31|       0.14|           7.5|    0.044|               34.0|               133.0| 0.9955|3.22|      0.5|    9.5|      5|\n+-------------+----------------+-----------+--------------+---------+-------------------+--------------------+-------+----+---------+-------+-------+\nonly showing top 20 rows</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1499686121813,"submitTime":1499686121807,"finishTime":1499686121954,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"5a0bc232-ba6a-492e-aa43-a368866cfb54"},{"version":"CommandV1","origId":2831780622018221,"guid":"d71074f6-e555-4941-b62d-963fce9b3ffb","subtype":"command","commandType":"auto","position":5.0,"command":"# produce a summary of a Spark DataFrame\n# please note that `summary()` returns another SparkFrame\n# which we can convert to an R data frame with `collect()`\ncollect(summary(df))","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>  summary      fixed acidity    volatile acidity         citric acid\n1   count               4898                4898                4898\n2    mean  6.854787668436075 0.27824111882401087 0.33419150673743736\n3  stddev 0.8438682276875127 0.10079454842486532 0.12101980420298254\n4     min                3.8                0.08                 0.0\n5     max               14.2                 1.1                1.66\n     residual sugar            chlorides free sulfur dioxide\n1              4898                 4898                4898\n2 6.391414863209486   0.0457723560636995   35.30808493262556\n3 5.072057784014878 0.021847968093728805   17.00713732523259\n4               0.6                0.009                 2.0\n5              65.8                0.346               289.0\n  total sulfur dioxide              density                  pH\n1                 4898                 4898                4898\n2   138.36065741118824   0.9940273764801896  3.1882666394446693\n3   42.498064554142985 0.002990906916936997 0.15100059961506673\n4                  9.0              0.98711                2.72\n5                440.0              1.03898                3.82\n            sulphates            alcohol            quality\n1                4898               4898               4898\n2  0.4898468762760325 10.514267047774638   5.87790935075541\n3 0.11412583394883222   1.23062056775732 0.8856385749678322\n4                0.22                8.0                  3\n5                1.08               14.2                  9</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1499686218024,"submitTime":1499686218019,"finishTime":1499686218645,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"774d4e43-1e5b-498c-80f1-13ebbd483d6f"},{"version":"CommandV1","origId":2831780622018228,"guid":"86e76de2-c62d-407e-8708-530498663d93","subtype":"command","commandType":"auto","position":5.25,"command":"# DataFrams can be filtered, grouped, aggregated and sorted\n# e.g. average `alcohol` aand max log(pH) grouped by `quality` for `density` > 1.0 sorted by `quality`\ncollect(orderBy(summarize(groupBy(filter(df, \n                                     df$density > 1.0),\n                             df$quality),\n                   avg(df$alcohol), max(log(df$pH))),\n          df$quality))","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>  quality avg(alcohol) max(LOG(pH))\n1       3    11.000000     1.111858\n2       4     9.000000     1.184790\n3       5     8.947368     1.249902\n4       6     9.168966     1.255616\n5       7     8.880000     1.166271\n6       8     8.800000     1.187843</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":"<code style=\"font-size:10p\"> Error in collect(summarize(groupBy(select(df, df$density &gt; 1), df$quality),  :  </code>","error":"<pre style=\"font-size:10p\">Error in collect(summarize(groupBy(select(df, df$density &gt; 1), df$quality),  : \n  error in evaluating the argument 'x' in selecting a method for function 'collect': Error in handleErrors(returnStatus, conn) : \n  org.apache.spark.sql.AnalysisException: resolved attribute(s) quality#30,alcohol#29,pH#27 missing from (density &gt; 1.0)#1462 in operator !Aggregate [quality#30], [quality#30, avg(alcohol#29) AS avg(alcohol)#1469, avg(pH#27) AS avg(pH)#1470];;\n!Aggregate [quality#30], [quality#30, avg(alcohol#29) AS avg(alcohol)#1469, avg(pH#27) AS avg(pH)#1470]\n+- Project [(density#26 &gt; 1.0) AS (density &gt; 1.0)#1462]\n   +- Relation[fixed acidity#19,volatile acidity#20,citric acid#21,residual sugar#22,chlorides#23,free sulfur dioxide#24,total sulfur dioxide#25,density#26,pH#27,sulphates#28,alcohol#29,quality#30] csv\n\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:39)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:60)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$</pre>","workflows":[],"startTime":1499686515656,"submitTime":1499686515650,"finishTime":1499686516716,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"9cbcbe59-f10e-4c24-97ef-3b616f7be17c"},{"version":"CommandV1","origId":2831780622018226,"guid":"dca0b0b0-30ee-4ec7-88a1-a335bdb61b69","subtype":"command","commandType":"auto","position":5.5,"command":"# we can also use a few machine learning models\n\n# split the DataFrame into the traning and testing DataFrames\ntrainAndTestDFs = randomSplit(df, c(0.75, 0.25), 13)\ntrainDF = trainAndTestDFs[[1]]\nprint(count(trainDF))\ntestDF = trainAndTestDFs[[2]]\nprint(count(testDF))","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>[1] 3625\n[1] 1273</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1499686673445,"submitTime":1499686673434,"finishTime":1499686673904,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"18c2d122-013b-40f4-8500-d5dbf226738c"},{"version":"CommandV1","origId":2831780622018222,"guid":"145b9b34-2586-4652-bc6d-2534b29c7a69","subtype":"command","commandType":"auto","position":6.0,"command":"# train a linear regression model\nglmModel = spark.glm(trainDF, quality ~ ., family = \"gaussian\")\nsummary(glmModel)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>\nDeviance Residuals: \n(Note: These are approximate quantiles with relative error <= 0.01)\n    Min       1Q   Median       3Q      Max  \n-3.7028  -0.4953  -0.0483   0.4346   2.8136  \n\nCoefficients:\n                      Estimate   Std. Error  t value   Pr(>|t|)  \n(Intercept)           141.91     20.667      6.8667    7.7005e-12\nfixed acidity         0.054059   0.023535    2.297     0.021678  \nvolatile acidity      -1.8773    0.13289     -14.127   0         \ncitric acid           0.010091   0.1155      0.087369  0.93038   \nresidual sugar        0.080612   0.0084207   9.573     0         \nchlorides             -0.13222   0.65353     -0.20231  0.83969   \nfree sulfur dioxide   0.0025999  0.00097505  2.6665    0.0076993 \ntotal sulfur dioxide  0.0002466  0.00044141  0.55865   0.57643   \ndensity               -141.98    20.967      -6.7718   1.4789e-11\npH                    0.65118    0.12034     5.4112    6.6682e-08\nsulphates             0.61715    0.11641     5.3016    1.2166e-07\nalcohol               0.21282    0.026941    7.8995    3.7748e-15\n\n(Dispersion parameter for gaussian family taken to be 0.5669293)\n\n    Null deviance: 2842.3  on 3624  degrees of freedom\nResidual deviance: 2048.3  on 3613  degrees of freedom\nAIC: 8244\n\nNumber of Fisher Scoring iterations: 1\n</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":"<code style=\"font-size:10p\"> Error in spark.glm(trainDF, quality ~ ., family = &quot;gaussian&quot;) :  </code>","error":"<pre style=\"font-size:10p\">Error in spark.glm(trainDF, quality ~ ., family = &quot;gaussian&quot;) : \n  error in evaluating the argument 'data' in selecting a method for function 'spark.glm': Error: object 'trainDF' not found\n</pre>","workflows":[],"startTime":1499686675976,"submitTime":1499686675971,"finishTime":1499686678896,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"9d0573b2-5fa0-4a1c-9903-02d29edd4482"},{"version":"CommandV1","origId":2831780622018225,"guid":"0badb2f2-04f8-43ed-bb0a-e78d0a7a0a70","subtype":"command","commandType":"auto","position":8.0,"command":"# make predictions on the testing DataFrame\npredictionsDF <- predict(glmModel, testDF)\nhead(select(predictionsDF, 'prediction'))","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>  prediction\n1   6.991938\n2   6.778693\n3   5.836421\n4   7.148434\n5   6.232502\n6   5.781459</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":"<code style=\"font-size:10p\"> Warning messages: </code>","error":"<pre style=\"font-size:10p\">Warning messages:\n1: In odbcDriverConnect(&quot;DSN=spark;UID=root&quot;) :\n  [RODBC] ERROR: state HY000, code 34, message [unixODBC][Simba][SparkODBC] (34) Error from Spark: No more data to read..\n2: In odbcDriverConnect(&quot;DSN=spark;UID=root&quot;) : ODBC connection failed\n3: 'sparkR.init' is deprecated.\nUse 'sparkR.session' instead.\nSee help(&quot;Deprecated&quot;) \nError in predict(glmModel, testDF) : \n  error in evaluating the argument 'object' in selecting a method for function 'predict': Error: object 'glmModel' not found\n</pre>","workflows":[],"startTime":1499686720583,"submitTime":1499686720578,"finishTime":1499686720841,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"5ae4fdea-3119-4ac0-84c8-954433d0cda7"},{"version":"CommandV1","origId":2831780622018227,"guid":"b454534c-81d3-49fe-a7fe-ed957186f67b","subtype":"command","commandType":"auto","position":9.0,"command":"# calculate the RMSE (Root Mean Squared Error) on testing set\n\nresultDF = collect(agg(predictionsDF, avg((predictionsDF$prediction - predictionsDF$quality)* \n                                                (predictionsDF$prediction - predictionsDF$quality))))\nsqrt(as.numeric(resultDF))","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>[1] 0.7480906</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":"<code style=\"font-size:10p\"> Error in parse(text = DATABRICKS_CURRENT_TEMP_CMD__) :  </code>","error":null,"workflows":[],"startTime":1499686822749,"submitTime":1499686822743,"finishTime":1499686823074,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"44cf0a10-b52b-42f9-af51-fcf01e2d453e"},{"version":"CommandV1","origId":2831780622018229,"guid":"eb301e4e-b89b-4e9e-8539-d357c06c1e4e","subtype":"command","commandType":"auto","position":10.0,"command":"# use gapplyCollect to process the Spark DataFrame with a user defined function\n# the function will be called with the grouping key and the R dataframe which contatins \n# all the rows for this group.\n# it should return an R data frame representing the result of processing for a group.\n# gapplyCollect will then merge the resuls from all the groups into a single R data frame\n\ngapplyCollect(df, 'quality', function(key, f) {\n  apply(f, FUN= mean, MARGIN = 2)\n})","commandVersion":0,"state":"finished","results":{"type":"html","data":"<pre style=\"font-size:10p\"></pre><pre style = 'font-size:10pt'>     fixed acidity volatile acidity citric acid residual sugar  chlorides\n[1,]      6.837671        0.2605641   0.3380255       6.441606 0.04521747\n[2,]      7.600000        0.3332500   0.3360000       6.392500 0.05430000\n[3,]      6.933974        0.3020110   0.3376527       7.334969 0.05154633\n[4,]      7.420000        0.2980000   0.3860000       4.120000 0.02740000\n[5,]      7.129448        0.3812270   0.3042331       4.628221 0.05009816\n[6,]      6.657143        0.2774000   0.3265143       5.671429 0.03831429\n[7,]      6.734716        0.2627670   0.3256250       5.186477 0.03819091\n     free sulfur dioxide total sulfur dioxide   density       pH sulphates\n[1,]            35.65059             137.0473 0.9939613 3.188599 0.4911056\n[2,]            53.32500             170.6000 0.9948840 3.187500 0.4745000\n[3,]            36.43205             150.9046 0.9952626 3.168833 0.4822032\n[4,]            33.40000             116.0000 0.9914600 3.308000 0.4660000\n[5,]            23.35890             125.2791 0.9942767 3.182883 0.4761350\n[6,]            36.72000             126.1657 0.9922359 3.218686 0.4862286\n[7,]            34.12557             125.1148 0.9924524 3.213898 0.5031023\n      alcohol quality\n[1,] 10.57537       6\n[2,] 10.34500       3\n[3,]  9.80884       5\n[4,] 12.18000       9\n[5,] 10.15245       4\n[6,] 11.63600       8\n[7,] 11.36794       7</pre>","arguments":{},"addedWidgets":{},"removedWidgets":[],"datasetInfos":[]},"errorSummary":"<code style=\"font-size:10p\"> Error in handleErrors(returnStatus, conn) :  </code>","error":"<pre style=\"font-size:10p\">Error in handleErrors(returnStatus, conn) : \n  org.apache.spark.SparkException: Job aborted due to stage failure: Task 51 in stage 33.0 failed 1 times, most recent failure: Lost task 51.0 in stage 33.0 (TID 482, localhost, executor driver): org.apache.spark.SparkException: R computation failed with\n Error in `rownames&lt;-`(x, value) : \n  attempt to set 'rownames' on an object with no dimensions\nCalls: compute ... computeFunc -&gt; row.names&lt;- -&gt; row.names&lt;-.default -&gt; rownames&lt;-\nExecution halted\n\tat org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)\n\tat org.apache.spark.sql.execution.FlatMapGroupsInRExec$$anonfun$12.apply(objects.scala:404)\n\tat org.apache.spark.sql.execution.FlatMapGroupsInRExec$$anonfun$12.apply(objects.scala:386)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:287)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:287)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:287)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:99)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1442)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1430)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1429)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1429)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:803)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:803)\n\tat scala.Option.foreach(Option.scala:236)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:803)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1657)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1612)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1601)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1937)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1950)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1963)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:1977)\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:362)\n\tat org.apache.spark.rdd.RDD.collect(RDD.scala:935)\n\tat org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:275)\n\tat org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2807)\n\tat org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2369)\n\tat org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2369)\n\tat org.apache.spark.sql.Dataset$$anonfun$60.apply(Dataset.scala:2791)\n\tat org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:87)\n\tat org.apache.spark.sql.execution.SQLExecution$.withFileAccessAudit(SQLExecution.scala:53)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:70)\n\tat org.apache.spark.sql.Dataset.withAction(Dataset.scala:2790)\n\tat org.apache.spark.sql.Dataset.collect(Dataset.scala:2369)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat org.apache.spark.api.r.RBackendHandler.handleMethodCall(RBackendHandler.scala:167)\n\tat org.apache.spark.api.r.RBackendHandler.channelRead0(RBackendHandler.scala:108)\n\tat org.apache.spark.api.r.RBackendHandler.channelRead0(RBackendHandler.scala:40)\n\tat io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)\n\tat io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:266)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)\n\tat io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:293)\n\tat io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:267)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)\n\tat io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)\n\tat io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)\n\tat io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:652)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:575)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:489)\n\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:451)\n\tat io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)\n\tat io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)\n\tat java.lang.Thread.run(Thread.java:745)\nCaused by: org.apache.spark.SparkException: R computation failed with\n Error in `rownames&lt;-`(x, value) : \n  attempt t\nIn addition: Warning messages:\n1: In odbcDriverConnect(&quot;DSN=spark;UID=root&quot;) :\n  [RODBC] ERROR: state HY000, code 34, message [unixODBC][Simba][SparkODBC] (34) Error from Spark: No more data to read..\n2: In odbcDriverConnect(&quot;DSN=spark;UID=root&quot;) : ODBC connection failed\n3: 'sparkR.init' is deprecated.\nUse 'sparkR.session' instead.\nSee help(&quot;Deprecated&quot;) </pre>","workflows":[],"startTime":1499687119545,"submitTime":1499687119539,"finishTime":1499687156638,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"9cfbef53-505b-434c-93de-bd81705ca54f"},{"version":"CommandV1","origId":2831780622018232,"guid":"13144cbf-f44a-44c5-b504-74358d776353","subtype":"command","commandType":"auto","position":11.0,"command":"%md\n\nFor more information on using SparkR please check the documentation:\n- SparkR Guide: <https://spark.apache.org/docs/latest/sparkr.html>\n- SparkR Documentation: <https://spark.apache.org/docs/latest/api/R/index.html>","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0,"submitTime":0,"finishTime":0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"a96a2dbb-909f-48d6-9593-0ff20b54f2da"}],"dashboards":[],"guid":"022e9b37-e106-42a0-addf-ba723a7973bc","globalVars":{},"iPythonMetadata":null,"inputWidgets":{}};</script>
<script
 src="https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/js/notebook-main.js"
 onerror="window.mainJsLoadError = true;"></script>
</head>
<body>
  <script>
if (window.mainJsLoadError) {
  var u = 'https://databricks-prod-cloudfront.cloud.databricks.com/static/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/js/notebook-main.js';
  var b = document.getElementsByTagName('body')[0];
  var c = document.createElement('div');
  c.innerHTML = ('<h1>Network Error</h1>' +
    '<p><b>Please check your network connection and try again.</b></p>' +
    '<p>Could not load a required resource: ' + u + '</p>');
  c.style.margin = '30px';
  c.style.padding = '20px 50px';
  c.style.backgroundColor = '#f5f5f5';
  c.style.borderRadius = '5px';
  b.appendChild(c);
}
</script>
</body>
</html>