{"cells":[{"cell_type":"markdown","source":["In this project, we help Hyundai Heavy Industries build a regression model to predict how many crew members a ship needs. The data description is as follows.\n\n**Variables**\n\n- Ship Name\n- Cruise Line\n- Age (as of 2013)\n- Tonnage (1000s of tons)\n- Passengers (100s)\n- Length (100s of feet)\n- Cabins (100s)\n- Passenger Density\n- Crew (100s)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"5752beba-3091-4343-b0be-b5e41a1496ec"}}},{"cell_type":"code","source":["from pyspark.sql import SparkSession\nfrom pyspark.ml.feature import StringIndexer\nfrom pyspark.ml.linalg import Vectors\nfrom pyspark.ml.feature import VectorAssembler\nfrom pyspark.ml.regression import LinearRegression\nfrom pyspark.sql.functions import corr"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"fb38a418-b16d-4fa5-bad8-0096588f6ff1"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["spark = SparkSession.builder.appName('cruise').getOrCreate()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"24e38880-27f5-45bb-8291-ff62b3e5f132"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df = spark.read.csv(\"dbfs:/FileStore/shared_uploads/dizhen@hsph.harvard.edu/cruise_ship_info.csv\",inferSchema=True,header=True)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"68444d4b-891b-4484-8b91-b0696c4e3a26"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df.printSchema()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"b895ea7c-2d38-409a-8ea1-4c9783432328"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"root\n |-- Ship_name: string (nullable = true)\n 
|-- Cruise_line: string (nullable = true)\n |-- Age: integer (nullable = true)\n |-- Tonnage: double (nullable = true)\n |-- passengers: double (nullable = true)\n |-- length: double (nullable = true)\n |-- cabins: double (nullable = true)\n |-- passenger_density: double (nullable = true)\n |-- crew: double (nullable = true)\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["root\n |-- Ship_name: string (nullable = true)\n |-- Cruise_line: string (nullable = true)\n |-- Age: integer (nullable = true)\n |-- Tonnage: double (nullable = true)\n |-- passengers: double (nullable = true)\n |-- length: double (nullable = true)\n |-- cabins: double (nullable = true)\n |-- passenger_density: double (nullable = true)\n |-- crew: double (nullable = true)\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"f9aaa4c1-b781-4fa9-88a9-3d9cfeae46f1"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-----------+-----------+---+------------------+----------+------+------+-----------------+----+\n| Ship_name|Cruise_line|Age| Tonnage|passengers|length|cabins|passenger_density|crew|\n+-----------+-----------+---+------------------+----------+------+------+-----------------+----+\n| Journey| Azamara| 6|30.276999999999997| 6.94| 5.94| 3.55| 42.64|3.55|\n| Quest| Azamara| 6|30.276999999999997| 6.94| 5.94| 3.55| 42.64|3.55|\n|Celebration| Carnival| 26| 47.262| 14.86| 7.22| 7.43| 31.8| 6.7|\n| Conquest| Carnival| 11| 110.0| 29.74| 9.53| 14.88| 36.99|19.1|\n| Destiny| Carnival| 17| 101.353| 26.42| 8.92| 13.21| 38.36|10.0|\n| Ecstasy| Carnival| 22| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Elation| Carnival| 15| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Fantasy| Carnival| 23| 70.367| 20.56| 8.55| 10.22| 34.23| 
9.2|\n|Fascination| Carnival| 19| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Freedom| Carnival| 6|110.23899999999999| 37.0| 9.51| 14.87| 29.79|11.5|\n| Glory| Carnival| 10| 110.0| 29.74| 9.51| 14.87| 36.99|11.6|\n| Holiday| Carnival| 28| 46.052| 14.52| 7.27| 7.26| 31.72| 6.6|\n|Imagination| Carnival| 18| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n|Inspiration| Carnival| 17| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Legend| Carnival| 11| 86.0| 21.24| 9.63| 10.62| 40.49| 9.3|\n| Liberty*| Carnival| 8| 110.0| 29.74| 9.51| 14.87| 36.99|11.6|\n| Miracle| Carnival| 9| 88.5| 21.24| 9.63| 10.62| 41.67|10.3|\n| Paradise| Carnival| 15| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Pride| Carnival| 12| 88.5| 21.24| 9.63| 11.62| 41.67| 9.3|\n| Sensation| Carnival| 20| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n+-----------+-----------+---+------------------+----------+------+------+-----------------+----+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-----------+-----------+---+------------------+----------+------+------+-----------------+----+\n| Ship_name|Cruise_line|Age| Tonnage|passengers|length|cabins|passenger_density|crew|\n+-----------+-----------+---+------------------+----------+------+------+-----------------+----+\n| Journey| Azamara| 6|30.276999999999997| 6.94| 5.94| 3.55| 42.64|3.55|\n| Quest| Azamara| 6|30.276999999999997| 6.94| 5.94| 3.55| 42.64|3.55|\n|Celebration| Carnival| 26| 47.262| 14.86| 7.22| 7.43| 31.8| 6.7|\n| Conquest| Carnival| 11| 110.0| 29.74| 9.53| 14.88| 36.99|19.1|\n| Destiny| Carnival| 17| 101.353| 26.42| 8.92| 13.21| 38.36|10.0|\n| Ecstasy| Carnival| 22| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Elation| Carnival| 15| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Fantasy| Carnival| 23| 70.367| 20.56| 8.55| 10.22| 34.23| 9.2|\n|Fascination| Carnival| 19| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Freedom| Carnival| 6|110.23899999999999| 
37.0| 9.51| 14.87| 29.79|11.5|\n| Glory| Carnival| 10| 110.0| 29.74| 9.51| 14.87| 36.99|11.6|\n| Holiday| Carnival| 28| 46.052| 14.52| 7.27| 7.26| 31.72| 6.6|\n|Imagination| Carnival| 18| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n|Inspiration| Carnival| 17| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Legend| Carnival| 11| 86.0| 21.24| 9.63| 10.62| 40.49| 9.3|\n| Liberty*| Carnival| 8| 110.0| 29.74| 9.51| 14.87| 36.99|11.6|\n| Miracle| Carnival| 9| 88.5| 21.24| 9.63| 10.62| 41.67|10.3|\n| Paradise| Carnival| 15| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n| Pride| Carnival| 12| 88.5| 21.24| 9.63| 11.62| 41.67| 9.3|\n| Sensation| Carnival| 20| 70.367| 20.52| 8.55| 10.2| 34.29| 9.2|\n+-----------+-----------+---+------------------+----------+------+------+-----------------+----+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.describe().show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"94f1fb20-5480-4afd-a21c-04f836cce69b"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------+---------+-----------+------------------+------------------+-----------------+-----------------+------------------+-----------------+-----------------+\n|summary|Ship_name|Cruise_line| Age| Tonnage| passengers| length| cabins|passenger_density| crew|\n+-------+---------+-----------+------------------+------------------+-----------------+-----------------+------------------+-----------------+-----------------+\n| count| 158| 158| 158| 158| 158| 158| 158| 158| 158|\n| mean| Infinity| null|15.689873417721518| 71.28467088607599|18.45740506329114|8.130632911392404| 8.830000000000005|39.90094936708861|7.794177215189873|\n| stddev| null| null| 7.615691058751413|37.229540025907866|9.677094775143416|1.793473548054825|4.4714172221480615| 8.63921711391542|3.503486564627034|\n| min|Adventure| Azamara| 4| 2.329| 0.66| 2.79| 0.33| 17.7| 
0.59|\n| max|Zuiderdam| Windstar| 48| 220.0| 54.0| 11.82| 27.0| 71.43| 21.0|\n+-------+---------+-----------+------------------+------------------+-----------------+-----------------+------------------+-----------------+-----------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------+---------+-----------+------------------+------------------+-----------------+-----------------+------------------+-----------------+-----------------+\n|summary|Ship_name|Cruise_line| Age| Tonnage| passengers| length| cabins|passenger_density| crew|\n+-------+---------+-----------+------------------+------------------+-----------------+-----------------+------------------+-----------------+-----------------+\n| count| 158| 158| 158| 158| 158| 158| 158| 158| 158|\n| mean| Infinity| null|15.689873417721518| 71.28467088607599|18.45740506329114|8.130632911392404| 8.830000000000005|39.90094936708861|7.794177215189873|\n| stddev| null| null| 7.615691058751413|37.229540025907866|9.677094775143416|1.793473548054825|4.4714172221480615| 8.63921711391542|3.503486564627034|\n| min|Adventure| Azamara| 4| 2.329| 0.66| 2.79| 0.33| 17.7| 0.59|\n| max|Zuiderdam| Windstar| 48| 220.0| 54.0| 11.82| 27.0| 71.43| 21.0|\n+-------+---------+-----------+------------------+------------------+-----------------+-----------------+------------------+-----------------+-----------------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.groupBy('Cruise_line').count().show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"a4deae0b-0447-43c1-a742-3c6f6d12efde"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-----------------+-----+\n| Cruise_line|count|\n+-----------------+-----+\n| Costa| 11|\n| P&O| 6|\n| Cunard| 3|\n|Regent_Seven_Seas| 5|\n| MSC| 8|\n| Carnival| 22|\n| 
Crystal| 2|\n| Orient| 1|\n| Princess| 17|\n| Silversea| 4|\n| Seabourn| 3|\n| Holland_American| 14|\n| Windstar| 3|\n| Disney| 2|\n| Norwegian| 13|\n| Oceania| 3|\n| Azamara| 2|\n| Celebrity| 10|\n| Star| 6|\n| Royal_Caribbean| 23|\n+-----------------+-----+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-----------------+-----+\n| Cruise_line|count|\n+-----------------+-----+\n| Costa| 11|\n| P&O| 6|\n| Cunard| 3|\n|Regent_Seven_Seas| 5|\n| MSC| 8|\n| Carnival| 22|\n| Crystal| 2|\n| Orient| 1|\n| Princess| 17|\n| Silversea| 4|\n| Seabourn| 3|\n| Holland_American| 14|\n| Windstar| 3|\n| Disney| 2|\n| Norwegian| 13|\n| Oceania| 3|\n| Azamara| 2|\n| Celebrity| 10|\n| Star| 6|\n| Royal_Caribbean| 23|\n+-----------------+-----+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["indexer = StringIndexer(inputCol=\"Cruise_line\", outputCol=\"cruise_cat\")\nindexed = indexer.fit(df).transform(df)\nindexed.head(5)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"105708b6-4dfd-457f-98b6-c0250d72df2c"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[11]: [Row(Ship_name='Journey', Cruise_line='Azamara', Age=6, Tonnage=30.276999999999997, passengers=6.94, length=5.94, cabins=3.55, passenger_density=42.64, crew=3.55, cruise_cat=16.0),\n Row(Ship_name='Quest', Cruise_line='Azamara', Age=6, Tonnage=30.276999999999997, passengers=6.94, length=5.94, cabins=3.55, passenger_density=42.64, crew=3.55, cruise_cat=16.0),\n Row(Ship_name='Celebration', Cruise_line='Carnival', Age=26, Tonnage=47.262, passengers=14.86, length=7.22, cabins=7.43, passenger_density=31.8, crew=6.7, cruise_cat=1.0),\n Row(Ship_name='Conquest', Cruise_line='Carnival', Age=11, Tonnage=110.0, passengers=29.74, length=9.53, cabins=14.88, passenger_density=36.99, crew=19.1, 
cruise_cat=1.0),\n Row(Ship_name='Destiny', Cruise_line='Carnival', Age=17, Tonnage=101.353, passengers=26.42, length=8.92, cabins=13.21, passenger_density=38.36, crew=10.0, cruise_cat=1.0)]","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[11]: [Row(Ship_name='Journey', Cruise_line='Azamara', Age=6, Tonnage=30.276999999999997, passengers=6.94, length=5.94, cabins=3.55, passenger_density=42.64, crew=3.55, cruise_cat=16.0),\n Row(Ship_name='Quest', Cruise_line='Azamara', Age=6, Tonnage=30.276999999999997, passengers=6.94, length=5.94, cabins=3.55, passenger_density=42.64, crew=3.55, cruise_cat=16.0),\n Row(Ship_name='Celebration', Cruise_line='Carnival', Age=26, Tonnage=47.262, passengers=14.86, length=7.22, cabins=7.43, passenger_density=31.8, crew=6.7, cruise_cat=1.0),\n Row(Ship_name='Conquest', Cruise_line='Carnival', Age=11, Tonnage=110.0, passengers=29.74, length=9.53, cabins=14.88, passenger_density=36.99, crew=19.1, cruise_cat=1.0),\n Row(Ship_name='Destiny', Cruise_line='Carnival', Age=17, Tonnage=101.353, passengers=26.42, length=8.92, cabins=13.21, passenger_density=38.36, crew=10.0, cruise_cat=1.0)]"]}}],"execution_count":0},{"cell_type":"code","source":["indexed.columns"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"532308ad-d51f-4730-903d-6201236ca9e4"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[13]: ['Ship_name',\n 'Cruise_line',\n 'Age',\n 'Tonnage',\n 'passengers',\n 'length',\n 'cabins',\n 'passenger_density',\n 'crew',\n 'cruise_cat']","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[13]: ['Ship_name',\n 'Cruise_line',\n 'Age',\n 'Tonnage',\n 'passengers',\n 'length',\n 'cabins',\n 'passenger_density',\n 'crew',\n 
'cruise_cat']"]}}],"execution_count":0},{"cell_type":"code","source":["assembler = VectorAssembler(\n inputCols=['Age',\n 'Tonnage',\n 'passengers',\n 'length',\n 'cabins',\n 'passenger_density',\n 'cruise_cat'],\n outputCol=\"features\")"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"8fed6081-cfce-4246-8c52-9b4baa5a217d"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["output = assembler.transform(indexed)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"57ae7a6a-38b5-45ab-8bad-8031001ba47a"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["output.select(\"features\", \"crew\").show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"ba175a33-ca06-4811-85b8-89b7bd994e79"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+--------------------+----+\n| features|crew|\n+--------------------+----+\n|[6.0,30.276999999...|3.55|\n|[6.0,30.276999999...|3.55|\n|[26.0,47.262,14.8...| 6.7|\n|[11.0,110.0,29.74...|19.1|\n|[17.0,101.353,26....|10.0|\n|[22.0,70.367,20.5...| 9.2|\n|[15.0,70.367,20.5...| 9.2|\n|[23.0,70.367,20.5...| 9.2|\n|[19.0,70.367,20.5...| 9.2|\n|[6.0,110.23899999...|11.5|\n|[10.0,110.0,29.74...|11.6|\n|[28.0,46.052,14.5...| 6.6|\n|[18.0,70.367,20.5...| 9.2|\n|[17.0,70.367,20.5...| 9.2|\n|[11.0,86.0,21.24,...| 9.3|\n|[8.0,110.0,29.74,...|11.6|\n|[9.0,88.5,21.24,9...|10.3|\n|[15.0,70.367,20.5...| 9.2|\n|[12.0,88.5,21.24,...| 9.3|\n|[20.0,70.367,20.5...| 9.2|\n+--------------------+----+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+--------------------+----+\n| 
features|crew|\n+--------------------+----+\n|[6.0,30.276999999...|3.55|\n|[6.0,30.276999999...|3.55|\n|[26.0,47.262,14.8...| 6.7|\n|[11.0,110.0,29.74...|19.1|\n|[17.0,101.353,26....|10.0|\n|[22.0,70.367,20.5...| 9.2|\n|[15.0,70.367,20.5...| 9.2|\n|[23.0,70.367,20.5...| 9.2|\n|[19.0,70.367,20.5...| 9.2|\n|[6.0,110.23899999...|11.5|\n|[10.0,110.0,29.74...|11.6|\n|[28.0,46.052,14.5...| 6.6|\n|[18.0,70.367,20.5...| 9.2|\n|[17.0,70.367,20.5...| 9.2|\n|[11.0,86.0,21.24,...| 9.3|\n|[8.0,110.0,29.74,...|11.6|\n|[9.0,88.5,21.24,9...|10.3|\n|[15.0,70.367,20.5...| 9.2|\n|[12.0,88.5,21.24,...| 9.3|\n|[20.0,70.367,20.5...| 9.2|\n+--------------------+----+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},
{"cell_type":"code","source":["final_data = output.select(\"features\", \"crew\")"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"3ade3063-c109-40cb-ad28-e415b1ec1ef3"}},"outputs":[],"execution_count":0},
{"cell_type":"code","source":["# Fixed seed: randomSplit samples rows at random, so without one every run produces a\n# different train/test partition and the coefficients and metrics below cannot be reproduced.\ntrain_data,test_data = final_data.randomSplit([0.7,0.3], seed=42)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e5413f02-8c1a-42ba-8452-202372a5a5cc"}},"outputs":[],"execution_count":0},
{"cell_type":"code","source":["# Only labelCol is set; featuresCol keeps its default 'features', which is the\n# outputCol the VectorAssembler above writes to.\nlr = LinearRegression(labelCol='crew')"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"7c1d012f-49b2-4724-be63-218f55958909"}},"outputs":[],"execution_count":0},
{"cell_type":"code","source":["lrModel = lr.fit(train_data)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"148d915d-cfcc-4a1f-be45-ec0ee465ce1e"}},"outputs":[],"execution_count":0},
{"cell_type":"code","source":["print(\"Coefficients: {} Intercept: {}\".format(lrModel.coefficients,lrModel.intercept))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"49030922-38b7-476c-beef-bef793cb0ab0"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Coefficients: [-0.00450918681822143,0.012963558609912268,-0.15963811898272345,0.3462005314106989,0.8897395131080795,-0.008624377855820762,0.06069326390922283] Intercept: -0.784750205393044\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Coefficients: [-0.00450918681822143,0.012963558609912268,-0.15963811898272345,0.3462005314106989,0.8897395131080795,-0.008624377855820762,0.06069326390922283] Intercept: -0.784750205393044\n"]}}],"execution_count":0},
{"cell_type":"code","source":["test_results = lrModel.evaluate(test_data)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"3127ab02-785d-4782-b81a-66a63d67555d"}},"outputs":[],"execution_count":0},
{"cell_type":"code","source":["print(\"RMSE: {}\".format(test_results.rootMeanSquaredError))\nprint(\"MSE: {}\".format(test_results.meanSquaredError))\nprint(\"R2: {}\".format(test_results.r2))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"b3b8434b-f2b7-4d78-ad01-0fc2378a5368"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"RMSE: 1.0837245330819674\nMSE: 1.174458863603728\nR2: 0.8798711702553128\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["RMSE: 1.0837245330819674\nMSE: 1.174458863603728\nR2: 
0.8798711702553128\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.select(corr('crew','passengers')).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"105826e7-8c53-470c-8ee2-5cda5c94b6f6"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+----------------------+\n|corr(crew, passengers)|\n+----------------------+\n| 0.9152341306065384|\n+----------------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+----------------------+\n|corr(crew, passengers)|\n+----------------------+\n| 0.9152341306065384|\n+----------------------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["df.select(corr('crew','cabins')).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"299e82a5-bd74-4f1b-86d3-7fb688538ac6"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+------------------+\n|corr(crew, cabins)|\n+------------------+\n|0.9508226063578497|\n+------------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+------------------+\n|corr(crew, cabins)|\n+------------------+\n|0.9508226063578497|\n+------------------+\n\n"]}}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"ml-lr-regression-case","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":3598965581908843}},"nbformat":4,"nbformat_minor":0}