{"cells":[{"cell_type":"markdown","source":["Spark MLlib library for Machine Learning provides a Collaborative Filtering implementation by using Alternating Least Squares. The implementation in MLlib has these parameters:\n\n* numBlocks is the number of blocks used to parallelize computation (set to -1 to auto-configure).\n* rank is the number of latent factors in the model.\n* iterations is the number of iterations to run.\n* lambda specifies the regularization parameter in ALS.\n* implicitPrefs specifies whether to use the explicit feedback ALS variant or one adapted for implicit feedback data.\n* alpha is a parameter applicable to the implicit feedback variant of ALS that governs the baseline confidence in preference observations."],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"9b3dda7b-53e7-4d44-a3cd-432c8014d64d"}}},{"cell_type":"code","source":["from pyspark.sql import SparkSession"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e40be79e-8e95-454c-8edb-1884d5dda9c0"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["spark = SparkSession.builder.appName('rec').getOrCreate()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"f95adf64-71f5-4715-a98c-113e18cc9735"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["from pyspark.ml.evaluation import RegressionEvaluator\nfrom pyspark.ml.recommendation import ALS"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"b7bbdb9a-510c-406a-aaa0-120df5c8a348"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["data = spark.read.csv(\"dbfs:/FileStore/shared_uploads/dizhen@hsph.harvard.edu/movielens_ratings.csv\",inferSchema=True,header=True)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"217843b4-f1bc-4ffe-98ac-a29ccdb824ee"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["data.head()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"44c8312a-6701-4e78-a5c2-d60fada5239c"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[5]: Row(movieId=2, rating=3.0, userId=0)","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[5]: Row(movieId=2, rating=3.0, userId=0)"]}}],"execution_count":0},{"cell_type":"code","source":["data.describe().show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"06a5696e-337d-4abe-828e-f2f2409a2c90"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------+------------------+------------------+------------------+\n|summary| movieId| rating| userId|\n+-------+------------------+------------------+------------------+\n| count| 1501| 1501| 1501|\n| mean| 49.40572951365756|1.7741505662891406|14.383744170552964|\n| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|\n| min| 0| 1.0| 0|\n| max| 99| 5.0| 29|\n+-------+------------------+------------------+------------------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------+------------------+------------------+------------------+\n|summary| movieId| rating| userId|\n+-------+------------------+------------------+------------------+\n| count| 1501| 1501| 1501|\n| mean| 49.40572951365756|1.7741505662891406|14.383744170552964|\n| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|\n| min| 0| 1.0| 0|\n| max| 99| 5.0| 29|\n+-------+------------------+------------------+------------------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["(training, test) = data.randomSplit([0.8, 0.2])"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"7cf4c894-cd59-4c8d-a5f9-8b93129e0e7f"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["als = ALS(maxIter=5, regParam=0.01, userCol=\"userId\", itemCol=\"movieId\", ratingCol=\"rating\")\nmodel = als.fit(training)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"a6668d49-2339-4151-a62f-e8f99f5bb4f5"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["predictions = model.transform(test)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"1e9cc105-baae-445a-902c-73855faee119"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["predictions.show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"9c39b2f9-e334-452a-82bd-daffce12e6f7"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------+------+------+-----------+\n|movieId|rating|userId| prediction|\n+-------+------+------+-----------+\n| 2| 2.0| 1| 1.9871931|\n| 1| 1.0| 6| 0.28594762|\n| 4| 1.0| 7| 2.146959|\n| 0| 1.0| 8| 1.8356189|\n| 4| 2.0| 8| 0.87620103|\n| 2| 3.0| 9| 2.1391335|\n| 4| 1.0| 9| 2.4214845|\n| 0| 1.0| 11| -1.3260899|\n| 2| 1.0| 12| 3.1861515|\n| 3| 1.0| 13| 2.1452992|\n| 4| 2.0| 13| 0.7788123|\n| 2| 1.0| 15| 2.4739377|\n| 2| 1.0| 17| -2.657514|\n| 3| 1.0| 17| 0.2842377|\n| 2| 2.0| 20| -1.2231252|\n| 3| 2.0| 22| 0.5278863|\n| 0| 1.0| 23| 0.34326315|\n| 4| 1.0| 23| 1.9485532|\n| 0| 1.0| 27|-0.45441574|\n| 0| 3.0| 28| 0.7916339|\n+-------+------+------+-----------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------+------+------+-----------+\n|movieId|rating|userId| prediction|\n+-------+------+------+-----------+\n| 2| 2.0| 1| 1.9871931|\n| 1| 1.0| 6| 0.28594762|\n| 4| 1.0| 7| 2.146959|\n| 0| 1.0| 8| 1.8356189|\n| 4| 2.0| 8| 0.87620103|\n| 2| 3.0| 9| 2.1391335|\n| 4| 1.0| 9| 2.4214845|\n| 0| 1.0| 11| -1.3260899|\n| 2| 1.0| 12| 3.1861515|\n| 3| 1.0| 13| 2.1452992|\n| 4| 2.0| 13| 0.7788123|\n| 2| 1.0| 15| 2.4739377|\n| 2| 1.0| 17| -2.657514|\n| 3| 1.0| 17| 0.2842377|\n| 2| 2.0| 20| -1.2231252|\n| 3| 2.0| 22| 0.5278863|\n| 0| 1.0| 23| 0.34326315|\n| 4| 1.0| 23| 1.9485532|\n| 0| 1.0| 27|-0.45441574|\n| 0| 3.0| 28| 0.7916339|\n+-------+------+------+-----------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["evaluator = RegressionEvaluator(metricName=\"rmse\", labelCol=\"rating\",predictionCol=\"prediction\")\nrmse = evaluator.evaluate(predictions)\nprint(\"Root-mean-square error = \" + str(rmse))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e6ba5489-b310-4a9d-8c81-afa9b36b531a"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Root-mean-square error = 1.783041436897024\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Root-mean-square error = 1.783041436897024\n"]}}],"execution_count":0},{"cell_type":"code","source":["single_user = test.filter(test['userId']==11).select(['movieId','userId'])"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e7bce452-8390-48eb-bfc1-f0c4bba56480"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["single_user.show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"f6ca89b3-726b-430e-a563-349e9bb9e8c2"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------+------+\n|movieId|userId|\n+-------+------+\n| 0| 11|\n| 9| 11|\n| 12| 11|\n| 20| 11|\n| 25| 11|\n| 43| 11|\n| 47| 11|\n| 51| 11|\n| 66| 11|\n| 75| 11|\n| 81| 11|\n| 97| 11|\n| 99| 11|\n+-------+------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------+------+\n|movieId|userId|\n+-------+------+\n| 0| 11|\n| 9| 11|\n| 12| 11|\n| 20| 11|\n| 25| 11|\n| 43| 11|\n| 47| 11|\n| 51| 11|\n| 66| 11|\n| 75| 11|\n| 81| 11|\n| 97| 11|\n| 99| 11|\n+-------+------+\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["reccomendations = model.transform(single_user)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"d7c0d942-993d-41bc-86d7-7c6acc7e419a"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["reccomendations.orderBy('prediction',ascending=False).show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"910fe3d1-5ea7-4758-a0af-3443bd2f0755"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------+------+----------+\n|movieId|userId|prediction|\n+-------+------+----------+\n| 47| 11| 2.4248514|\n| 20| 11| 1.6464235|\n| 43| 11| 1.3852543|\n| 9| 11| 1.372124|\n| 97| 11| 1.0738724|\n| 25| 11| 1.0302699|\n| 99| 11|0.95755816|\n| 51| 11|0.62003374|\n| 81| 11| 0.5865185|\n| 12| 11| 0.44638|\n| 75| 11|0.27664962|\n| 66| 11|0.22641006|\n| 0| 11|-1.3260899|\n+-------+------+----------+\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------+------+----------+\n|movieId|userId|prediction|\n+-------+------+----------+\n| 47| 11| 2.4248514|\n| 20| 11| 1.6464235|\n| 43| 11| 1.3852543|\n| 9| 11| 1.372124|\n| 97| 11| 1.0738724|\n| 25| 11| 1.0302699|\n| 99| 11|0.95755816|\n| 51| 11|0.62003374|\n| 81| 11| 0.5865185|\n| 12| 11| 0.44638|\n| 75| 11|0.27664962|\n| 66| 11|0.22641006|\n| 0| 11|-1.3260899|\n+-------+------+----------+\n\n"]}}],"execution_count":0},{"cell_type":"markdown","source":["Mapping\n\n { 2. : \"Chicken Curry\", \n 3. : \"Spicy Chicken Nuggest\", \n 5. : \"Hamburger\", \n 9. : \"Taco Surprise\", \n 11. : \"Meatloaf\", \n 12. : \"Ceaser Salad\", \n 15. : \"BBQ Ribs\", \n 17. : \"Sushi Plate\", \n 19. : \"Cheesesteak Sandwhich\", \n 21. : \"Lasagna\", \n 23. : \"Orange Chicken\",\n 26. : \"Spicy Beef Plate\", \n 27. : \"Salmon with Mashed Potatoes\", \n 28. : \"Penne Tomatoe Pasta\", \n 29. : \"Pork Sliders\", \n 30. : \"Vietnamese Sandwich\", \n 31. : \"Chicken Wrap\", \n np.nan: \"Cowboy Burger\", \n 4. : \"Pretzels and Cheese Plate\", \n 6. : \"Spicy Pork Sliders\", \n 13. : \"Mandarin Chicken PLate\", \n 14. : \"Kung Pao Chicken\",\n 16. : \"Fried Rice Plate\", \n 8. : \"Chicken Chow Mein\", \n 10. : \"Roasted Eggplant \", \n 18. : \"Pepperoni Pizza\", \n 22. : \"Pulled Pork Plate\", \n 0. : \"Cheese Pizza\", \n 1. : \"Burrito\", \n 7. : \"Nachos\", \n 24. : \"Chili\", \n 20. : \"Southwest Salad\", \n 25.: \"Roast Beef Sandwich\"}"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e2b2050d-ac08-4367-90e1-0be24c296803"}}},{"cell_type":"code","source":["data = spark.read.csv(\"dbfs:/FileStore/shared_uploads/dizhen@hsph.harvard.edu/Meal_Info.csv\",inferSchema=True,header=True)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"2cdb5907-0093-4f7b-9337-8fc776a63662"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["(training, test) = data.randomSplit([0.8, 0.2])"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"1702db9b-3b2b-45a3-b25c-b8c37905086d"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["als = ALS(maxIter=5, regParam=0.01, userCol=\"userId\", itemCol=\"mealskew\", ratingCol=\"rating\")\nmodel = als.fit(training)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"a9cbadd2-4d54-4c12-bfdb-b8c44891305f"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"data":"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m\n\u001B[0;31mPy4JJavaError\u001B[0m Traceback (most recent call last)\n\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0mals\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mALS\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmaxIter\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m5\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mregParam\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m0.01\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0muserCol\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m\"userId\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mitemCol\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m\"mealskew\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mratingCol\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m\"rating\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mmodel\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mals\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtraining\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\n\u001B[0;32m/databricks/python_shell/dbruntime/MLWorkloadsInstrumentation/_pyspark.py\u001B[0m in \u001B[0;36mpatched_method\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 28\u001B[0m \u001B[0mcall_succeeded\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mFalse\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 29\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 30\u001B[0;31m \u001B[0mresult\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0moriginal_method\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 31\u001B[0m \u001B[0mcall_succeeded\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 32\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mresult\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/pyspark/ml/base.py\u001B[0m in \u001B[0;36mfit\u001B[0;34m(self, dataset, params)\u001B[0m\n\u001B[1;32m 159\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcopy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 160\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 161\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 162\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m raise TypeError(\"Params must be either a param map or a list/tuple of param maps, \"\n\n\u001B[0;32m/databricks/spark/python/pyspark/ml/wrapper.py\u001B[0m in \u001B[0;36m_fit\u001B[0;34m(self, dataset)\u001B[0m\n\u001B[1;32m 333\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 334\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 335\u001B[0;31m \u001B[0mjava_model\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_fit_java\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 336\u001B[0m \u001B[0mmodel\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_create_model\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mjava_model\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 337\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_copyValues\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmodel\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/pyspark/ml/wrapper.py\u001B[0m in \u001B[0;36m_fit_java\u001B[0;34m(self, dataset)\u001B[0m\n\u001B[1;32m 330\u001B[0m \"\"\"\n\u001B[1;32m 331\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_transfer_params_to_java\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 332\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_java_obj\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_jdf\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 333\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 334\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, *args)\u001B[0m\n\u001B[1;32m 1302\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1303\u001B[0m \u001B[0manswer\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgateway_client\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msend_command\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcommand\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m-> 1304\u001B[0;31m return_value = get_return_value(\n\u001B[0m\u001B[1;32m 1305\u001B[0m answer, self.gateway_client, self.target_id, self.name)\n\u001B[1;32m 1306\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/pyspark/sql/utils.py\u001B[0m in \u001B[0;36mdeco\u001B[0;34m(*a, **kw)\u001B[0m\n\u001B[1;32m 115\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mdeco\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkw\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 116\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 117\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkw\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 118\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0mpy4j\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mprotocol\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mPy4JJavaError\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0me\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 119\u001B[0m \u001B[0mconverted\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mconvert_exception\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0me\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mjava_exception\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/protocol.py\u001B[0m in \u001B[0;36mget_return_value\u001B[0;34m(answer, gateway_client, target_id, name)\u001B[0m\n\u001B[1;32m 324\u001B[0m \u001B[0mvalue\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mOUTPUT_CONVERTER\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mtype\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0manswer\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m2\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mgateway_client\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 325\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0manswer\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m==\u001B[0m \u001B[0mREFERENCE_TYPE\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 326\u001B[0;31m raise Py4JJavaError(\n\u001B[0m\u001B[1;32m 327\u001B[0m \u001B[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 328\u001B[0m format(target_id, \".\", name), value)\n\n\u001B[0;31mPy4JJavaError\u001B[0m: An error occurred while calling o894.fit.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 218.0 failed 1 times, most recent failure: Lost task 0.0 in stage 218.0 (TID 251) (ip-10-172-165-175.us-west-2.compute.internal executor driver): org.apache.spark.SparkException: Failed to execute user defined function (ALSModelParams$$Lambda$6638/1722650802: (double) => int)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:166)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)\n\tat scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:224)\n\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)\n\tat org.apache.spark.scheduler.Task.doRunTask(Task.scala:156)\n\tat org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:125)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:95)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:826)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1670)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:829)\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:684)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: java.lang.IllegalArgumentException: ALS only supports values in Integer range for columns userId and mealskew. Value null was not numeric.\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1(ALS.scala:105)\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1$adapted(ALS.scala:90)\n\t... 29 more\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2984)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2931)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2925)\n\tat scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)\n\tat scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2925)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1345)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1345)\n\tat scala.Option.foreach(Option.scala:407)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1345)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3193)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3134)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3122)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1107)\n\tat org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2628)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2611)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2649)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2668)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2693)\n\tat org.apache.spark.rdd.RDD.count(RDD.scala:1263)\n\tat org.apache.spark.ml.recommendation.ALS$.train(ALS.scala:974)\n\tat org.apache.spark.ml.recommendation.ALS.$anonfun$fit$1(ALS.scala:723)\n\tat org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:284)\n\tat scala.util.Try$.apply(Try.scala:213)\n\tat org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:284)\n\tat org.apache.spark.ml.recommendation.ALS.fit(ALS.scala:705)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)\n\tat py4j.Gateway.invoke(Gateway.java:295)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:251)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: org.apache.spark.SparkException: Failed to execute user defined function (ALSModelParams$$Lambda$6638/1722650802: (double) => int)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:166)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)\n\tat scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:224)\n\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)\n\tat org.apache.spark.scheduler.Task.doRunTask(Task.scala:156)\n\tat org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:125)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:95)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:826)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1670)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:829)\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:684)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\t... 1 more\nCaused by: java.lang.IllegalArgumentException: ALS only supports values in Integer range for columns userId and mealskew. Value null was not numeric.\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1(ALS.scala:105)\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1$adapted(ALS.scala:90)\n\t... 29 more\n","errorSummary":"org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 218.0 failed 1 times, most recent failure: Lost task 0.0 in stage 218.0 (TID 251) (ip-10-172-165-175.us-west-2.compute.internal executor driver): org.apache.spark.SparkException: Failed to execute user defined function (ALSModelParams$$Lambda$6638/1722650802: (double) => int)","metadata":{},"errorTraceType":"ansi","type":"ipynbError","arguments":{}}},"output_type":"display_data","data":{"text/plain":["\u001B[0;31m---------------------------------------------------------------------------\u001B[0m\n\u001B[0;31mPy4JJavaError\u001B[0m Traceback (most recent call last)\n\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0mals\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mALS\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmaxIter\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m5\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mregParam\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m0.01\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0muserCol\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m\"userId\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mitemCol\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m\"mealskew\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mratingCol\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m\"rating\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mmodel\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mals\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtraining\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\n\u001B[0;32m/databricks/python_shell/dbruntime/MLWorkloadsInstrumentation/_pyspark.py\u001B[0m in \u001B[0;36mpatched_method\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 28\u001B[0m \u001B[0mcall_succeeded\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mFalse\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 29\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 30\u001B[0;31m \u001B[0mresult\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0moriginal_method\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 31\u001B[0m \u001B[0mcall_succeeded\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 32\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mresult\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/pyspark/ml/base.py\u001B[0m in \u001B[0;36mfit\u001B[0;34m(self, dataset, params)\u001B[0m\n\u001B[1;32m 159\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcopy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 160\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 161\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 162\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m raise TypeError(\"Params must be either a param map or a list/tuple of param maps, \"\n\n\u001B[0;32m/databricks/spark/python/pyspark/ml/wrapper.py\u001B[0m in \u001B[0;36m_fit\u001B[0;34m(self, dataset)\u001B[0m\n\u001B[1;32m 333\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 334\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 335\u001B[0;31m \u001B[0mjava_model\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_fit_java\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 336\u001B[0m \u001B[0mmodel\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_create_model\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mjava_model\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 337\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_copyValues\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmodel\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/pyspark/ml/wrapper.py\u001B[0m in \u001B[0;36m_fit_java\u001B[0;34m(self, dataset)\u001B[0m\n\u001B[1;32m 330\u001B[0m \"\"\"\n\u001B[1;32m 331\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_transfer_params_to_java\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 332\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_java_obj\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_jdf\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 333\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 334\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_fit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdataset\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, *args)\u001B[0m\n\u001B[1;32m 1302\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1303\u001B[0m \u001B[0manswer\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgateway_client\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msend_command\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcommand\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m-> 1304\u001B[0;31m return_value = get_return_value(\n\u001B[0m\u001B[1;32m 1305\u001B[0m answer, self.gateway_client, self.target_id, self.name)\n\u001B[1;32m 1306\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/pyspark/sql/utils.py\u001B[0m in \u001B[0;36mdeco\u001B[0;34m(*a, **kw)\u001B[0m\n\u001B[1;32m 115\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mdeco\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkw\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 116\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 117\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkw\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 118\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0mpy4j\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mprotocol\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mPy4JJavaError\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0me\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 119\u001B[0m \u001B[0mconverted\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mconvert_exception\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0me\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mjava_exception\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\n\u001B[0;32m/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/protocol.py\u001B[0m in \u001B[0;36mget_return_value\u001B[0;34m(answer, gateway_client, target_id, name)\u001B[0m\n\u001B[1;32m 324\u001B[0m \u001B[0mvalue\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mOUTPUT_CONVERTER\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mtype\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0manswer\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m2\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mgateway_client\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 325\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0manswer\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m==\u001B[0m \u001B[0mREFERENCE_TYPE\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 326\u001B[0;31m raise Py4JJavaError(\n\u001B[0m\u001B[1;32m 327\u001B[0m \u001B[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 328\u001B[0m format(target_id, \".\", name), value)\n\n\u001B[0;31mPy4JJavaError\u001B[0m: An error occurred while calling o894.fit.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 218.0 failed 1 times, most recent failure: Lost task 0.0 in stage 218.0 (TID 251) (ip-10-172-165-175.us-west-2.compute.internal executor driver): org.apache.spark.SparkException: Failed to execute user defined function (ALSModelParams$$Lambda$6638/1722650802: (double) => int)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:166)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)\n\tat scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:224)\n\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)\n\tat org.apache.spark.scheduler.Task.doRunTask(Task.scala:156)\n\tat org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:125)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:95)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:826)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1670)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:829)\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:684)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: java.lang.IllegalArgumentException: ALS only supports values in Integer range for columns userId and mealskew. Value null was not numeric.\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1(ALS.scala:105)\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1$adapted(ALS.scala:90)\n\t... 29 more\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2984)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2931)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2925)\n\tat scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)\n\tat scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2925)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1345)\n\tat org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1345)\n\tat scala.Option.foreach(Option.scala:407)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1345)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3193)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3134)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3122)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)\n\tat org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1107)\n\tat org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2628)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2611)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2649)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2668)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2693)\n\tat org.apache.spark.rdd.RDD.count(RDD.scala:1263)\n\tat org.apache.spark.ml.recommendation.ALS$.train(ALS.scala:974)\n\tat org.apache.spark.ml.recommendation.ALS.$anonfun$fit$1(ALS.scala:723)\n\tat org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:284)\n\tat scala.util.Try$.apply(Try.scala:213)\n\tat org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:284)\n\tat org.apache.spark.ml.recommendation.ALS.fit(ALS.scala:705)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)\n\tat py4j.Gateway.invoke(Gateway.java:295)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:251)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: org.apache.spark.SparkException: Failed to execute user defined function (ALSModelParams$$Lambda$6638/1722650802: (double) => int)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:166)\n\tat org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)\n\tat scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:224)\n\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:81)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)\n\tat org.apache.spark.scheduler.Task.doRunTask(Task.scala:156)\n\tat org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:125)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:95)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:826)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1670)\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:829)\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n\tat com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:684)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\t... 1 more\nCaused by: java.lang.IllegalArgumentException: ALS only supports values in Integer range for columns userId and mealskew. Value null was not numeric.\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1(ALS.scala:105)\n\tat org.apache.spark.ml.recommendation.ALSModelParams.$anonfun$checkedCast$1$adapted(ALS.scala:90)\n\t... 29 more\n"]}}],"execution_count":0},{"cell_type":"code","source":["predictions = model.transform(test)\npredictions.show()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"1e615de1-79f4-47ec-8631-8123a3eb1b7e"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"+-------+------+------+--------+--------------------+----------+\n|movieId|rating|userId|mealskew| meal_name|prediction|\n+-------+------+------+--------+--------------------+----------+\n| 2| 2.0| 1| 2.0| Chicken Curry| 1.9871931|\n| 3| 1.0| 1| 3.0|Spicy Chicken Nug...|0.90050435|\n| 4| 3.0| 2| 4.0|Pretzels and Chee...| 2.912253|\n| 0| 1.0| 5| 0.0| Cheese Pizza| 1.1937842|\n| 3| 1.0| 7| 3.0|Spicy Chicken Nug...| 1.367688|\n| 4| 1.0| 7| 4.0|Pretzels and Chee...| 2.146959|\n| 3| 2.0| 8| 3.0|Spicy Chicken Nug...| 1.7796037|\n| 2| 3.0| 9| 2.0| Chicken Curry| 2.1391335|\n| 3| 1.0| 9| 3.0|Spicy Chicken Nug...| 1.0622486|\n| 2| 1.0| 12| 2.0| Chicken Curry| 3.1861515|\n| 4| 1.0| 12| 4.0|Pretzels and Chee...|0.73995054|\n| 3| 1.0| 13| 3.0|Spicy Chicken Nug...| 2.1452992|\n| 1| 4.0| 15| 1.0| Burrito| 2.993206|\n| 0| 1.0| 19| 0.0| Cheese Pizza| 1.0666925|\n| 1| 1.0| 19| 1.0| Burrito| 0.7900078|\n| 0| 1.0| 20| 0.0| Cheese Pizza| 1.0853959|\n| 1| 1.0| 20| 1.0| Burrito| 1.0895385|\n| 0| 1.0| 23| 0.0| Cheese Pizza|0.34326315|\n| 2| 1.0| 23| 2.0| Chicken Curry| 0.9516133|\n| 1| 1.0| 26| 1.0| Burrito| 0.8212584|\n+-------+------+------+--------+--------------------+----------+\nonly showing top 20 rows\n\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["+-------+------+------+--------+--------------------+----------+\n|movieId|rating|userId|mealskew| meal_name|prediction|\n+-------+------+------+--------+--------------------+----------+\n| 2| 2.0| 1| 2.0| Chicken Curry| 1.9871931|\n| 3| 1.0| 1| 3.0|Spicy Chicken Nug...|0.90050435|\n| 4| 3.0| 2| 4.0|Pretzels and Chee...| 2.912253|\n| 0| 1.0| 5| 0.0| Cheese Pizza| 1.1937842|\n| 3| 1.0| 7| 3.0|Spicy Chicken Nug...| 1.367688|\n| 4| 1.0| 7| 4.0|Pretzels and Chee...| 2.146959|\n| 3| 2.0| 8| 3.0|Spicy Chicken Nug...| 1.7796037|\n| 2| 3.0| 9| 2.0| Chicken Curry| 2.1391335|\n| 3| 1.0| 9| 3.0|Spicy Chicken Nug...| 1.0622486|\n| 2| 1.0| 12| 2.0| Chicken Curry| 3.1861515|\n| 4| 1.0| 12| 4.0|Pretzels and Chee...|0.73995054|\n| 3| 1.0| 13| 3.0|Spicy Chicken Nug...| 2.1452992|\n| 1| 4.0| 15| 1.0| Burrito| 2.993206|\n| 0| 1.0| 19| 0.0| Cheese Pizza| 1.0666925|\n| 1| 1.0| 19| 1.0| Burrito| 0.7900078|\n| 0| 1.0| 20| 0.0| Cheese Pizza| 1.0853959|\n| 1| 1.0| 20| 1.0| Burrito| 1.0895385|\n| 0| 1.0| 23| 0.0| Cheese Pizza|0.34326315|\n| 2| 1.0| 23| 2.0| Chicken Curry| 0.9516133|\n| 1| 1.0| 26| 1.0| Burrito| 0.8212584|\n+-------+------+------+--------+--------------------+----------+\nonly showing top 20 rows\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["evaluator = RegressionEvaluator(metricName=\"rmse\", labelCol=\"rating\",predictionCol=\"prediction\")\nrmse = evaluator.evaluate(predictions)\nprint(\"Root-mean-square error = \" + str(rmse))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"0b3355cc-b6a9-48ad-9f69-9d32368e52aa"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Root-mean-square error = 0.8463110939439068\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Root-mean-square error = 0.8463110939439068\n"]}}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"ml-recommender","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":1680652028931009}},"nbformat":4,"nbformat_minor":0}