{"cells":[{"cell_type":"markdown","source":["# Install ELI5"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"de587f35-6fba-430e-a116-3e9c69fc0729","inputWidgets":{},"title":""}}},{"cell_type":"code","source":["%pip install eli5==0.13.0"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"b9d06c97-fa9e-4bb2-802d-20d42cee66be","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Python interpreter will be restarted.\nRequirement already satisfied: eli5 in /local_disk0/.ephemeral_nfs/envs/pythonEnv-6838ed59-5f7b-4739-91aa-aab22bb33605/lib/python3.9/site-packages (0.13.0)\nRequirement already satisfied: scipy in /databricks/python3/lib/python3.9/site-packages (from eli5) (1.7.1)\nRequirement already satisfied: tabulate>=0.7.7 in /databricks/python3/lib/python3.9/site-packages (from eli5) (0.8.9)\nRequirement already satisfied: numpy>=1.9.0 in /databricks/python3/lib/python3.9/site-packages (from eli5) (1.20.3)\nRequirement already satisfied: jinja2>=3.0.0 in /local_disk0/.ephemeral_nfs/envs/pythonEnv-6838ed59-5f7b-4739-91aa-aab22bb33605/lib/python3.9/site-packages (from eli5) (3.1.2)\nRequirement already satisfied: attrs>17.1.0 in /databricks/python3/lib/python3.9/site-packages (from eli5) (21.2.0)\nRequirement already satisfied: graphviz in /local_disk0/.ephemeral_nfs/envs/pythonEnv-6838ed59-5f7b-4739-91aa-aab22bb33605/lib/python3.9/site-packages (from eli5) (0.20.1)\nRequirement already satisfied: six in /databricks/python3/lib/python3.9/site-packages (from eli5) (1.16.0)\nRequirement already satisfied: scikit-learn>=0.20 in /databricks/python3/lib/python3.9/site-packages (from eli5) (0.24.2)\nRequirement already satisfied: MarkupSafe>=2.0 in /databricks/python3/lib/python3.9/site-packages (from jinja2>=3.0.0->eli5) (2.0.1)\nRequirement already satisfied: threadpoolctl>=2.0.0 in 
/databricks/python3/lib/python3.9/site-packages (from scikit-learn>=0.20->eli5) (2.2.0)\nRequirement already satisfied: joblib>=0.11 in /databricks/python3/lib/python3.9/site-packages (from scikit-learn>=0.20->eli5) (1.0.1)\nPython interpreter will be restarted.\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Python interpreter will be restarted.\nRequirement already satisfied: eli5 in /local_disk0/.ephemeral_nfs/envs/pythonEnv-6838ed59-5f7b-4739-91aa-aab22bb33605/lib/python3.9/site-packages (0.13.0)\nRequirement already satisfied: scipy in /databricks/python3/lib/python3.9/site-packages (from eli5) (1.7.1)\nRequirement already satisfied: tabulate>=0.7.7 in /databricks/python3/lib/python3.9/site-packages (from eli5) (0.8.9)\nRequirement already satisfied: numpy>=1.9.0 in /databricks/python3/lib/python3.9/site-packages (from eli5) (1.20.3)\nRequirement already satisfied: jinja2>=3.0.0 in /local_disk0/.ephemeral_nfs/envs/pythonEnv-6838ed59-5f7b-4739-91aa-aab22bb33605/lib/python3.9/site-packages (from eli5) (3.1.2)\nRequirement already satisfied: attrs>17.1.0 in /databricks/python3/lib/python3.9/site-packages (from eli5) (21.2.0)\nRequirement already satisfied: graphviz in /local_disk0/.ephemeral_nfs/envs/pythonEnv-6838ed59-5f7b-4739-91aa-aab22bb33605/lib/python3.9/site-packages (from eli5) (0.20.1)\nRequirement already satisfied: six in /databricks/python3/lib/python3.9/site-packages (from eli5) (1.16.0)\nRequirement already satisfied: scikit-learn>=0.20 in /databricks/python3/lib/python3.9/site-packages (from eli5) (0.24.2)\nRequirement already satisfied: MarkupSafe>=2.0 in /databricks/python3/lib/python3.9/site-packages (from jinja2>=3.0.0->eli5) (2.0.1)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /databricks/python3/lib/python3.9/site-packages (from scikit-learn>=0.20->eli5) (2.2.0)\nRequirement already satisfied: joblib>=0.11 in 
/databricks/python3/lib/python3.9/site-packages (from scikit-learn>=0.20->eli5) (1.0.1)\nPython interpreter will be restarted.\n"]}}],"execution_count":0},{"cell_type":"markdown","source":["# Imports"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"16452844-3fb5-426a-bfc7-21a9b8df5c34","inputWidgets":{},"title":""}}},{"cell_type":"code","source":["import sklearn\nimport pandas as pd\nimport eli5\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn import metrics\nfrom eli5 import show_weights, show_prediction ,explain_weights\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.feature_extraction import DictVectorizer\nimport xgboost as xgb\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.metrics import r2_score"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"7d72707c-ab55-4efc-bbc2-21f87d88706b","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":{"text/plain":"","application/vnd.databricks.v1+bamboolib_hint":"{\"pd.DataFrames\": [], \"version\": \"0.0.1\"}"},"removedWidgets":[],"addedWidgets":{},"metadata":{"kernelSessionId":"7d0b6efc-03385590aeda97208e11371a"},"type":"mimeBundle","arguments":{}}},"output_type":"display_data","data":{"text/plain":"","application/vnd.databricks.v1+bamboolib_hint":"{\"pd.DataFrames\": [], \"version\": \"0.0.1\"}"}}],"execution_count":0},{"cell_type":"code","source":["# Key columns used by both feature-table joins; defined once so the joins stay in sync.\nJOIN_KEYS = ['tractcode','state','evcount','county','isev']\n\niliwycmbd_features = spark.table('charging_stations.iliwycmbd_features')\nmdpf=spark.table('charging_stations.mdpf_features').drop('lat','long')\nbitamss= spark.table('charging_stations.bitamss').drop('lat','long')\n\nmodelTable= mdpf.join(iliwycmbd_features, JOIN_KEYS, 
'left')\nmodelTable1= modelTable.join(bitamss, JOIN_KEYS, 'left').drop('tractcode', 'state', 'county', 'closest_ev_tract', 'transport_count', 'population', 'placekey', 'daily_top_brand', 'monthly_top_brand', 'county_name').toPandas()\n\n# NOTE(review): modelTable1 and targets are collected by separate toPandas() calls with the key columns dropped,\n# so nothing ties a feature row to its target row. Confirm the two frames line up row-for-row,\n# or join targets on the tract keys before collecting.\ntargets=spark.table('charging_stations.targets')\ntargets= targets.drop('evcount','tractcode','state','county').toPandas()\n"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"30a8b508-cf13-4fd6-9419-4a4656d16357","inputWidgets":{},"title":""}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["y=targets\nX= modelTable1\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state = 14)\n\n\nmodel = xgb.XGBRegressor(random_state=14)\nmodel.fit(X_train, y_train)\n\ny_pred = model.predict(X_test)\nr2_score_original = r2_score(y_test, y_pred)\nr2_score_original"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"6e7cdf95-0283-4db5-b2de-a0f6f9d3926b","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[3]: 0.05377815038110201","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[3]: 0.05377815038110201"]}}],"execution_count":0},{"cell_type":"code","source":["show_weights(model)"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"6b0cc388-10e6-4b93-ae8a-fe8c3d7cc6d0","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.0809\n \n \n distance_km\n
\n 0.0567\n \n \n median_gas_dwell_time\n
\n 0.0428\n \n \n avg_gas_station_visits_on_weekday\n
\n 0.0410\n \n \n daily_top_brand_visits\n
\n 0.0266\n \n \n monthly_top_brand_visits\n
\n 0.0220\n \n \n land_m\n
\n 0.0184\n \n \n car_truck_or_van_carpool\n
\n 0.0182\n \n \n total_visit_counts\n
\n 0.0178\n \n \n car_truck_or_van_7_more_person\n
\n 0.0177\n \n \n car_truck_or_van_alone\n
\n 0.0175\n \n \n count_fam_homes\n
\n 0.0171\n \n \n Number_of_Schools\n
\n 0.0170\n \n \n walked\n
\n 0.0170\n \n \n some_diversity\n
\n 0.0170\n \n \n avg_gas_station_visits_on_weekend\n
\n 0.0169\n \n \n car_truck_or_van_3_person\n
\n 0.0166\n \n \n public_trans_bus\n
\n 0.0164\n \n \n population_per_gas_station\n
\n 0.0162\n \n \n population_per_libraries\n
\n 0.0160\n \n \n count_non_fam_homes\n
\n … 38 more …\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.0809\n \n \n distance_km\n
\n 0.0567\n \n \n median_gas_dwell_time\n
\n 0.0428\n \n \n avg_gas_station_visits_on_weekday\n
\n 0.0410\n \n \n daily_top_brand_visits\n
\n 0.0266\n \n \n monthly_top_brand_visits\n
\n 0.0220\n \n \n land_m\n
\n 0.0184\n \n \n car_truck_or_van_carpool\n
\n 0.0182\n \n \n total_visit_counts\n
\n 0.0178\n \n \n car_truck_or_van_7_more_person\n
\n 0.0177\n \n \n car_truck_or_van_alone\n
\n 0.0175\n \n \n count_fam_homes\n
\n 0.0171\n \n \n Number_of_Schools\n
\n 0.0170\n \n \n walked\n
\n 0.0170\n \n \n some_diversity\n
\n 0.0170\n \n \n avg_gas_station_visits_on_weekend\n
\n 0.0169\n \n \n car_truck_or_van_3_person\n
\n 0.0166\n \n \n public_trans_bus\n
\n 0.0164\n \n \n population_per_gas_station\n
\n 0.0162\n \n \n population_per_libraries\n
\n 0.0160\n \n \n count_non_fam_homes\n
\n … 38 more …\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["show_prediction(model, X.iloc[700], show_feature_values=True, top=(5,5))\n# This shows the top 5 and bottom 5 features. "],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"0d41fc4a-e188-4f02-bbbb-1eaef57a8037","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n \n\n \n\n \n \n \n \n \n

\n \n \n y\n \n\n\n \n (score -0.300)\n\ntop features\n

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n\n \n
\n Contribution?\n FeatureValue
\n +0.057\n \n distance_km\n \n 2.827\n
\n +0.008\n \n count_fam_homes\n \n 3622.000\n
\n +0.006\n \n total_visit_counts\n \n 692949.000\n
\n +0.003\n \n percentage\n \n 0.000\n
\n +0.002\n \n subway_avg_daily_traffic\n \n Missing\n
\n … 20 more positive …\n
\n … 25 more negative …\n
\n -0.014\n \n monthly_top_brand_visits\n \n 147.000\n
\n -0.015\n \n median_gas_dwell_time\n \n 7.110\n
\n -0.016\n \n square_miles\n \n 12.530\n
\n -0.016\n \n avg_gas_station_visits_on_weekday\n \n 114.000\n
\n -0.265\n \n <BIAS>\n \n 1.000\n
\n\n \n \n\n \n\n\n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n \n\n \n\n \n \n \n \n \n

\n \n \n y\n \n\n\n \n (score -0.300)\n\ntop features\n

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n\n \n
\n Contribution?\n FeatureValue
\n +0.057\n \n distance_km\n \n 2.827\n
\n +0.008\n \n count_fam_homes\n \n 3622.000\n
\n +0.006\n \n total_visit_counts\n \n 692949.000\n
\n +0.003\n \n percentage\n \n 0.000\n
\n +0.002\n \n subway_avg_daily_traffic\n \n Missing\n
\n … 20 more positive …\n
\n … 25 more negative …\n
\n -0.014\n \n monthly_top_brand_visits\n \n 147.000\n
\n -0.015\n \n median_gas_dwell_time\n \n 7.110\n
\n -0.016\n \n square_miles\n \n 12.530\n
\n -0.016\n \n avg_gas_station_visits_on_weekday\n \n 114.000\n
\n -0.265\n \n <BIAS>\n \n 1.000\n
\n\n \n \n\n \n\n\n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["show_prediction(model, X.iloc[700], show_feature_values=True,feature_re=r\"^t\" ) \n# This shows the feature contributions for the same row, filtered to the features whose names start with \"t\"."],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"d9a756d7-809e-485a-9fbe-0b7d3a43b18d","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n \n\n \n\n \n \n \n \n \n

\n \n \n y\n \n\n\n \n (score -0.300)\n\ntop features\n

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n\n \n
\n Contribution?\n FeatureValue
\n +0.006\n \n total_visit_counts\n \n 692949.000\n
\n +0.000\n \n trans_NA\n \n 2650.000\n
\n +0.000\n \n transport_station_counts\n \n 0.000\n
\n -0.000\n \n total_tract_population\n \n 5171.000\n
\n -0.000\n \n total_starbucks_by_tract\n \n Missing\n
\n -0.001\n \n taxi\n \n 0.000\n
\n\n \n \n\n \n\n\n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n \n\n \n\n \n \n \n \n \n

\n \n \n y\n \n\n\n \n (score -0.300)\n\ntop features\n

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n\n \n
\n Contribution?\n FeatureValue
\n +0.006\n \n total_visit_counts\n \n 692949.000\n
\n +0.000\n \n trans_NA\n \n 2650.000\n
\n +0.000\n \n transport_station_counts\n \n 0.000\n
\n -0.000\n \n total_tract_population\n \n 5171.000\n
\n -0.000\n \n total_starbucks_by_tract\n \n Missing\n
\n -0.001\n \n taxi\n \n 0.000\n
\n\n \n \n\n \n\n\n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["show_weights(model, feature_filter=lambda fet : fet.endswith(\"n\"))\n# This is the counterpart of the filter above, applied to the model weights: it keeps only the features whose names end with \"n\"."],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"f00999d6-cec8-4c2a-a6e0-46853c44465a","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.0178\n \n \n car_truck_or_van_7_more_person\n
\n 0.0169\n \n \n car_truck_or_van_3_person\n
\n 0.0164\n \n \n population_per_gas_station\n
\n 0.0155\n \n \n car_truck_or_van\n
\n 0.0153\n \n \n car_truck_or_van_carpool_2_person\n
\n 0.0147\n \n \n car_truck_or_van_4_person\n
\n 0.0141\n \n \n car_truck_or_van_5_6_person\n
\n 0.0140\n \n \n total_tract_population\n
\n 0.0131\n \n \n public_trans_train\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.0178\n \n \n car_truck_or_van_7_more_person\n
\n 0.0169\n \n \n car_truck_or_van_3_person\n
\n 0.0164\n \n \n population_per_gas_station\n
\n 0.0155\n \n \n car_truck_or_van\n
\n 0.0153\n \n \n car_truck_or_van_carpool_2_person\n
\n 0.0147\n \n \n car_truck_or_van_4_person\n
\n 0.0141\n \n \n car_truck_or_van_5_6_person\n
\n 0.0140\n \n \n total_tract_population\n
\n 0.0131\n \n \n public_trans_train\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"markdown","source":["# Housing example"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"b3f57178-f745-4678-8fb9-451a5fcde636","inputWidgets":{},"title":""}}},{"cell_type":"code","source":["housing = pd.read_csv('https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/housing.csv')\n#housing.info()\n#Before we do anything to the data lets see how good the model does just off the basic data\ny_before = housing.price\nX = housing.drop(columns=['id','date','price'])\nX_train, X_test, y_train, y_test = train_test_split(X, y_before, test_size=.2, random_state = 14)\n\n\nmodel = xgb.XGBRegressor(random_state=14)\nmodel.fit(X_train, y_train)\n\ny_pred = model.predict(X_test)\nr2_score_original = r2_score(y_test, y_pred)# best score so far 0.9981473851472684\nr2_score_original\n"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"2c301ea5-3c48-4dc4-bedd-ac959e474153","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"Out[8]: 0.8927099331051527","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["Out[8]: 0.8927099331051527"]}}],"execution_count":0},{"cell_type":"code","source":["\nshow_weights(model)"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"5f5167c5-fa3d-4611-8862-c4cc4e41174a","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.4605\n \n \n grade\n
\n 0.2345\n \n \n waterfront\n
\n 0.1117\n \n \n sqft_living\n
\n 0.0533\n \n \n lat\n
\n 0.0299\n \n \n view\n
\n 0.0262\n \n \n long\n
\n 0.0161\n \n \n yr_built\n
\n 0.0142\n \n \n zipcode\n
\n 0.0128\n \n \n sqft_living15\n
\n 0.0082\n \n \n sqft_above\n
\n 0.0067\n \n \n sqft_lot\n
\n 0.0060\n \n \n condition\n
\n 0.0041\n \n \n yr_renovated\n
\n 0.0039\n \n \n floors\n
\n 0.0037\n \n \n bathrooms\n
\n 0.0035\n \n \n sqft_lot15\n
\n 0.0033\n \n \n sqft_basement\n
\n 0.0014\n \n \n bedrooms\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.4605\n \n \n grade\n
\n 0.2345\n \n \n waterfront\n
\n 0.1117\n \n \n sqft_living\n
\n 0.0533\n \n \n lat\n
\n 0.0299\n \n \n view\n
\n 0.0262\n \n \n long\n
\n 0.0161\n \n \n yr_built\n
\n 0.0142\n \n \n zipcode\n
\n 0.0128\n \n \n sqft_living15\n
\n 0.0082\n \n \n sqft_above\n
\n 0.0067\n \n \n sqft_lot\n
\n 0.0060\n \n \n condition\n
\n 0.0041\n \n \n yr_renovated\n
\n 0.0039\n \n \n floors\n
\n 0.0037\n \n \n bathrooms\n
\n 0.0035\n \n \n sqft_lot15\n
\n 0.0033\n \n \n sqft_basement\n
\n 0.0014\n \n \n bedrooms\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["show_prediction(model, X.iloc[708], show_feature_values=True)"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"a0761e89-f7cf-4c89-b94d-7931fc619a89","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n \n\n \n\n \n \n \n \n \n

\n \n \n y\n \n\n\n \n (score 458161.229)\n\ntop features\n

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n\n \n
\n Contribution?\n FeatureValue
\n +540546.599\n \n <BIAS>\n \n 1.000\n
\n +38474.841\n \n lat\n \n 47.713\n
\n +14588.918\n \n yr_built\n \n 1941.000\n
\n +6750.933\n \n condition\n \n 4.000\n
\n +4767.062\n \n sqft_lot\n \n 9360.000\n
\n +3428.585\n \n long\n \n -122.283\n
\n +2019.823\n \n bedrooms\n \n 3.000\n
\n +1976.741\n \n floors\n \n 1.000\n
\n +783.795\n \n sqft_lot15\n \n 7200.000\n
\n +175.456\n \n bathrooms\n \n 1.750\n
\n -231.319\n \n waterfront\n \n 0.000\n
\n -2241.041\n \n yr_renovated\n \n 0.000\n
\n -5944.459\n \n sqft_above\n \n 940.000\n
\n -7103.751\n \n view\n \n 0.000\n
\n -7479.898\n \n zipcode\n \n 98125.000\n
\n -11595.488\n \n sqft_living\n \n 1880.000\n
\n -13477.596\n \n sqft_living15\n \n 1390.000\n
\n -19283.361\n \n sqft_basement\n \n 940.000\n
\n -87994.612\n \n grade\n \n 7.000\n
\n\n \n \n\n \n\n\n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n \n\n \n\n \n \n \n \n \n

\n \n \n y\n \n\n\n \n (score 458161.229)\n\ntop features\n

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n\n \n\n \n
\n Contribution?\n FeatureValue
\n +540546.599\n \n <BIAS>\n \n 1.000\n
\n +38474.841\n \n lat\n \n 47.713\n
\n +14588.918\n \n yr_built\n \n 1941.000\n
\n +6750.933\n \n condition\n \n 4.000\n
\n +4767.062\n \n sqft_lot\n \n 9360.000\n
\n +3428.585\n \n long\n \n -122.283\n
\n +2019.823\n \n bedrooms\n \n 3.000\n
\n +1976.741\n \n floors\n \n 1.000\n
\n +783.795\n \n sqft_lot15\n \n 7200.000\n
\n +175.456\n \n bathrooms\n \n 1.750\n
\n -231.319\n \n waterfront\n \n 0.000\n
\n -2241.041\n \n yr_renovated\n \n 0.000\n
\n -5944.459\n \n sqft_above\n \n 940.000\n
\n -7103.751\n \n view\n \n 0.000\n
\n -7479.898\n \n zipcode\n \n 98125.000\n
\n -11595.488\n \n sqft_living\n \n 1880.000\n
\n -13477.596\n \n sqft_living15\n \n 1390.000\n
\n -19283.361\n \n sqft_basement\n \n 940.000\n
\n -87994.612\n \n grade\n \n 7.000\n
\n\n \n \n\n \n\n\n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"code","source":["explain_weights(model)"],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"278c1a9c-6dfd-41a0-b69b-e92e48135923","inputWidgets":{},"title":""}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.4605\n \n \n grade\n
\n 0.2345\n \n \n waterfront\n
\n 0.1117\n \n \n sqft_living\n
\n 0.0533\n \n \n lat\n
\n 0.0299\n \n \n view\n
\n 0.0262\n \n \n long\n
\n 0.0161\n \n \n yr_built\n
\n 0.0142\n \n \n zipcode\n
\n 0.0128\n \n \n sqft_living15\n
\n 0.0082\n \n \n sqft_above\n
\n 0.0067\n \n \n sqft_lot\n
\n 0.0060\n \n \n condition\n
\n 0.0041\n \n \n yr_renovated\n
\n 0.0039\n \n \n floors\n
\n 0.0037\n \n \n bathrooms\n
\n 0.0035\n \n \n sqft_lot15\n
\n 0.0033\n \n \n sqft_basement\n
\n 0.0014\n \n \n bedrooms\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n","textData":null,"removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n \n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
WeightFeature
\n 0.4605\n \n \n grade\n
\n 0.2345\n \n \n waterfront\n
\n 0.1117\n \n \n sqft_living\n
\n 0.0533\n \n \n lat\n
\n 0.0299\n \n \n view\n
\n 0.0262\n \n \n long\n
\n 0.0161\n \n \n yr_built\n
\n 0.0142\n \n \n zipcode\n
\n 0.0128\n \n \n sqft_living15\n
\n 0.0082\n \n \n sqft_above\n
\n 0.0067\n \n \n sqft_lot\n
\n 0.0060\n \n \n condition\n
\n 0.0041\n \n \n yr_renovated\n
\n 0.0039\n \n \n floors\n
\n 0.0037\n \n \n bathrooms\n
\n 0.0035\n \n \n sqft_lot15\n
\n 0.0033\n \n \n sqft_basement\n
\n 0.0014\n \n \n bedrooms\n
\n \n\n \n\n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n"]}}],"execution_count":0},{"cell_type":"code","source":[""],"metadata":{"application/vnd.databricks.v1+cell":{"showTitle":false,"cellMetadata":{},"nuid":"d0c9f235-fd7b-45b1-8923-004db97d30b6","inputWidgets":{},"title":""}},"outputs":[],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"ModelExplainers-ELI5","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":1862310893211944}},"nbformat":4,"nbformat_minor":0}