"""Random Forest regression on California Housing with percentile bounds.

Trains a RandomForestRegressor and, for each test sample, reports the forest's
mean prediction together with lower/upper percentiles taken across the
predictions of the individual trees.
"""

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the California Housing dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features for better performance in some models.
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define quantiles for lower and upper bounds
low_quantile = 0.05
high_quantile = 0.95

# --- Random Forest with Quantile Estimation ---

# Initialize and train the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Collect every tree's prediction once; rows are trees, columns are test
# samples. Both bounds are derived from this single array instead of
# re-running all trees for each bound.
tree_preds_rf = np.stack([tree.predict(X_test) for tree in rf_model.estimators_])

# Predict the lower and upper quantiles (one percentile call for both bounds).
# NOTE: these are percentiles of the per-tree predictions, i.e. they describe
# the spread between trees; treat them as an approximate band rather than a
# calibrated prediction interval.
low_quantile_pred_rf, high_quantile_pred_rf = np.percentile(
    tree_preds_rf,
    [low_quantile * 100, high_quantile * 100],
    axis=0,
)

# Predict the mean (central) estimate for Random Forest
mean_pred_rf = rf_model.predict(X_test)

# Display results for Random Forest.
# round() rather than int() so float noise in q * 100 (e.g. 28.999999...)
# cannot truncate the printed percentile label.
print("Random Forest Predictions:")
print("Mean Predictions:", mean_pred_rf[:10])
print(f"{round(low_quantile * 100)}th Percentile Predictions (Lower Bound):", low_quantile_pred_rf[:10])
print(f"{round(high_quantile * 100)}th Percentile Predictions (Upper Bound):", high_quantile_pred_rf[:10])