from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
import numpy as np

# Fetch the California Housing dataset and pull out features and target
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the samples as a test set (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features; the scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Quantile levels used for the lower and upper prediction bounds
low_quantile, high_quantile = 0.05, 0.95

# --- Random Forest with Quantile Estimation ---

# Initialize and train the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate every tree on the test set exactly once and stack the results into
# an (n_trees, n_test_samples) array. Both bounds are derived from this single
# array so the forest is not re-evaluated for each quantile.
tree_preds_rf = np.stack([tree.predict(X_test) for tree in rf_model.estimators_])

# Lower and upper bounds: percentiles taken across trees (axis 0) for each
# test sample. Passing both levels at once returns one row per level.
# NOTE: these bounds describe the spread of the individual trees' point
# predictions; they are not calibrated conditional quantiles of the target.
low_quantile_pred_rf, high_quantile_pred_rf = np.percentile(
    tree_preds_rf,
    [low_quantile * 100, high_quantile * 100],
    axis=0,
)

# Predict the mean (central) estimate for Random Forest
mean_pred_rf = rf_model.predict(X_test)

# Display the first 10 test-set results for Random Forest.
# The percentile labels use round() rather than int(): int() truncates, so a
# quantile such as 0.29 (0.29 * 100 == 28.999999999999996) would be labelled
# "28th" instead of "29th".
print("Random Forest Predictions:")
print("Mean Predictions:", mean_pred_rf[:10])
print(
    f"{round(low_quantile * 100)}th Percentile Predictions (Lower Bound):",
    low_quantile_pred_rf[:10],
)
print(
    f"{round(high_quantile * 100)}th Percentile Predictions (Upper Bound):",
    high_quantile_pred_rf[:10],
)