"""Fit a Gaussian HMM on BTC 15-minute bars and label regimes for a later year.

Run as a script, this trains on 2023-2024 bars, predicts regimes for 2025,
prints per-state summary statistics and writes the labelled bars to
``regime_results_2025.csv``.
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from hmmlearn import hmm

from regime_features import calculate_garman_klass, get_rolling_hurst

# Config
DATA_PATH = r'c:\Users\saladass\Documents\abetory\VEBB\Data\15m\btc_15m_data_2018_to_2025.csv'
N_STATES = 3
# Rolling window (in bars) passed to get_rolling_hurst.
HURST_WINDOW = 100
# Column order of the observation matrix fed to the HMM.
FEATURE_COLS = ['log_ret', 'vol_gk', 'hurst']


def _load_bars():
    """Read DATA_PATH and add a parsed ``datetime`` column from ``Open time``."""
    df = pd.read_csv(DATA_PATH)
    df['datetime'] = pd.to_datetime(df['Open time'])
    return df


def _build_features(bars):
    """Return a copy of *bars* with the HMM feature columns, non-finite rows removed.

    Adds ``log_ret`` (log of Close over the previous Close), ``vol_gk`` (from
    ``calculate_garman_klass``) and ``hurst`` (from ``get_rolling_hurst`` over
    ``HURST_WINDOW`` bars). The two helpers come from ``regime_features``;
    their exact output is not visible here, so they are treated as opaque.
    """
    feats = bars.copy()
    feats['log_ret'] = np.log(feats['Close'] / feats['Close'].shift(1))
    feats['vol_gk'] = calculate_garman_klass(feats)
    feats['hurst'] = get_rolling_hurst(feats['Close'], window=HURST_WINDOW)
    # No epsilon is added to the price ratio: a zero or invalid price yields
    # +/-inf or NaN in log_ret, so such values are mapped to NaN and the rows
    # dropped. dropna also removes rows with NaN in any other column.
    return feats.replace([np.inf, -np.inf], np.nan).dropna()


def train_regime_model():
    """Fit a GaussianHMM with N_STATES states on the 2023-2024 bars.

    Returns:
        A ``(model, train_df)`` tuple: the fitted ``hmm.GaussianHMM`` and the
        feature-engineered training frame it was fitted on.

    Raises:
        ValueError: if no usable rows remain after feature engineering.
    """
    print("Loading data for HMM training...")
    df = _load_bars()

    # Use 2023-2024 for training
    years = df['datetime'].dt.year
    train_df = df[(years >= 2023) & (years <= 2024)]

    print(f"Feature Engineering on {len(train_df)} bars...")
    train_df = _build_features(train_df)
    if train_df.empty:
        raise ValueError("No usable training rows for 2023-2024 after feature engineering")

    # Features for HMM: [Log Returns, GK Volatility, Hurst]
    X = train_df[FEATURE_COLS].values

    print(f"Fitting GaussianHMM with {N_STATES} states...")
    model = hmm.GaussianHMM(
        n_components=N_STATES,
        covariance_type="full",
        n_iter=1000,
        random_state=42,
    )
    model.fit(X)

    return model, train_df


def predict_regimes(model, year=2025):
    """Label every usable bar of *year* with the regime predicted by *model*.

    Features are built exactly as in training, including the replacement of
    infinite values by NaN before rows are dropped, so the matrix passed to
    ``model.predict`` contains only finite values.

    Args:
        model: a fitted model exposing ``predict(X)``, as returned by
            ``train_regime_model``.
        year: calendar year of bars to label.

    Returns:
        The feature-engineered frame for *year* with an added ``regime`` column.

    Raises:
        ValueError: if no usable rows remain for *year*.
    """
    print(f"Predicting regimes for {year}...")
    df = _load_bars()
    test_df = _build_features(df[df['datetime'].dt.year == year])
    if test_df.empty:
        raise ValueError(f"No usable rows for {year} after feature engineering")

    X = test_df[FEATURE_COLS].values
    test_df['regime'] = model.predict(X)
    return test_df


def main():
    """Train, predict 2025 regimes, print per-state statistics and save the CSV."""
    model, _ = train_regime_model()
    results_2025 = predict_regimes(model, year=2025)

    # Analyze regimes
    print("\nRegime Statistics (2025):")
    for i in range(N_STATES):
        subset = results_2025[results_2025['regime'] == i]
        avg_ret = subset['log_ret'].mean() * 100
        avg_vol = subset['vol_gk'].mean()
        avg_hurst = subset['hurst'].mean()
        print(f"State {i}: Count={len(subset)}, Avg Ret={avg_ret:.4f}%, Avg Vol={avg_vol:.4f}, Avg Hurst={avg_hurst:.4f}")

    # Save results for backtester
    results_2025.to_csv('regime_results_2025.csv', index=False)
    print("\nResults saved to regime_results_2025.csv")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"Error: {e}")
        # Exit non-zero so a calling pipeline does not mistake a failed run
        # (and a possibly stale results CSV) for success.
        raise SystemExit(1)