""" features/feature_engineer.py ------------------------------ Turns raw OHLCV into the full feature matrix used by Claude and the rule engine. """ import logging import numpy as np import pandas as pd from config.settings import EMA_PERIODS from features.indicators import TechnicalIndicators as TI logger = logging.getLogger(__name__) _REQUIRED = {"open", "high", "low", "close", "volume"} class FeatureEngineer: def build(self, df: pd.DataFrame) -> pd.DataFrame: missing = _REQUIRED - set(df.columns) if missing: raise ValueError(f"Missing columns: {missing}") out = pd.DataFrame(index=df.index) o, h, l, c, v = df["open"], df["high"], df["low"], df["close"], df["volume"] # raw price + candle geometry out["open"] = o out["high"] = h out["low"] = l out["close"] = c out["body"] = (c - o).abs() out["upper_wick"] = h - pd.concat([c, o], axis=1).max(axis=1) out["lower_wick"] = pd.concat([c, o], axis=1).min(axis=1) - l out["hl_range"] = h - l for p in [1, 3, 6, 12, 24]: out[f"ret_{p}"] = TI.price_change(c, p) # EMAs + distance from price emas = {} for period in EMA_PERIODS: emas[period] = TI.ema(c, period) out[f"ema_{period}"] = emas[period] out[f"close_vs_ema{period}"] = c / (emas[period] + 1e-9) - 1 for fast, slow in [(9, 21), (21, 50), (50, 200)]: if fast in emas and slow in emas: out[f"ema_cross_{fast}_{slow}"] = emas[fast] / (emas[slow] + 1e-9) - 1 # momentum out["rsi"] = TI.rsi(c) out["rsi_overbought"] = (out["rsi"] > 70).astype(int) out["rsi_oversold"] = (out["rsi"] < 30).astype(int) macd = TI.macd(c) out["macd"] = macd["macd"] out["macd_signal"]= macd["macd_signal"] out["macd_hist"] = macd["macd_hist"] out["macd_cross"] = np.sign(macd["macd_hist"]) bb = TI.bollinger_bands(c) out["bb_pct_b"] = bb["bb_pct_b"] out["bb_bandwidth"]= bb["bb_bandwidth"] out["atr"] = TI.atr(h, l, c) out["atr_pct"] = out["atr"] / (c + 1e-9) stoch = TI.stochastic(h, l, c) out["stoch_k"] = stoch["stoch_k"] out["stoch_d"] = stoch["stoch_d"] # volume out["volume"] = v out["volume_ma"] = TI.volume_ma(v) out["volume_ratio"] = TI.volume_ratio(v) out["obv"] = TI.obv(c, v) out["obv_ema"] = TI.ema(out["obv"], 20) out["vwap"] = TI.vwap(h, l, c, v) out["close_vs_vwap"]= c / (out["vwap"] + 1e-9) - 1 # volatility out["hist_vol_24"] = c.pct_change().rolling(24).std() * np.sqrt(24) out["hist_vol_72"] = c.pct_change().rolling(72).std() * np.sqrt(72) # regime / quant adx_df = TI.adx(h, l, c) out["adx"] = adx_df["adx"] out["plus_di"] = adx_df["plus_di"] out["minus_di"] = adx_df["minus_di"] out["efficiency_ratio"] = TI.efficiency_ratio(c, window=20) out["zscore_50"] = TI.zscore(c, window=50) n_before = len(out) out.dropna(inplace=True) logger.debug("Features: %d cols, dropped %d NaN rows (%d left)", len(out.columns), n_before - len(out), len(out)) return out