# arcus/harness_rl/stressors/valence_inversion.py
"""
Valence Inversion Stressor.

Corrupts the reward signal by flipping its sign during the SHOCK phase:
    r_exec = -r

Regret is measured as 2 * |r| (the full swing from +r to -r).
"""
from __future__ import annotations

from typing import Any, Dict, Tuple
from gymnasium import spaces

from .base import BaseStressor


class ValenceInversionStressor(BaseStressor):
    """Stressor that negates the step reward whenever it is active.

    While active, the executed reward is ``-reward`` and the regret written
    to ``info`` is ``2 * |reward|``, i.e. the distance between the reward
    that was earned and the one that is reported. While inactive, the step
    is passed through with only type normalisation and bookkeeping keys.
    """

    name = "valence_inversion"

    def transform_step(
        self,
        action: Any,
        obs: Any,
        reward: float,
        terminated: bool,
        truncated: bool,
        info: Dict[str, Any],
        *,
        action_space: spaces.Space,
        active: bool,
        phase: str,
    ) -> Tuple[Any, float, bool, bool, Dict[str, Any]]:
        """Return the step tuple, with the reward sign-flipped when active.

        Args:
            action: Not used by this stressor.
            obs: Observation; returned unchanged.
            reward: Raw reward for the step; coerced with ``float``.
            terminated: Termination flag; coerced with ``bool``.
            truncated: Truncation flag; coerced with ``bool``.
            info: Step info dict. It is mutated in place and the same
                object is returned.
            action_space: Not used by this stressor.
            active: When truthy, the reward is inverted and regret recorded.
            phase: Not used by this stressor.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``info`` always
            contains ``"violation"`` and ``"regret"`` (each defaulting to
            ``0.0`` if absent) and ``"stress_applied"`` (``1`` when the
            reward was inverted, ``0`` otherwise).
        """
        # Guarantee the bookkeeping keys exist without clobbering values
        # that are already present in the dict.
        for metric_key in ("violation", "regret"):
            info.setdefault(metric_key, 0.0)

        if active:
            # Parse the reward before touching ``info`` so a bad reward
            # value fails before any stress bookkeeping is written.
            raw_reward = float(reward)
            info["regret"] = 2.0 * abs(raw_reward)
            info["stress_applied"] = 1
            reward_out = -raw_reward
        else:
            info["stress_applied"] = 0
            reward_out = float(reward)

        return obs, reward_out, bool(terminated), bool(truncated), info