# Source repository: ignatz/okx-codex-trader
from __future__ import annotations

import argparse
from dataclasses import dataclass
from pathlib import Path

import pandas as pd


# Root of the cached candle CSVs; load_15m() reads <CACHE_DIR>/<symbol>/15m.csv.
CACHE_DIR = Path("data/okx-candles")
# Default directory for the CSV and Markdown outputs (overridable via --output-dir).
OUTPUT_DIR = Path("reports/eth-exploration")
# Default candidate table filtered by params_from_source() (overridable via --source-totals).
SOURCE_TOTALS = OUTPUT_DIR / "eth-relative-momentum-totals.csv"
# File-name prefix shared by every output this script writes.
PREFIX = "eth-relmom-lb84-regime-gate"
# Starting value of every simulated equity curve.
INITIAL_EQUITY = 10_000.0
# Fee rate charged per unit of position turnover in net_returns().
TAKER_FEE = 0.0004
# Bar label and lookback (in bars) used both to select source rows and in the signal.
BAR = "4H"
LOOKBACK = 84
# (label, trailing window) pairs reported by horizon_rows(); None means the whole sample.
HORIZONS = (
    ("full", None),
    ("3y", pd.DateOffset(years=3)),
    ("1y", pd.DateOffset(years=1)),
    ("6m", pd.DateOffset(months=6)),
    ("3m", pd.DateOffset(months=3)),
)


@dataclass(frozen=True)
class Params:
    """Immutable parameter set identifying one relative-momentum candidate."""

    trend: int
    rel_entry: float
    vol_quantile: float
    short_weight: float
    long_weight: float

    @property
    def base_name(self) -> str:
        """Return the candidate identifier, before any gate suffix is appended."""
        pieces = (
            "eth_relmom-4H-lb84",
            f"tr{self.trend}",
            f"re{self.rel_entry:.3f}",
            f"vq{self.vol_quantile:.1f}",
            f"sw{self.short_weight:.2f}",
            f"lw{self.long_weight:.2f}",
        )
        return "-".join(pieces)


@dataclass(frozen=True)
class Gate:
    """Immutable (name, description) record describing one regime gate."""

    # Key that gate_mask() dispatches on; also embedded in output row names.
    name: str
    # Human-readable rule text copied into the "gate_description" output column.
    description: str


# Every gate evaluated per candidate. Each name must have a matching branch in
# gate_mask(), which raises ValueError for names it does not recognise.
GATES = (
    Gate("no_gate", "baseline 4H-lb84 signal without extra regime gate"),
    Gate("eth_bull90", "ETH trailing 90-day return > 0"),
    Gate("eth_bear90", "ETH trailing 90-day return <= 0"),
    Gate("btc_bull90", "BTC trailing 90-day return > 0"),
    Gate("btc_bear90", "BTC trailing 90-day return <= 0"),
    Gate("eth_bull90_high_vol", "ETH trailing 90-day return > 0 and ETH 30-day realized vol above trailing 365-day median"),
    Gate("eth_bull90_low_vol", "ETH trailing 90-day return > 0 and ETH 30-day realized vol at or below trailing 365-day median"),
    Gate("eth_bear90_high_vol", "ETH trailing 90-day return <= 0 and ETH 30-day realized vol above trailing 365-day median"),
    Gate("eth_bear90_low_vol", "ETH trailing 90-day return <= 0 and ETH 30-day realized vol at or below trailing 365-day median"),
    Gate("btc_bull90_high_vol", "BTC trailing 90-day return > 0 and ETH 30-day realized vol above trailing 365-day median"),
    Gate("btc_bear90_high_vol", "BTC trailing 90-day return <= 0 and ETH 30-day realized vol above trailing 365-day median"),
)


def load_15m(symbol: str) -> pd.DataFrame:
    """Load the cached 15-minute candles for *symbol*, indexed by UTC timestamp.

    The ``ts`` column is parsed as epoch milliseconds. Rows are sorted by time
    and, where a timestamp repeats, only the last occurrence is kept.
    """
    candles = pd.read_csv(CACHE_DIR / symbol / "15m.csv")
    candles["ts"] = pd.to_datetime(candles["ts"], unit="ms", utc=True)
    candles = candles.sort_values("ts")
    candles = candles.drop_duplicates("ts", keep="last")
    return candles.set_index("ts")


def resample_4h(frame: pd.DataFrame) -> pd.DataFrame:
    """Aggregate OHLCV candles into 4-hour bars labelled by their left edge.

    Bins are closed on the left. Any bar with a missing value (for example a
    bin that contained no source rows) is dropped.
    """
    aggregations = {
        "open": ("open", "first"),
        "high": ("high", "max"),
        "low": ("low", "min"),
        "close": ("close", "last"),
        "volume": ("volume", "sum"),
    }
    four_hour_bins = frame.resample("4h", label="left", closed="left")
    bars = four_hour_bins.agg(**aggregations)
    return bars.dropna()


def load_closes() -> pd.DataFrame:
    """Return 4-hour closes for ETH and BTC swaps, one column per symbol.

    Only timestamps where both symbols have a close are kept.
    """
    columns = {}
    for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP"):
        columns[symbol] = resample_4h(load_15m(symbol))["close"]
    return pd.DataFrame(columns).dropna()


def params_from_source(source: Path) -> list[Params]:
    """Read candidate parameter sets from the totals CSV at *source*.

    Keeps rows for the configured bar and lookback whose 1y, 6m and 3m returns
    are all positive, ordered by 1y then 6m return (both descending). Repeated
    parameter combinations are returned once, at their first position.
    """
    table = pd.read_csv(source)
    in_scope = (table["bar"] == BAR) & (table["lookback"] == LOOKBACK)
    recently_positive = (
        (table["return_1y"] > 0.0)
        & (table["return_6m"] > 0.0)
        & (table["return_3m"] > 0.0)
    )
    ranked = table[in_scope & recently_positive].sort_values(
        ["return_1y", "return_6m"], ascending=[False, False]
    )
    keys = (
        (
            int(record.trend),
            float(record.rel_entry),
            float(record.vol_quantile),
            float(record.short_weight),
            float(record.long_weight),
        )
        for record in ranked.itertuples(index=False)
    )
    # dict.fromkeys keeps the first occurrence of each key, in ranked order.
    return [Params(*key) for key in dict.fromkeys(keys)]


def target_position(closes: pd.DataFrame, params: Params) -> pd.Series:
    """Compute the ungated target ETH weight for every bar.

    A bar is short (``-short_weight``) when ETH's LOOKBACK-bar return trails
    BTC's by at least ``rel_entry`` and ETH is below its EMA. It is long
    (``+long_weight``) when ETH leads by at least ``rel_entry`` and both ETH and
    BTC are above their EMAs. Both sides also require ETH's rolling volatility
    to be at or above its own rolling ``vol_quantile`` quantile. A long signal
    overrides a short one; all other bars are flat (0.0).
    """
    eth_close = closes["ETH-USDT-SWAP"]
    btc_close = closes["BTC-USDT-SWAP"]

    eth_lookback_return = eth_close / eth_close.shift(LOOKBACK) - 1.0
    btc_lookback_return = btc_close / btc_close.shift(LOOKBACK) - 1.0
    spread = eth_lookback_return - btc_lookback_return

    eth_ema = eth_close.ewm(span=params.trend, adjust=False).mean()
    btc_ema = btc_close.ewm(span=params.trend, adjust=False).mean()

    realized_vol = eth_close.pct_change().rolling(LOOKBACK).std(ddof=1)
    vol_floor = realized_vol.rolling(params.trend).quantile(params.vol_quantile)
    vol_ok = realized_vol >= vol_floor

    go_short = (spread <= -params.rel_entry) & (eth_close < eth_ema) & vol_ok
    go_long = (spread >= params.rel_entry) & (eth_close > eth_ema) & (btc_close > btc_ema) & vol_ok

    weights = pd.Series(0.0, index=closes.index)
    weights = weights.mask(go_short, -params.short_weight)
    # Applied second so that a long signal wins if both fire on the same bar.
    weights = weights.mask(go_long, params.long_weight)
    return weights.fillna(0.0)


def regime_frame(closes: pd.DataFrame) -> pd.DataFrame:
    """Build the trailing regime indicators and labels for every bar.

    Columns: ETH and BTC trailing 90-day returns, ETH 30-day realized
    volatility, the trailing 365-day rolling median of that volatility, and the
    string labels ``market_regime`` (``bull_90d`` / ``bear_90d``) and
    ``vol_regime`` (``high_vol`` / ``low_vol``). Bars whose inputs are still NaN
    keep the label ``unclassified``. Day counts are converted to bars at six
    bars per day.
    """
    bars_per_day = 6
    quarter = 90 * bars_per_day
    eth_close = closes["ETH-USDT-SWAP"]
    btc_close = closes["BTC-USDT-SWAP"]

    realized_vol = eth_close.pct_change().rolling(30 * bars_per_day).std(ddof=1)
    out = pd.DataFrame(index=closes.index)
    out["eth_ret_90d"] = eth_close / eth_close.shift(quarter) - 1.0
    out["btc_ret_90d"] = btc_close / btc_close.shift(quarter) - 1.0
    out["eth_rv_30d"] = realized_vol
    out["eth_rv_365d_median"] = realized_vol.rolling(365 * bars_per_day).median()

    # (label column, measured value, threshold, label above, label at-or-below)
    label_specs = (
        ("market_regime", out["eth_ret_90d"], 0.0, "bull_90d", "bear_90d"),
        ("vol_regime", out["eth_rv_30d"], out["eth_rv_365d_median"], "high_vol", "low_vol"),
    )
    for column, value, threshold, above, at_or_below in label_specs:
        # NaN comparisons are False both ways, so such bars stay "unclassified".
        out[column] = "unclassified"
        out.loc[value > threshold, column] = above
        out.loc[value <= threshold, column] = at_or_below
    return out


def gate_mask(regimes: pd.DataFrame, gate: Gate) -> pd.Series:
    """Return the boolean per-bar mask selected by ``gate.name``.

    Bars whose indicator values are NaN evaluate to False for every gate
    except ``no_gate``, which is True everywhere.

    Raises:
        ValueError: if ``gate.name`` is not a known gate.
    """
    high_vol = regimes["eth_rv_30d"] > regimes["eth_rv_365d_median"]
    low_vol = regimes["eth_rv_30d"] <= regimes["eth_rv_365d_median"]

    def eth_bull() -> pd.Series:
        return regimes["eth_ret_90d"] > 0.0

    def eth_bear() -> pd.Series:
        return regimes["eth_ret_90d"] <= 0.0

    def btc_bull() -> pd.Series:
        return regimes["btc_ret_90d"] > 0.0

    def btc_bear() -> pd.Series:
        return regimes["btc_ret_90d"] <= 0.0

    # Builders are lazy so that only the columns a gate needs are touched.
    builders = {
        "no_gate": lambda: pd.Series(True, index=regimes.index),
        "eth_bull90": eth_bull,
        "eth_bear90": eth_bear,
        "btc_bull90": btc_bull,
        "btc_bear90": btc_bear,
        "eth_bull90_high_vol": lambda: eth_bull() & high_vol,
        "eth_bull90_low_vol": lambda: eth_bull() & low_vol,
        "eth_bear90_high_vol": lambda: eth_bear() & high_vol,
        "eth_bear90_low_vol": lambda: eth_bear() & low_vol,
        "btc_bull90_high_vol": lambda: btc_bull() & high_vol,
        "btc_bear90_high_vol": lambda: btc_bear() & high_vol,
    }
    build = builders.get(gate.name)
    if build is None:
        raise ValueError(f"unknown gate: {gate.name}")
    return build()


def net_returns(closes: pd.DataFrame, position: pd.Series) -> pd.Series:
    """Return per-bar strategy returns after fees.

    The target position is shifted one bar before it is applied to ETH's
    bar-over-bar return, and TAKER_FEE is charged on the absolute change in the
    held position on the bar where that change takes effect.
    """
    bar_returns = closes["ETH-USDT-SWAP"].pct_change().fillna(0.0)
    held = position.shift(1).fillna(0.0)
    # The first bar has no predecessor, so its turnover is the full held size.
    traded = held.diff().abs()
    traded = traded.fillna(held.abs())
    gross = held * bar_returns
    return gross - traded * TAKER_FEE


def equity_from_returns(returns: pd.Series) -> pd.Series:
    """Compound *returns* from INITIAL_EQUITY into a Series named ``equity``.

    Missing returns are treated as 0.0.
    """
    growth = (1.0 + returns.fillna(0.0)).cumprod()
    curve = INITIAL_EQUITY * growth
    return curve.rename("equity")


def trades_from_returns(position: pd.Series, returns: pd.Series) -> list[dict[str, object]]:
    """Split the held position into trades and compound each trade's returns.

    A trade is a maximal run of consecutive bars with the same non-zero held
    weight (the target position shifted one bar). Each record has ``side``,
    ``entry_time`` (first bar of the run), ``exit_time`` (last bar of the run)
    and ``return`` (the compounded per-bar returns over the run).

    NOTE(review): only bars inside the run are compounded, so a fee that the
    returns series books on the first flat bar after a trade is not part of
    that trade's ``return``. Confirm this is intended.
    """
    held = position.shift(1).fillna(0.0)
    in_market = held != 0.0
    run_starts = in_market.ne(in_market.shift(1)) | held.ne(held.shift(1))
    run_ids = run_starts.cumsum()

    records: list[dict[str, object]] = []
    for _, run in held.groupby(run_ids):
        weight = float(run.iloc[0])
        if weight == 0.0:
            # Flat stretch between trades.
            continue
        bars = run.index
        compounded = (1.0 + returns.loc[bars]).prod() - 1.0
        records.append(
            {
                "side": "short" if weight < 0.0 else "long",
                "entry_time": bars[0],
                "exit_time": bars[-1],
                "return": float(compounded),
            }
        )
    return records


def series_metrics(series: pd.Series) -> dict[str, float]:
    """Summarise an equity curve as total return, annualized return and max drawdown.

    The annualized return uses the elapsed time between the first and last
    index values, with 365-day years. It is reported as 0.0 when the total
    return is -100% or worse, or when no time elapsed. A series with fewer
    than two points yields all zeros.
    """
    if len(series) < 2:
        return {"total_return": 0.0, "annualized_return": 0.0, "max_drawdown": 0.0}

    elapsed_seconds = (series.index[-1] - series.index[0]).total_seconds()
    years = elapsed_seconds / 86_400 / 365
    total = float(series.iloc[-1] / series.iloc[0] - 1.0)

    if total > -1.0 and years > 0.0:
        annualized = (1.0 + total) ** (1.0 / years) - 1.0
    else:
        annualized = 0.0

    running_peak = series.cummax()
    drawdown = float(((running_peak - series) / running_peak).max())
    return {"total_return": total, "annualized_return": annualized, "max_drawdown": drawdown}


def trade_metrics(trades: list[dict[str, object]], start: pd.Timestamp, end: pd.Timestamp) -> dict[str, float | int]:
    """Summarise the trades whose exit time lies in ``[start, end]`` (inclusive).

    Args:
        trades: Trade records with at least ``exit_time`` and ``return`` keys.
        start: Earliest exit time to include.
        end: Latest exit time to include.

    Returns:
        ``win_rate`` (share of included trades with a positive return),
        ``profit_factor`` (sum of winning returns divided by the absolute sum
        of losing returns) and ``trades`` (number of included trades).
        ``profit_factor`` is ``inf`` when there are winners and no losers, and
        0.0 when there are no winners at all.
    """
    scoped = [float(trade["return"]) for trade in trades if start <= pd.Timestamp(trade["exit_time"]) <= end]
    wins = [value for value in scoped if value > 0.0]
    gross_profit = sum(wins)
    gross_loss = abs(sum(value for value in scoped if value < 0.0))

    if gross_loss:
        profit_factor = gross_profit / gross_loss
    elif gross_profit:
        # Winners and no losers: an unbounded ratio, not 0.0. Reporting 0.0
        # would make a loss-free record look identical to an all-losing one.
        profit_factor = float("inf")
    else:
        profit_factor = 0.0

    return {
        "win_rate": len(wins) / len(scoped) if scoped else 0.0,
        "profit_factor": profit_factor,
        "trades": len(scoped),
    }


def trade_metrics_for_mask(trades: list[dict[str, object]], mask: pd.Series) -> dict[str, float | int]:
    """Summarise the trades whose exit time is flagged True in *mask*.

    Args:
        trades: Trade records with at least ``exit_time`` and ``return`` keys.
        mask: Boolean Series indexed by bar timestamp. Exit times that are
            absent from the index, or whose value is missing, are excluded.

    Returns:
        ``win_rate`` (share of included trades with a positive return),
        ``profit_factor`` (sum of winning returns divided by the absolute sum
        of losing returns) and ``trades`` (number of included trades).
        ``profit_factor`` is ``inf`` when there are winners and no losers, and
        0.0 when there are no winners at all.
    """
    exit_times = [pd.Timestamp(trade["exit_time"]) for trade in trades]
    if exit_times:
        # One vectorised lookup instead of building a one-element Series per trade.
        selected = mask.reindex(pd.Index(exit_times)).fillna(False).astype(bool).tolist()
    else:
        selected = []
    scoped = [float(trade["return"]) for trade, keep in zip(trades, selected) if keep]

    wins = [value for value in scoped if value > 0.0]
    gross_profit = sum(wins)
    gross_loss = abs(sum(value for value in scoped if value < 0.0))

    if gross_loss:
        profit_factor = gross_profit / gross_loss
    elif gross_profit:
        # Winners and no losers: an unbounded ratio, not 0.0. Reporting 0.0
        # would make a loss-free record look identical to an all-losing one.
        profit_factor = float("inf")
    else:
        profit_factor = 0.0

    return {
        "win_rate": len(wins) / len(scoped) if scoped else 0.0,
        "profit_factor": profit_factor,
        "trades": len(scoped),
    }


def metric_row(
    name: str,
    params: Params,
    gate: Gate,
    segment_type: str,
    segment: str,
    equity: pd.Series,
    trades: list[dict[str, object]],
    mask: pd.Series | None = None,
) -> dict[str, object]:
    """Build one output row of equity and trade metrics for a slice of *equity*.

    With ``mask=None`` the whole curve is used; otherwise only bars where the
    mask is True. Trades are counted when their exit time falls between the
    slice's first and last bar. A slice with fewer than two bars produces
    all-zero metrics and reports the full curve's start and end dates.
    """
    if mask is None:
        window = equity
    else:
        window = equity[mask.reindex(equity.index).fillna(False)]

    if len(window) >= 2:
        start, end = window.index[0], window.index[-1]
        metrics = series_metrics(window)
        trade_stats = trade_metrics(trades, start, end)
    else:
        start, end = equity.index[0], equity.index[-1]
        metrics = {"total_return": 0.0, "annualized_return": 0.0, "max_drawdown": 0.0}
        trade_stats = {"win_rate": 0.0, "profit_factor": 0.0, "trades": 0}

    row: dict[str, object] = {
        "name": name,
        "base_name": params.base_name,
        "gate": gate.name,
        "gate_description": gate.description,
        "segment_type": segment_type,
        "segment": segment,
        "start": start.strftime("%Y-%m-%d"),
        "end": end.strftime("%Y-%m-%d"),
        "bar": BAR,
        "lookback": LOOKBACK,
        "trend": params.trend,
        "rel_entry": params.rel_entry,
        "vol_quantile": params.vol_quantile,
        "short_weight": params.short_weight,
        "long_weight": params.long_weight,
    }
    row.update(metrics)
    row.update(trade_stats)
    return row


def segment_metric_row(
    name: str,
    params: Params,
    gate: Gate,
    segment_type: str,
    segment: str,
    returns: pd.Series,
    trades: list[dict[str, object]],
    mask: pd.Series,
) -> dict[str, object]:
    """Build one output row for the bars of *returns* selected by *mask*.

    The selected bars' returns are compounded into their own equity curve,
    with the first selected bar's return set to 0.0. Trades are counted when
    their exit bar is selected by the mask. Fewer than two selected bars
    produces all-zero metrics and reports the full series' start and end dates.
    """
    in_segment = mask.reindex(returns.index).fillna(False)
    segment_returns = returns[in_segment].copy()

    if len(segment_returns) >= 2:
        # Zero the first bar so the segment's curve starts at the initial equity.
        segment_returns.iloc[0] = 0.0
        curve = equity_from_returns(segment_returns)
        start, end = curve.index[0], curve.index[-1]
        metrics = series_metrics(curve)
        trade_stats = trade_metrics_for_mask(trades, in_segment)
    else:
        start, end = returns.index[0], returns.index[-1]
        metrics = {"total_return": 0.0, "annualized_return": 0.0, "max_drawdown": 0.0}
        trade_stats = {"win_rate": 0.0, "profit_factor": 0.0, "trades": 0}

    row: dict[str, object] = {
        "name": name,
        "base_name": params.base_name,
        "gate": gate.name,
        "gate_description": gate.description,
        "segment_type": segment_type,
        "segment": segment,
        "start": start.strftime("%Y-%m-%d"),
        "end": end.strftime("%Y-%m-%d"),
        "bar": BAR,
        "lookback": LOOKBACK,
        "trend": params.trend,
        "rel_entry": params.rel_entry,
        "vol_quantile": params.vol_quantile,
        "short_weight": params.short_weight,
        "long_weight": params.long_weight,
    }
    row.update(metrics)
    row.update(trade_stats)
    return row


def horizon_rows(name: str, params: Params, gate: Gate, equity: pd.Series, trades: list[dict[str, object]]) -> list[dict[str, object]]:
    """Return one metric row per entry in HORIZONS, each ending at the last bar.

    A horizon with no offset covers the whole curve; otherwise it covers the
    bars at or after ``last bar - offset``.
    """
    last_bar = equity.index[-1]

    def trailing_window(offset):
        if offset is None:
            return None
        return pd.Series(equity.index >= last_bar - offset, index=equity.index)

    return [
        metric_row(name, params, gate, "horizon", label, equity, trades, trailing_window(offset))
        for label, offset in HORIZONS
    ]


def period_rows(
    name: str,
    params: Params,
    gate: Gate,
    returns: pd.Series,
    trades: list[dict[str, object]],
    regimes: pd.DataFrame,
) -> list[dict[str, object]]:
    """Return segment rows: one per calendar year, then bull/bear, then high/low vol."""
    bar_years = returns.index.year
    segments: list[tuple[str, str, pd.Series]] = [
        ("year", str(year), pd.Series(bar_years == year, index=returns.index))
        for year in sorted(set(bar_years))
    ]
    segments += [
        ("market_regime", label, regimes["market_regime"] == label)
        for label in ("bull_90d", "bear_90d")
    ]
    segments += [
        ("vol_regime", label, regimes["vol_regime"] == label)
        for label in ("high_vol", "low_vol")
    ]
    return [
        segment_metric_row(name, params, gate, segment_type, label, returns, trades, selector)
        for segment_type, label, selector in segments
    ]


def markdown_table(frame: pd.DataFrame) -> str:
    """Render *frame* as a pipe-delimited Markdown table.

    Floats are formatted with six significant digits, missing values become
    empty cells, and literal ``|`` characters in other values are escaped.
    """

    def render(cell: object) -> str:
        if isinstance(cell, float):
            return f"{cell:.6g}"
        return str(cell).replace("|", "\\|")

    header = list(frame.columns)
    divider = ["---"] * len(header)
    body = frame.astype(object).where(pd.notna(frame), "").values.tolist()
    return "\n".join(
        "| " + " | ".join(render(cell) for cell in row) + " |"
        for row in [header, divider, *body]
    )


def write_report(command: str, paths: list[Path], selected: pd.DataFrame, horizons: pd.DataFrame, periods: pd.DataFrame, qualified: pd.DataFrame) -> str:
    """Return the Markdown validation report as a single string.

    The conclusion is ACCEPT when *qualified* has any rows, otherwise REJECT.
    The report lists the first 20 rows of *selected*, then the horizon and
    period rows belonging to the first five selected names.
    """
    if len(qualified):
        conclusion = "ACCEPT: at least one explicit non-forward-looking regime gate passed the full sample, all required recent horizons, yearly rows, and bull/bear/volatility segment checks."
    else:
        conclusion = "REJECT: no 4H-lb84 candidate produced a full-sample logically closed usable strategy after explicit regime gating."

    summary_columns = [
        "name",
        "total_return",
        "annualized_return",
        "max_drawdown",
        "win_rate",
        "profit_factor",
        "trades",
        "return_3y",
        "return_1y",
        "return_6m",
        "return_3m",
        "min_year_return",
        "min_market_return",
        "min_vol_return",
        "usable",
    ]
    top_rows = selected[summary_columns].head(20)
    top_names = set(selected["name"].head(5))
    top_horizons = horizons[horizons["name"].isin(top_names)]
    top_periods = periods[periods["name"].isin(top_names)]

    lines = [
        "# ETH Relative Momentum 4H-lb84 Regime Gate Validation",
        "",
        f"Run command: `{command}`",
        "",
        "Output files:",
    ]
    lines.extend(f"- `{path}`" for path in paths)
    lines += [
        "",
        "Scope: offline validation only, using cached OKX ETH-USDT-SWAP and BTC-USDT-SWAP 15m candles resampled to 4H. No live API path or order path is used.",
        "No-future rule: all gates use trailing values available at the 4H close; positions are shifted one bar, so execution starts on the next 4H bar.",
        "Gate set: no gate, ETH/BTC trailing 90-day bull/bear, and those states crossed with ETH 30-day realized volatility above/below its trailing 365-day rolling median.",
        "Usable filter: positive full/3y/1y/6m/3m returns, full max drawdown <= 35%, PF > 1, at least 20 trades, no negative calendar year, no negative bull/bear segment, and no negative high/low-vol segment.",
        "",
        f"Conclusion: {conclusion}",
        "",
    ]
    for heading, table in (
        ("## Top Rows", top_rows),
        ("## Required Horizons For Top 5", top_horizons),
        ("## Year And Regime Segments For Top 5", top_periods),
    ):
        lines += [heading, "", markdown_table(table), ""]
    return "\n".join(lines)


def _evaluate_candidate(
    params: Params,
    gate: Gate,
    base_position: pd.Series,
    closes: pd.DataFrame,
    regimes: pd.DataFrame,
) -> tuple[dict[str, object], list[dict[str, object]], list[dict[str, object]]]:
    """Backtest one (params, gate) pair; return its totals row, horizon rows and period rows."""
    allowed = gate_mask(regimes, gate).reindex(base_position.index).fillna(False)
    position = base_position.where(allowed, 0.0)
    returns = net_returns(closes, position)
    equity = equity_from_returns(returns)
    trades = trades_from_returns(position, returns)
    name = f"{params.base_name}-gate-{gate.name}"
    horizons = horizon_rows(name, params, gate, equity, trades)
    periods = period_rows(name, params, gate, returns, trades, regimes)
    by_horizon = {row["segment"]: row for row in horizons}

    def worst(segment_type: str) -> float:
        return min(float(row["total_return"]) for row in periods if row["segment_type"] == segment_type)

    row = {
        **by_horizon["full"],
        "return_3y": float(by_horizon["3y"]["total_return"]),
        "return_1y": float(by_horizon["1y"]["total_return"]),
        "return_6m": float(by_horizon["6m"]["total_return"]),
        "return_3m": float(by_horizon["3m"]["total_return"]),
        "min_year_return": worst("year"),
        "min_market_return": worst("market_regime"),
        "min_vol_return": worst("vol_regime"),
    }
    # Must stay in sync with the "Usable filter" sentence in write_report().
    row["usable"] = (
        row["total_return"] > 0.0
        and row["return_3y"] > 0.0
        and row["return_1y"] > 0.0
        and row["return_6m"] > 0.0
        and row["return_3m"] > 0.0
        and row["max_drawdown"] <= 0.35
        and row["profit_factor"] > 1.0
        and row["trades"] >= 20
        and row["min_year_return"] >= 0.0
        and row["min_market_return"] >= 0.0
        and row["min_vol_return"] >= 0.0
    )
    # Ranking score, used only to order rows within the usable / not-usable groups.
    row["score"] = (
        float(row["annualized_return"])
        - float(row["max_drawdown"])
        + float(row["return_1y"])
        + 0.5 * float(row["return_6m"])
        + 0.25 * float(row["return_3m"])
        + 0.5 * float(row["min_year_return"])
        + 0.5 * float(row["min_market_return"])
        + 0.5 * float(row["min_vol_return"])
    )
    return row, horizons, periods


def main() -> int:
    """Run the regime-gate validation and write the CSV and Markdown outputs.

    Every candidate from the source totals is backtested under every gate in
    GATES. Five files are written to the output directory: totals, selected,
    horizons, periods and the report. "Selected" is the usable rows, or the
    top 25 rows by score when none are usable.

    Returns:
        0 on success.

    Raises:
        RuntimeError: if the source totals contain no matching candidate.
    """
    import shlex  # Local import: only needed to quote paths in the recorded command.

    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
    parser.add_argument("--source-totals", type=Path, default=SOURCE_TOTALS)
    args = parser.parse_args()

    args.output_dir.mkdir(parents=True, exist_ok=True)
    params_list = params_from_source(args.source_totals)
    if not params_list:
        raise RuntimeError("no recent-positive 4H-lb84 candidates found in source totals")

    closes = load_closes()
    regimes = regime_frame(closes)

    total_rows: list[dict[str, object]] = []
    horizon_data: list[dict[str, object]] = []
    period_data: list[dict[str, object]] = []
    for params in params_list:
        # The ungated signal depends only on params, so compute it once per candidate.
        base_position = target_position(closes, params)
        for gate in GATES:
            row, gate_horizons, gate_periods = _evaluate_candidate(params, gate, base_position, closes, regimes)
            total_rows.append(row)
            horizon_data.extend(gate_horizons)
            period_data.extend(gate_periods)

    totals = pd.DataFrame(total_rows).sort_values(["usable", "score"], ascending=[False, False])
    horizons = pd.DataFrame(horizon_data)
    periods = pd.DataFrame(period_data)
    qualified = totals[totals["usable"]]
    selected = qualified if len(qualified) else totals.head(25)

    # Filter once and reuse for both the CSV files and the report.
    selected_names = set(selected["name"])
    selected_horizons = horizons[horizons["name"].isin(selected_names)]
    selected_periods = periods[periods["name"].isin(selected_names)]

    totals_path = args.output_dir / f"{PREFIX}-totals.csv"
    selected_path = args.output_dir / f"{PREFIX}-selected.csv"
    horizon_path = args.output_dir / f"{PREFIX}-horizons.csv"
    period_path = args.output_dir / f"{PREFIX}-periods.csv"
    report_path = args.output_dir / f"{PREFIX}-report.md"
    totals.to_csv(totals_path, index=False)
    selected.to_csv(selected_path, index=False)
    selected_horizons.to_csv(horizon_path, index=False)
    selected_periods.to_csv(period_path, index=False)

    # Record any non-default flags so the command shown in the report reproduces this run.
    command = "rtk .venv/bin/python scripts/validate_eth_relmom_lb84_regime_gate.py"
    if args.output_dir != OUTPUT_DIR:
        command += f" --output-dir {shlex.quote(str(args.output_dir))}"
    if args.source_totals != SOURCE_TOTALS:
        command += f" --source-totals {shlex.quote(str(args.source_totals))}"

    report_path.write_text(
        write_report(
            command,
            [totals_path, selected_path, horizon_path, period_path, report_path],
            selected,
            selected_horizons,
            selected_periods,
            qualified,
        ),
        encoding="utf-8",
    )
    print(report_path)
    print(selected.head(10).to_string(index=False))
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())