| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539 |
- from __future__ import annotations
- import argparse
- from dataclasses import dataclass
- from pathlib import Path
- import pandas as pd
# Root of the candle cache; load_15m reads <CACHE_DIR>/<symbol>/15m.csv.
CACHE_DIR = Path("data") / "okx-candles"
# Default destination for the CSV and markdown outputs (see --output-dir).
OUTPUT_DIR = Path("reports") / "eth-exploration"
# Default parameter-sweep totals that candidates are drawn from (see --source-totals).
SOURCE_TOTALS = OUTPUT_DIR / "eth-relative-momentum-totals.csv"
# File-name prefix shared by every output written by main().
PREFIX = "eth-relmom-lb84-regime-gate"

# Starting equity used when compounding returns into an equity curve.
INITIAL_EQUITY = 10_000.0
# Fee rate charged per unit of position turnover in net_returns().
TAKER_FEE = 0.0004

# Only source rows with this bar label and lookback are considered.
BAR = "4H"
# Momentum / volatility window, counted in bars.
LOOKBACK = 84

# (label, trailing offset) pairs; None means the whole sample.
HORIZONS = (
    ("full", None),
    ("3y", pd.DateOffset(years=3)),
    ("1y", pd.DateOffset(years=1)),
    ("6m", pd.DateOffset(months=6)),
    ("3m", pd.DateOffset(months=3)),
)
@dataclass(frozen=True)
class Params:
    """One immutable parameter combination for the relative-momentum signal."""

    # Span of the EWM trend filters and window of the rolling volatility quantile.
    trend: int
    # Absolute ETH-minus-BTC momentum spread required to enter.
    rel_entry: float
    # Quantile of rolling ETH volatility that current volatility must reach.
    vol_quantile: float
    # Position size used for short signals (applied with a negative sign).
    short_weight: float
    # Position size used for long signals.
    long_weight: float

    @property
    def base_name(self) -> str:
        """Return the canonical identifier of this combination (without gate suffix)."""
        pieces = (
            f"eth_relmom-4H-lb84-tr{self.trend}",
            f"re{self.rel_entry:.3f}",
            f"vq{self.vol_quantile:.1f}",
            f"sw{self.short_weight:.2f}",
            f"lw{self.long_weight:.2f}",
        )
        return "-".join(pieces)
@dataclass(frozen=True)
class Gate:
    """A named regime gate; ``name`` is the key that gate_mask() dispatches on."""

    # Machine identifier, also used as the suffix of result names.
    name: str
    # Human-readable rule, copied into every output row.
    description: str


# Every gate evaluated by main(), in evaluation order.
GATES = tuple(
    Gate(name=gate_name, description=gate_description)
    for gate_name, gate_description in (
        ("no_gate", "baseline 4H-lb84 signal without extra regime gate"),
        ("eth_bull90", "ETH trailing 90-day return > 0"),
        ("eth_bear90", "ETH trailing 90-day return <= 0"),
        ("btc_bull90", "BTC trailing 90-day return > 0"),
        ("btc_bear90", "BTC trailing 90-day return <= 0"),
        (
            "eth_bull90_high_vol",
            "ETH trailing 90-day return > 0 and ETH 30-day realized vol above trailing 365-day median",
        ),
        (
            "eth_bull90_low_vol",
            "ETH trailing 90-day return > 0 and ETH 30-day realized vol at or below trailing 365-day median",
        ),
        (
            "eth_bear90_high_vol",
            "ETH trailing 90-day return <= 0 and ETH 30-day realized vol above trailing 365-day median",
        ),
        (
            "eth_bear90_low_vol",
            "ETH trailing 90-day return <= 0 and ETH 30-day realized vol at or below trailing 365-day median",
        ),
        (
            "btc_bull90_high_vol",
            "BTC trailing 90-day return > 0 and ETH 30-day realized vol above trailing 365-day median",
        ),
        (
            "btc_bear90_high_vol",
            "BTC trailing 90-day return <= 0 and ETH 30-day realized vol above trailing 365-day median",
        ),
    )
)
def load_15m(symbol: str) -> pd.DataFrame:
    """Load the cached 15-minute candles of ``symbol`` indexed by UTC timestamp.

    Reads ``CACHE_DIR/<symbol>/15m.csv``, parses the ``ts`` column as epoch
    milliseconds in UTC, sorts by time and keeps one row per timestamp.

    Args:
        symbol: Instrument directory name under ``CACHE_DIR``.

    Returns:
        The candle frame sorted by and indexed on ``ts``.
    """
    candles = pd.read_csv(CACHE_DIR / symbol / "15m.csv")
    candles["ts"] = pd.to_datetime(candles["ts"], unit="ms", utc=True)
    # A stable sort keeps rows with the same timestamp in file order, so
    # keep="last" reliably selects the most recently written duplicate.
    # The default quicksort gives no such ordering guarantee.
    candles = candles.sort_values("ts", kind="mergesort")
    return candles.drop_duplicates("ts", keep="last").set_index("ts")
def resample_4h(frame: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a time-indexed OHLCV frame into left-labelled 4-hour bars.

    Args:
        frame: Candles with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns on a datetime index.

    Returns:
        One row per 4-hour bin; bins with any missing value (for example
        bins that contain no source candles) are dropped.
    """
    # Output column -> (source column, aggregation).
    aggregations = {
        "open": ("open", "first"),
        "high": ("high", "max"),
        "low": ("low", "min"),
        "close": ("close", "last"),
        "volume": ("volume", "sum"),
    }
    bars = frame.resample("4h", label="left", closed="left").agg(**aggregations)
    return bars.dropna()
def load_closes() -> pd.DataFrame:
    """Return aligned 4-hour closes for the ETH and BTC perpetual swaps.

    Returns:
        A frame with one close column per symbol, restricted to the bars
        where both symbols have a value.
    """
    close_by_symbol: dict[str, pd.Series] = {}
    for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP"):
        bars = resample_4h(load_15m(symbol))
        close_by_symbol[symbol] = bars["close"]
    # Building the frame aligns both series on the union of timestamps;
    # dropna() then keeps only bars present for both symbols.
    return pd.DataFrame(close_by_symbol).dropna()
def params_from_source(source: Path) -> list[Params]:
    """Collect the distinct parameter sets worth re-validating from a totals CSV.

    Rows are kept when they match ``BAR`` and ``LOOKBACK`` and have strictly
    positive 1-year, 6-month and 3-month returns. Survivors are ranked by
    1-year then 6-month return (both descending) and de-duplicated on the
    five signal parameters, keeping the first (best-ranked) occurrence.

    Args:
        source: Path of the totals CSV.

    Returns:
        Unique ``Params`` in ranking order; empty when nothing qualifies.
    """
    totals = pd.read_csv(source)
    in_scope = (totals["bar"] == BAR) & (totals["lookback"] == LOOKBACK)
    recently_positive = (
        (totals["return_1y"] > 0.0)
        & (totals["return_6m"] > 0.0)
        & (totals["return_3m"] > 0.0)
    )
    ranked = totals[in_scope & recently_positive].sort_values(
        ["return_1y", "return_6m"], ascending=[False, False]
    )
    # dict.fromkeys keeps the first occurrence of each key in ranking order.
    unique_keys = dict.fromkeys(
        (
            int(record.trend),
            float(record.rel_entry),
            float(record.vol_quantile),
            float(record.short_weight),
            float(record.long_weight),
        )
        for record in ranked.itertuples(index=False)
    )
    return [Params(*key) for key in unique_keys]
def target_position(closes: pd.DataFrame, params: Params) -> pd.Series:
    """Compute the ungated target position for every bar.

    A short of ``-short_weight`` is requested when ETH underperforms BTC by at
    least ``rel_entry`` over ``LOOKBACK`` bars, ETH closes below its EWM trend
    and the volatility gate is open. A long of ``long_weight`` is requested
    when ETH outperforms by at least ``rel_entry``, both ETH and BTC close
    above their EWM trends and the volatility gate is open. Otherwise 0.

    Args:
        closes: Frame with ``ETH-USDT-SWAP`` and ``BTC-USDT-SWAP`` closes.
        params: Signal parameters.

    Returns:
        Target position per bar, indexed like ``closes``.
    """
    eth_close = closes["ETH-USDT-SWAP"]
    btc_close = closes["BTC-USDT-SWAP"]

    # Relative momentum: ETH lookback return minus BTC lookback return.
    eth_lookback_return = eth_close / eth_close.shift(LOOKBACK) - 1.0
    btc_lookback_return = btc_close / btc_close.shift(LOOKBACK) - 1.0
    spread = eth_lookback_return - btc_lookback_return

    eth_ewm = eth_close.ewm(span=params.trend, adjust=False).mean()
    btc_ewm = btc_close.ewm(span=params.trend, adjust=False).mean()

    # Volatility gate: rolling ETH volatility at or above its own rolling quantile.
    realized_vol = eth_close.pct_change().rolling(LOOKBACK).std(ddof=1)
    vol_threshold = realized_vol.rolling(params.trend).quantile(params.vol_quantile)
    vol_open = realized_vol >= vol_threshold

    go_short = (spread <= -params.rel_entry) & (eth_close < eth_ewm) & vol_open
    go_long = (spread >= params.rel_entry) & (eth_close > eth_ewm) & (btc_close > btc_ewm) & vol_open

    flat = pd.Series(0.0, index=closes.index)
    # Long is applied last, so it wins if both signals are ever true on a bar.
    position = flat.mask(go_short, -params.short_weight).mask(go_long, params.long_weight)
    return position.fillna(0.0)
def regime_frame(closes: pd.DataFrame) -> pd.DataFrame:
    """Build the trailing regime features and labels used for gating and slicing.

    Windows are expressed in 4-hour bars (6 per day). Every feature only uses
    current and past closes.

    Args:
        closes: Frame with ``ETH-USDT-SWAP`` and ``BTC-USDT-SWAP`` closes.

    Returns:
        A frame indexed like ``closes`` with the 90-day ETH/BTC returns, the
        ETH 30-day realized volatility, its 365-day rolling median, and the
        ``market_regime`` / ``vol_regime`` labels. A label stays
        ``"unclassified"`` while its inputs are not yet available.
    """
    bars_per_day = 6
    bars_90d = 90 * bars_per_day
    bars_30d = 30 * bars_per_day
    bars_365d = 365 * bars_per_day

    eth_close = closes["ETH-USDT-SWAP"]
    btc_close = closes["BTC-USDT-SWAP"]

    eth_ret_90d = eth_close / eth_close.shift(bars_90d) - 1.0
    btc_ret_90d = btc_close / btc_close.shift(bars_90d) - 1.0
    eth_rv_30d = eth_close.pct_change().rolling(bars_30d).std(ddof=1)
    eth_rv_median = eth_rv_30d.rolling(bars_365d).median()

    # NaN inputs fail both comparisons, leaving the "unclassified" default.
    unclassified = pd.Series("unclassified", index=closes.index)
    market_regime = unclassified.mask(eth_ret_90d > 0.0, "bull_90d").mask(eth_ret_90d <= 0.0, "bear_90d")
    vol_regime = unclassified.mask(eth_rv_30d > eth_rv_median, "high_vol").mask(eth_rv_30d <= eth_rv_median, "low_vol")

    return pd.DataFrame(
        {
            "eth_ret_90d": eth_ret_90d,
            "btc_ret_90d": btc_ret_90d,
            "eth_rv_30d": eth_rv_30d,
            "eth_rv_365d_median": eth_rv_median,
            "market_regime": market_regime,
            "vol_regime": vol_regime,
        },
        index=closes.index,
    )
def gate_mask(regimes: pd.DataFrame, gate: Gate) -> pd.Series:
    """Return the boolean per-bar mask during which ``gate`` allows trading.

    Args:
        regimes: Frame with ``eth_ret_90d``, ``btc_ret_90d``, ``eth_rv_30d``
            and ``eth_rv_365d_median`` columns.
        gate: Gate whose ``name`` selects the rule.

    Returns:
        A boolean series indexed like ``regimes``. Bars whose inputs are NaN
        evaluate to False for every rule except ``no_gate``.

    Raises:
        ValueError: If ``gate.name`` is not a known gate.
    """
    above_median = regimes["eth_rv_30d"] > regimes["eth_rv_365d_median"]
    at_or_below_median = regimes["eth_rv_30d"] <= regimes["eth_rv_365d_median"]

    if gate.name == "no_gate":
        return pd.Series(True, index=regimes.index)

    # gate name -> (90-day return column, require positive return?, volatility filter)
    rules = {
        "eth_bull90": ("eth_ret_90d", True, None),
        "eth_bear90": ("eth_ret_90d", False, None),
        "btc_bull90": ("btc_ret_90d", True, None),
        "btc_bear90": ("btc_ret_90d", False, None),
        "eth_bull90_high_vol": ("eth_ret_90d", True, above_median),
        "eth_bull90_low_vol": ("eth_ret_90d", True, at_or_below_median),
        "eth_bear90_high_vol": ("eth_ret_90d", False, above_median),
        "eth_bear90_low_vol": ("eth_ret_90d", False, at_or_below_median),
        "btc_bull90_high_vol": ("btc_ret_90d", True, above_median),
        "btc_bear90_high_vol": ("btc_ret_90d", False, above_median),
    }
    if gate.name not in rules:
        raise ValueError(f"unknown gate: {gate.name}")

    column, bullish, vol_filter = rules[gate.name]
    if bullish:
        trend_ok = regimes[column] > 0.0
    else:
        trend_ok = regimes[column] <= 0.0
    if vol_filter is None:
        return trend_ok
    return trend_ok & vol_filter
def net_returns(closes: pd.DataFrame, position: pd.Series) -> pd.Series:
    """Return per-bar strategy returns after taker fees.

    The target position decided on one bar is held on the next bar, so each
    bar's return is earned with the previous bar's target.

    Args:
        closes: Frame containing the ``ETH-USDT-SWAP`` close column.
        position: Target position per bar.

    Returns:
        Held position times the ETH bar return, minus ``TAKER_FEE`` times the
        absolute change in held position.
    """
    held = position.shift(1).fillna(0.0)
    bar_return = closes["ETH-USDT-SWAP"].pct_change().fillna(0.0)
    # The first bar has no predecessor; its turnover is the held size itself.
    traded = held.diff().abs().fillna(held.abs())
    gross = held * bar_return
    fees = traded * TAKER_FEE
    return gross - fees
def equity_from_returns(returns: pd.Series) -> pd.Series:
    """Compound per-bar returns into an equity curve starting from ``INITIAL_EQUITY``.

    Args:
        returns: Per-bar simple returns; missing values count as 0.

    Returns:
        The compounded equity series, named ``"equity"``.
    """
    growth = returns.fillna(0.0).add(1.0).cumprod()
    return growth.mul(INITIAL_EQUITY).rename("equity")
def trades_from_returns(position: pd.Series, returns: pd.Series) -> list[dict[str, object]]:
    """Split the held position into trades and compound each trade's returns.

    A trade is a maximal run of consecutive bars on which the held position
    (the target shifted by one bar) keeps the same non-zero value; any change
    in size or sign starts a new trade.

    Args:
        position: Target position per bar.
        returns: Per-bar returns on the same index.

    Returns:
        One dict per trade with ``side``, ``entry_time`` (first held bar),
        ``exit_time`` (last held bar) and the compounded ``return``.
    """
    # NOTE(review): only the bars on which the position is held are
    # compounded. If `returns` charges the closing fee on the following flat
    # bar, that fee is not attributed to the trade. Confirm this is intended.
    held = position.shift(1).fillna(0.0)
    # A new run starts whenever the held value differs from the previous bar.
    run_id = held.ne(held.shift(1)).cumsum()
    in_market = held != 0.0

    trades: list[dict[str, object]] = []
    for _, run in held[in_market].groupby(run_id[in_market]):
        bars = run.index
        compounded = (1.0 + returns.loc[bars]).prod() - 1.0
        side = "short" if float(held.loc[bars[0]]) < 0.0 else "long"
        trades.append(
            {
                "side": side,
                "entry_time": bars[0],
                "exit_time": bars[-1],
                "return": float(compounded),
            }
        )
    return trades
def series_metrics(series: pd.Series) -> dict[str, float]:
    """Summarise an equity series by total return, annualized return and drawdown.

    Args:
        series: Equity values on a datetime index, in chronological order.

    Returns:
        A dict with ``total_return`` (last / first - 1), ``annualized_return``
        (compounded over the elapsed time using 365-day years) and
        ``max_drawdown`` (largest relative drop from a running peak). All
        three are 0.0 when the series has fewer than two points. The
        annualized return is 0.0 when the total return is not above -100% or
        no time has elapsed, and ``inf`` when annualizing overflows a float.
    """
    if len(series) < 2:
        return {"total_return": 0.0, "annualized_return": 0.0, "max_drawdown": 0.0}

    elapsed_seconds = (series.index[-1] - series.index[0]).total_seconds()
    years = elapsed_seconds / 86_400 / 365
    total = float(series.iloc[-1] / series.iloc[0] - 1.0)

    annualized = 0.0
    if total > -1.0 and years > 0.0:
        try:
            annualized = (1.0 + total) ** (1.0 / years) - 1.0
        except OverflowError:
            # A large gain over a very short span (e.g. two adjacent bars)
            # makes the exponent huge; Python float power raises instead of
            # returning inf, which would otherwise abort the whole run.
            annualized = float("inf")

    running_peak = series.cummax()
    drawdown = float((running_peak - series).div(running_peak).max())
    return {"total_return": total, "annualized_return": annualized, "max_drawdown": drawdown}
def trade_metrics(trades: list[dict[str, object]], start: pd.Timestamp, end: pd.Timestamp) -> dict[str, float | int]:
    """Compute win rate, profit factor and count for trades exiting in a window.

    Args:
        trades: Trade dicts with at least ``exit_time`` and ``return``.
        start: Inclusive lower bound on the exit time.
        end: Inclusive upper bound on the exit time.

    Returns:
        ``win_rate`` (share of trades with a positive return),
        ``profit_factor`` (sum of gains divided by the absolute sum of
        losses) and ``trades`` (number of trades in the window). The profit
        factor is ``inf`` when there are gains but no losses and 0.0 when
        there are no gains and no losses.
    """
    in_window = [
        float(trade["return"])
        for trade in trades
        if start <= pd.Timestamp(trade["exit_time"]) <= end
    ]
    gains = [value for value in in_window if value > 0.0]
    gross_profit = sum(gains)
    gross_loss = abs(sum(value for value in in_window if value < 0.0))

    if gross_loss:
        profit_factor = gross_profit / gross_loss
    elif gross_profit > 0.0:
        # Gains without a single loss: the ratio is unbounded. Reporting 0.0
        # here would make an all-winning record fail a "profit factor > 1"
        # filter, so report infinity instead.
        profit_factor = float("inf")
    else:
        profit_factor = 0.0

    return {
        "win_rate": len(gains) / len(in_window) if in_window else 0.0,
        "profit_factor": profit_factor,
        "trades": len(in_window),
    }
def trade_metrics_for_mask(trades: list[dict[str, object]], mask: pd.Series) -> dict[str, float | int]:
    """Compute win rate, profit factor and count for trades exiting inside a mask.

    Args:
        trades: Trade dicts with at least ``exit_time`` and ``return``.
        mask: Boolean series on a timestamp index. A trade is counted when
            the mask is true at its exit time; exit times missing from the
            mask count as false.

    Returns:
        ``win_rate`` (share of trades with a positive return),
        ``profit_factor`` (sum of gains divided by the absolute sum of
        losses) and ``trades`` (number of counted trades). The profit factor
        is ``inf`` when there are gains but no losses and 0.0 when there are
        no gains and no losses.
    """
    in_segment: list[float] = []
    if trades:
        exit_times = [pd.Timestamp(trade["exit_time"]) for trade in trades]
        # One reindex for all trades instead of one lookup per trade.
        flags = mask.reindex(exit_times).fillna(False)
        in_segment = [
            float(trade["return"])
            for trade, keep in zip(trades, flags.tolist())
            if bool(keep)
        ]

    gains = [value for value in in_segment if value > 0.0]
    gross_profit = sum(gains)
    gross_loss = abs(sum(value for value in in_segment if value < 0.0))

    if gross_loss:
        profit_factor = gross_profit / gross_loss
    elif gross_profit > 0.0:
        # Gains without a single loss: the ratio is unbounded, so report
        # infinity rather than 0.0, which would read as "no profit at all".
        profit_factor = float("inf")
    else:
        profit_factor = 0.0

    return {
        "win_rate": len(gains) / len(in_segment) if in_segment else 0.0,
        "profit_factor": profit_factor,
        "trades": len(in_segment),
    }
def metric_row(
    name: str,
    params: Params,
    gate: Gate,
    segment_type: str,
    segment: str,
    equity: pd.Series,
    trades: list[dict[str, object]],
    mask: pd.Series | None = None,
) -> dict[str, object]:
    """Build one result row from a (possibly masked) slice of the equity curve.

    Args:
        name: Full result name (parameters plus gate).
        params: Signal parameters, copied into the row.
        gate: Gate, copied into the row as name and description.
        segment_type: Category of the segment (for example ``"horizon"``).
        segment: Label of the segment within its category.
        equity: Full equity curve.
        trades: All trades of the run.
        mask: Optional boolean selector over ``equity``; None keeps everything.

    Returns:
        A flat dict with identification fields, the segment's start/end
        dates, the equity metrics and the trade statistics. When fewer than
        two equity points are selected, all metrics are zero and the dates
        span the whole curve.
    """
    if mask is None:
        window = equity
    else:
        window = equity[mask.reindex(equity.index).fillna(False)]

    if len(window) >= 2:
        start, end = window.index[0], window.index[-1]
        metrics = series_metrics(window)
        # Trades are attributed to the window by exit time.
        trade_stats = trade_metrics(trades, start, end)
    else:
        start, end = equity.index[0], equity.index[-1]
        metrics = {"total_return": 0.0, "annualized_return": 0.0, "max_drawdown": 0.0}
        trade_stats = {"win_rate": 0.0, "profit_factor": 0.0, "trades": 0}

    row: dict[str, object] = {
        "name": name,
        "base_name": params.base_name,
        "gate": gate.name,
        "gate_description": gate.description,
        "segment_type": segment_type,
        "segment": segment,
        "start": start.strftime("%Y-%m-%d"),
        "end": end.strftime("%Y-%m-%d"),
        "bar": BAR,
        "lookback": LOOKBACK,
        "trend": params.trend,
        "rel_entry": params.rel_entry,
        "vol_quantile": params.vol_quantile,
        "short_weight": params.short_weight,
        "long_weight": params.long_weight,
    }
    row.update(metrics)
    row.update(trade_stats)
    return row
def segment_metric_row(
    name: str,
    params: Params,
    gate: Gate,
    segment_type: str,
    segment: str,
    returns: pd.Series,
    trades: list[dict[str, object]],
    mask: pd.Series,
) -> dict[str, object]:
    """Build one result row by re-compounding only the returns inside a mask.

    Unlike ``metric_row``, the selected bars need not be contiguous: their
    returns are stitched together into a fresh equity curve.

    Args:
        name: Full result name (parameters plus gate).
        params: Signal parameters, copied into the row.
        gate: Gate, copied into the row as name and description.
        segment_type: Category of the segment (for example ``"year"``).
        segment: Label of the segment within its category.
        returns: Per-bar net returns of the whole run.
        trades: All trades of the run.
        mask: Boolean selector over ``returns``; missing bars count as false.

    Returns:
        A flat dict with identification fields, the segment's start/end
        dates, the equity metrics and the trade statistics. When fewer than
        two bars are selected, all metrics are zero and the dates span the
        whole return series.
    """
    in_segment = mask.reindex(returns.index).fillna(False)
    segment_returns = returns[in_segment].copy()

    if len(segment_returns) >= 2:
        # The first selected bar only anchors the curve; its return is zeroed
        # so the segment equity starts exactly at the initial equity.
        segment_returns.iloc[0] = 0.0
        segment_equity = equity_from_returns(segment_returns)
        start, end = segment_equity.index[0], segment_equity.index[-1]
        metrics = series_metrics(segment_equity)
        trade_stats = trade_metrics_for_mask(trades, in_segment)
    else:
        start, end = returns.index[0], returns.index[-1]
        metrics = {"total_return": 0.0, "annualized_return": 0.0, "max_drawdown": 0.0}
        trade_stats = {"win_rate": 0.0, "profit_factor": 0.0, "trades": 0}

    row: dict[str, object] = {
        "name": name,
        "base_name": params.base_name,
        "gate": gate.name,
        "gate_description": gate.description,
        "segment_type": segment_type,
        "segment": segment,
        "start": start.strftime("%Y-%m-%d"),
        "end": end.strftime("%Y-%m-%d"),
        "bar": BAR,
        "lookback": LOOKBACK,
        "trend": params.trend,
        "rel_entry": params.rel_entry,
        "vol_quantile": params.vol_quantile,
        "short_weight": params.short_weight,
        "long_weight": params.long_weight,
    }
    row.update(metrics)
    row.update(trade_stats)
    return row
def horizon_rows(name: str, params: Params, gate: Gate, equity: pd.Series, trades: list[dict[str, object]]) -> list[dict[str, object]]:
    """Build one metric row per entry of ``HORIZONS`` (full sample and trailing windows).

    Args:
        name: Full result name (parameters plus gate).
        params: Signal parameters.
        gate: Gate applied to the run.
        equity: Full equity curve.
        trades: All trades of the run.

    Returns:
        Rows with ``segment_type`` ``"horizon"``, in ``HORIZONS`` order.
    """
    latest = equity.index[-1]

    def trailing_mask(offset):
        # None selects the whole sample; otherwise keep bars within `offset`
        # of the last bar.
        if offset is None:
            return None
        return pd.Series(equity.index >= latest - offset, index=equity.index)

    return [
        metric_row(name, params, gate, "horizon", label, equity, trades, trailing_mask(offset))
        for label, offset in HORIZONS
    ]
def period_rows(
    name: str,
    params: Params,
    gate: Gate,
    returns: pd.Series,
    trades: list[dict[str, object]],
    regimes: pd.DataFrame,
) -> list[dict[str, object]]:
    """Build per-year, per-market-regime and per-volatility-regime metric rows.

    Args:
        name: Full result name (parameters plus gate).
        params: Signal parameters.
        gate: Gate applied to the run.
        returns: Per-bar net returns of the run.
        trades: All trades of the run.
        regimes: Frame with ``market_regime`` and ``vol_regime`` label columns.

    Returns:
        Rows for every calendar year present in ``returns`` (ascending),
        followed by the bull/bear rows and then the high/low volatility rows.
    """
    bar_years = returns.index.year
    segments = [
        ("year", str(year), pd.Series(bar_years == year, index=returns.index))
        for year in sorted(set(bar_years))
    ]
    segments += [
        ("market_regime", label, regimes["market_regime"] == label)
        for label in ("bull_90d", "bear_90d")
    ]
    segments += [
        ("vol_regime", label, regimes["vol_regime"] == label)
        for label in ("high_vol", "low_vol")
    ]
    return [
        segment_metric_row(name, params, gate, kind, label, returns, trades, selector)
        for kind, label, selector in segments
    ]
def markdown_table(frame: pd.DataFrame) -> str:
    """Render ``frame`` as a pipe-delimited markdown table.

    Floats are formatted with six significant digits, missing values become
    empty cells, and literal ``|`` characters in other values are escaped.

    Args:
        frame: Table to render; the index is not included.

    Returns:
        Header row, separator row and one line per data row, joined by
        newlines (no trailing newline).
    """

    def render(value: object) -> str:
        if isinstance(value, float):
            return f"{value:.6g}"
        return str(value).replace("|", "\\|")

    blanked = frame.astype(object).where(pd.notna(frame), "")
    table_rows = [
        list(frame.columns),
        ["---"] * len(frame.columns),
        *blanked.values.tolist(),
    ]
    return "\n".join(
        "| " + " | ".join(render(value) for value in table_row) + " |"
        for table_row in table_rows
    )
def write_report(command: str, paths: list[Path], selected: pd.DataFrame, horizons: pd.DataFrame, periods: pd.DataFrame, qualified: pd.DataFrame) -> str:
    """Compose the markdown validation report.

    Args:
        command: Command line quoted in the report.
        paths: Output files listed in the report.
        selected: Ranked result rows; the first 20 are tabulated.
        horizons: Horizon rows; only those of the first five selected names
            are shown.
        periods: Year/regime rows; only those of the first five selected
            names are shown.
        qualified: Rows that passed the usable filter; the conclusion is
            ACCEPT when it has at least one row and REJECT otherwise.

    Returns:
        The report text, ending with a newline.
    """
    if len(qualified) > 0:
        conclusion = "ACCEPT: at least one explicit non-forward-looking regime gate passed the full sample, all required recent horizons, yearly rows, and bull/bear/volatility segment checks."
    else:
        conclusion = "REJECT: no 4H-lb84 candidate produced a full-sample logically closed usable strategy after explicit regime gating."

    summary_columns = [
        "name",
        "total_return",
        "annualized_return",
        "max_drawdown",
        "win_rate",
        "profit_factor",
        "trades",
        "return_3y",
        "return_1y",
        "return_6m",
        "return_3m",
        "min_year_return",
        "min_market_return",
        "min_vol_return",
        "usable",
    ]
    top_rows = selected[summary_columns].head(20)
    top_five = set(selected["name"].head(5))
    top_horizons = horizons[horizons["name"].isin(top_five)]
    top_periods = periods[periods["name"].isin(top_five)]

    lines = [
        "# ETH Relative Momentum 4H-lb84 Regime Gate Validation",
        "",
        f"Run command: `{command}`",
        "",
        "Output files:",
    ]
    lines.extend(f"- `{path}`" for path in paths)
    lines.extend(
        [
            "",
            "Scope: offline validation only, using cached OKX ETH-USDT-SWAP and BTC-USDT-SWAP 15m candles resampled to 4H. No live API path or order path is used.",
            "No-future rule: all gates use trailing values available at the 4H close; positions are shifted one bar, so execution starts on the next 4H bar.",
            "Gate set: no gate, ETH/BTC trailing 90-day bull/bear, and those states crossed with ETH 30-day realized volatility above/below its trailing 365-day rolling median.",
            "Usable filter: positive full/3y/1y/6m/3m returns, full max drawdown <= 35%, PF > 1, at least 20 trades, no negative calendar year, no negative bull/bear segment, and no negative high/low-vol segment.",
            "",
            f"Conclusion: {conclusion}",
            "",
        ]
    )
    sections = (
        ("## Top Rows", top_rows),
        ("## Required Horizons For Top 5", top_horizons),
        ("## Year And Regime Segments For Top 5", top_periods),
    )
    for heading, table in sections:
        lines.extend([heading, "", markdown_table(table), ""])
    return "\n".join(lines)
def main() -> int:
    """Run the regime-gate validation and write the CSV and markdown outputs.

    For every candidate parameter set and every gate, the gated position is
    backtested, summarised per horizon, year and regime, flagged as usable or
    not, and scored. Results are ranked by (usable, score) descending.

    Command-line options:
        --output-dir: Directory for the outputs (created if missing).
        --source-totals: Totals CSV that candidates are read from.

    Returns:
        0 on success.

    Raises:
        RuntimeError: If the source totals contain no qualifying candidate.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
    parser.add_argument("--source-totals", type=Path, default=SOURCE_TOTALS)
    args = parser.parse_args()
    args.output_dir.mkdir(parents=True, exist_ok=True)

    params_list = params_from_source(args.source_totals)
    if not params_list:
        raise RuntimeError("no recent-positive 4H-lb84 candidates found in source totals")

    closes = load_closes()
    regimes = regime_frame(closes)
    # Gate masks depend only on the regimes, not on the signal parameters, so
    # they are computed once here instead of once per parameter set.
    gate_masks = [
        (gate, gate_mask(regimes, gate).reindex(closes.index).fillna(False))
        for gate in GATES
    ]

    total_rows: list[dict[str, object]] = []
    horizon_data: list[dict[str, object]] = []
    period_data: list[dict[str, object]] = []
    for params in params_list:
        base_position = target_position(closes, params)
        for gate, allowed in gate_masks:
            # Outside the gate the strategy is flat.
            position = base_position.where(allowed, 0.0)
            returns = net_returns(closes, position)
            equity = equity_from_returns(returns)
            trades = trades_from_returns(position, returns)
            name = f"{params.base_name}-gate-{gate.name}"
            horizons = horizon_rows(name, params, gate, equity, trades)
            periods = period_rows(name, params, gate, returns, trades, regimes)

            by_horizon = {row["segment"]: row for row in horizons}
            full = by_horizon["full"]
            year_returns = [float(row["total_return"]) for row in periods if row["segment_type"] == "year"]
            market_returns = [float(row["total_return"]) for row in periods if row["segment_type"] == "market_regime"]
            vol_returns = [float(row["total_return"]) for row in periods if row["segment_type"] == "vol_regime"]
            row = {
                **full,
                "return_3y": float(by_horizon["3y"]["total_return"]),
                "return_1y": float(by_horizon["1y"]["total_return"]),
                "return_6m": float(by_horizon["6m"]["total_return"]),
                "return_3m": float(by_horizon["3m"]["total_return"]),
                "min_year_return": min(year_returns),
                "min_market_return": min(market_returns),
                "min_vol_return": min(vol_returns),
            }
            row["usable"] = (
                row["total_return"] > 0.0
                and row["return_3y"] > 0.0
                and row["return_1y"] > 0.0
                and row["return_6m"] > 0.0
                and row["return_3m"] > 0.0
                and row["max_drawdown"] <= 0.35
                and row["profit_factor"] > 1.0
                and row["trades"] >= 20
                and row["min_year_return"] >= 0.0
                and row["min_market_return"] >= 0.0
                and row["min_vol_return"] >= 0.0
            )
            row["score"] = (
                float(row["annualized_return"])
                - float(row["max_drawdown"])
                + float(row["return_1y"])
                + 0.5 * float(row["return_6m"])
                + 0.25 * float(row["return_3m"])
                + 0.5 * float(row["min_year_return"])
                + 0.5 * float(row["min_market_return"])
                + 0.5 * float(row["min_vol_return"])
            )
            total_rows.append(row)
            horizon_data.extend(horizons)
            period_data.extend(periods)

    totals = pd.DataFrame(total_rows).sort_values(["usable", "score"], ascending=[False, False])
    horizon_frame = pd.DataFrame(horizon_data)
    period_frame = pd.DataFrame(period_data)
    qualified = totals[totals["usable"]]
    # Without any usable row, fall back to the 25 best-scored rows for inspection.
    selected = qualified if len(qualified) else totals.head(25)

    # Detail rows are only written and reported for the selected names.
    selected_names = set(selected["name"])
    selected_horizons = horizon_frame[horizon_frame["name"].isin(selected_names)]
    selected_periods = period_frame[period_frame["name"].isin(selected_names)]

    totals_path = args.output_dir / f"{PREFIX}-totals.csv"
    selected_path = args.output_dir / f"{PREFIX}-selected.csv"
    horizon_path = args.output_dir / f"{PREFIX}-horizons.csv"
    period_path = args.output_dir / f"{PREFIX}-periods.csv"
    report_path = args.output_dir / f"{PREFIX}-report.md"

    totals.to_csv(totals_path, index=False)
    selected.to_csv(selected_path, index=False)
    selected_horizons.to_csv(horizon_path, index=False)
    selected_periods.to_csv(period_path, index=False)
    report_path.write_text(
        write_report(
            "rtk .venv/bin/python scripts/validate_eth_relmom_lb84_regime_gate.py",
            [totals_path, selected_path, horizon_path, period_path, report_path],
            selected,
            selected_horizons,
            selected_periods,
            qualified,
        ),
        encoding="utf-8",
    )
    print(report_path)
    print(selected.head(10).to_string(index=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|