validate_trend_exhaustion_candidate.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. from __future__ import annotations
  2. import argparse
  3. import sys
  4. from dataclasses import replace
  5. from pathlib import Path
  6. import pandas as pd
  7. sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
  8. from scripts.search_eth_bearish_failure_confirmation import (
  9. INITIAL_EQUITY,
  10. Spec,
  11. close_return,
  12. joined_frames,
  13. load_frame,
  14. markdown_table,
  15. period_metrics,
  16. resample,
  17. signals,
  18. )
  19. from scripts.search_long_short_fusion import component_returns, metrics
# Directory that receives the CSV and Markdown artifacts (default for --output-dir).
OUTPUT_DIR = Path("reports/eth-exploration")
# File-name prefix shared by every artifact written by main().
PREFIX = "trend-exhaustion-narrow-validation"
# Baseline equity CSV read by baseline_equity() (default for --baseline-equity).
BASELINE_EQUITY = Path("reports/eth-exploration/eth-focused-portfolio-conservative-equity.csv")
# Value matched against the CSV's "portfolio" column (default for --baseline-portfolio).
BASELINE_PORTFOLIO = "all_legs-risk-3-c0124-eth_btc_rsi_filter+btc_lead_eth_lag_15m+eth_robust_twap"
# Candidate under validation. Spec is declared in another module; presumably the
# positional fields after the first two are fast, slow, lookback, threshold, stop,
# take and hold (the neighborhood grid brackets exactly these values) — TODO confirm
# against the Spec definition.
CANDIDATE = Spec("trend_exhaustion", "1H", 50, 240, 8, 0.012, 0.03, 0.045, 72, "none")
# (label, look-back offset) pairs; an offset of None selects the whole series in
# horizon_metrics().
HORIZONS = (
    ("full", None),
    ("3y", pd.DateOffset(years=3)),
    ("1y", pd.DateOffset(years=1)),
    ("6m", pd.DateOffset(months=6)),
    ("3m", pd.DateOffset(months=3)),
)
# Fractions of the overlay's returns that overlay_rows() adds to the baseline returns.
OVERLAY_WEIGHTS = (0.025, 0.05, 0.075, 0.10)
def run_spec(spec: Spec, frame: pd.DataFrame, entry_filter: pd.Series | None = None) -> tuple[pd.Series, list[dict[str, object]]]:
    """Simulate ``spec`` bar by bar over ``frame`` and return its equity and trades.

    Positions are short-style: the stop sits above the entry price and the
    take-profit below it. Signals observed on one bar are executed at the
    next bar's open.

    Args:
        spec: Strategy parameters; ``slow``, ``lookback``, ``stop``, ``take``
            and ``hold`` are read here, the rest is consumed by ``signals``.
        frame: Candle frame exposing ``open``, ``high``, ``low`` and ``close``
            columns as tuple attributes.
        entry_filter: Optional boolean mask; entries are only kept where the
            mask (reindexed to the entry signal, missing values False) is True.

    Returns:
        ``(daily_equity, trades)``. ``daily_equity`` is the last mark of each
        day, forward-filled; ``trades`` holds one dict per closed position with
        ``entry_time``, ``exit_time`` and ``return``.
    """
    entry, exit_ = signals(spec, frame)
    if entry_filter is not None:
        entry = entry & entry_filter.reindex(entry.index).fillna(False)
    # Skip the leading bars; presumably so the indicators behind the signals are
    # populated before trading starts — TODO confirm against signals().
    warmup = max(spec.slow, 260, spec.lookback * 3) + 2
    equity = INITIAL_EQUITY
    position: dict[str, object] | None = None
    pending_entry = False
    pending_exit = False
    trades: list[dict[str, object]] = []
    curve: list[tuple[pd.Timestamp, float]] = []
    rows = list(frame.itertuples())
    # NOTE(review): if the frame has no bars beyond warmup, curve stays empty and
    # the resample below will presumably fail — confirm whether callers can hit this.
    for index in range(warmup, len(rows)):
        candle = rows[index]
        ts = frame.index[index]
        # An exit queued on the previous bar is filled at this bar's open.
        if pending_exit and position is not None:
            net = close_return(float(position["entry_price"]), float(candle.open))
            equity *= 1 + net
            trades.append({"entry_time": position["entry_time"], "exit_time": ts, "return": net})
            position = None
        pending_exit = False
        # An entry queued on the previous bar is filled at this bar's open.
        if pending_entry and position is None and equity > 0:
            position = {
                "entry_time": ts,
                "entry_index": index,
                "entry_price": float(candle.open),
                "stop": float(candle.open) * (1 + spec.stop),
                "take": float(candle.open) * (1 - spec.take),
            }
        pending_entry = False
        mark = equity
        if position is not None:
            stop_hit = candle.high >= float(position["stop"])
            take_hit = candle.low <= float(position["take"])
            if stop_hit or take_hit:
                # When both levels are touched in the same candle the stop price is used.
                price = float(position["stop"] if stop_hit else position["take"])
                net = close_return(float(position["entry_price"]), price)
                equity *= 1 + net
                trades.append({"entry_time": position["entry_time"], "exit_time": ts, "return": net})
                position = None
                mark = equity
            else:
                # Mark the open position to the close: gains when price falls below
                # the entry. The 0.0004 deduction is presumably a cost estimate —
                # TODO confirm.
                gross = float(position["entry_price"]) / candle.close - 1
                mark = equity * (1 + gross - 0.0004)
        curve.append((ts, mark))
        # No orders are queued on the final bar or once equity is exhausted; a
        # position still open at the end is only marked, never added to trades.
        if index == len(rows) - 1 or equity <= 0:
            continue
        if position is None and bool(entry.iloc[index]):
            pending_entry = True
        elif position is not None and (bool(exit_.iloc[index]) or index - int(position["entry_index"]) >= spec.hold):
            pending_exit = True
    series = pd.Series({ts: value for ts, value in curve}).sort_index()
    daily = series.resample("1D").last().ffill()
    # NOTE(review): the prepended INITIAL_EQUITY row shares the first day's
    # timestamp, so the groupby(...).last() below appears to keep the resampled
    # value instead — confirm the initial row is meant to be superseded.
    daily = pd.concat([pd.Series([INITIAL_EQUITY], index=[daily.index[0].normalize()]), daily]).sort_index()
    return daily.groupby(level=0).last(), trades
  88. def row_for_spec(name: str, equity: pd.Series, trades: list[dict[str, object]]) -> dict[str, object]:
  89. row: dict[str, object] = {"name": name}
  90. for label, offset in HORIZONS:
  91. for key, value in period_metrics(equity, trades, offset).items():
  92. row[f"{label}_{key}"] = value
  93. years = yearly_returns(equity)
  94. row["return_2022"] = float(years.get("2022", 0.0))
  95. row["return_2023"] = float(years.get("2023", 0.0))
  96. row["worst_year"] = float(years.min()) if len(years) else 0.0
  97. row["left_tail_2022_2023"] = min(row["return_2022"], row["return_2023"])
  98. return row
  99. def yearly_returns(equity: pd.Series) -> pd.Series:
  100. sampled = equity.resample("YE").last().dropna()
  101. starts = equity.resample("YE").first().reindex(sampled.index)
  102. returns = sampled / starts - 1.0
  103. returns.index = sampled.index.tz_localize(None).to_period("Y").astype(str)
  104. return returns
  105. def neighborhood_specs() -> list[Spec]:
  106. specs = {CANDIDATE.name: CANDIDATE}
  107. for fast, slow in ((40, 220), (40, 240), (50, 220), (50, 260), (60, 240), (60, 260)):
  108. spec = replace(CANDIDATE, fast=fast, slow=slow)
  109. specs[spec.name] = spec
  110. for field, values in (
  111. ("lookback", (6, 10)),
  112. ("threshold", (0.010, 0.014)),
  113. ("stop", (0.025, 0.035)),
  114. ("take", (0.040, 0.050)),
  115. ("hold", (60, 84)),
  116. ):
  117. for value in values:
  118. spec = replace(CANDIDATE, **{field: value})
  119. specs[spec.name] = spec
  120. return list(specs.values())
  121. def feature_filters(frame: pd.DataFrame) -> dict[str, pd.Series]:
  122. close = frame["close"]
  123. btc = frame["btc_close"]
  124. slow = close.ewm(span=CANDIDATE.slow, adjust=False).mean()
  125. eth_slope = slow / slow.shift(24) - 1.0
  126. btc_sma = btc.rolling(240).mean()
  127. btc_slope = btc_sma / btc_sma.shift(24) - 1.0
  128. eth_vol = close.pct_change().rolling(72).std()
  129. vol_rank = eth_vol.rolling(720).rank(pct=True)
  130. return {
  131. "btc_above_sma240": btc > btc_sma,
  132. "btc_sma240_slope_ge_0": btc_slope >= 0.0,
  133. "eth_slope_ge_-0.015": eth_slope >= -0.015,
  134. "eth_slope_-0.015_to_0": (eth_slope >= -0.015) & (eth_slope <= 0.0),
  135. "vol_rank_0.35_to_0.85": (vol_rank >= 0.35) & (vol_rank <= 0.85),
  136. "vol_rank_le_0.85": vol_rank <= 0.85,
  137. "btc_up_and_mid_vol": (btc > btc_sma) & (vol_rank >= 0.35) & (vol_rank <= 0.85),
  138. "btc_up_eth_slope_ge_-0.015": (btc > btc_sma) & (eth_slope >= -0.015),
  139. "btc_up_mid_vol_eth_slope": (btc > btc_sma) & (vol_rank >= 0.35) & (vol_rank <= 0.85) & (eth_slope >= -0.015),
  140. "btc_slope_up_mid_vol_eth_slope": (btc_slope >= 0.0) & (vol_rank >= 0.35) & (vol_rank <= 0.85) & (eth_slope >= -0.015),
  141. }
  142. def baseline_equity(path: Path, portfolio: str) -> pd.Series:
  143. frame = pd.read_csv(path)
  144. selected = frame[
  145. (frame["portfolio"] == portfolio)
  146. & (frame["cost_model"] == "maker_taker")
  147. & (frame["scope"] == "all_legs")
  148. ].copy()
  149. selected["date"] = pd.to_datetime(selected["date"], utc=True)
  150. series = selected.sort_values("date").set_index("date")["equity"].astype(float)
  151. series.name = portfolio
  152. return series
  153. def overlay_rows(base: pd.Series, overlay: pd.Series) -> pd.DataFrame:
  154. base_metrics = {label: horizon_metrics(base, offset) for label, offset in HORIZONS}
  155. rows = []
  156. overlay_returns = component_returns(overlay)
  157. for weight in OVERLAY_WEIGHTS:
  158. aligned = pd.DataFrame({"base": component_returns(base), "overlay": overlay_returns}).dropna()
  159. combined = aligned["base"] + aligned["overlay"] * weight
  160. equity = INITIAL_EQUITY * (1.0 + combined).cumprod()
  161. for label, offset in HORIZONS:
  162. row = horizon_metrics(equity, offset)
  163. baseline = base_metrics[label]
  164. rows.append(
  165. {
  166. "overlay_weight": weight,
  167. "horizon": label,
  168. **row,
  169. "baseline_total_return": baseline["total_return"],
  170. "baseline_max_drawdown": baseline["max_drawdown"],
  171. "baseline_calmar": baseline["calmar"],
  172. "delta_total_return": row["total_return"] - baseline["total_return"],
  173. "delta_max_drawdown": row["max_drawdown"] - baseline["max_drawdown"],
  174. "delta_calmar": row["calmar"] - baseline["calmar"],
  175. }
  176. )
  177. return pd.DataFrame(rows)
  178. def horizon_metrics(series: pd.Series, offset: pd.DateOffset | None) -> dict[str, object]:
  179. scoped = series if offset is None else series[series.index >= series.index[-1] - offset]
  180. if len(scoped) < 2:
  181. scoped = series
  182. return {"start": scoped.index[0].strftime("%Y-%m-%d"), "end": scoped.index[-1].strftime("%Y-%m-%d"), **metrics(scoped)}
  183. def report_text(paths: list[Path], neighborhood: pd.DataFrame, filters: pd.DataFrame, overlay: pd.DataFrame, selected_name: str) -> str:
  184. base = filters[filters["name"] == "unfiltered"].iloc[0]
  185. best_filter = filters.iloc[0]
  186. full_overlay = overlay[overlay["horizon"] == "full"].sort_values(["delta_calmar", "delta_max_drawdown"], ascending=[False, True])
  187. best_overlay = full_overlay.iloc[0]
  188. neighborhood_pass = int(
  189. (
  190. (neighborhood["full_total_return"] > 0.0)
  191. & (neighborhood["3y_total_return"] > 0.0)
  192. & (neighborhood["1y_total_return"] > 0.0)
  193. & (neighborhood["return_2022"] > -0.10)
  194. & (neighborhood["return_2023"] > -0.05)
  195. ).sum()
  196. )
  197. include = bool(
  198. neighborhood_pass >= 5
  199. and best_filter["return_2022"] > -0.10
  200. and best_filter["return_2023"] > -0.05
  201. and best_filter["full_total_return"] > 0.0
  202. and best_overlay["delta_calmar"] > 0.0
  203. and best_overlay["delta_max_drawdown"] <= 0.0
  204. )
  205. verdict = (
  206. f"Include `{CANDIDATE.name}` with `{selected_name}` as a capped 0.025-0.10 overlay dimension in the conservative ETH portfolio search."
  207. if include
  208. else f"Reject `{CANDIDATE.name}` for portfolio search. The narrow validation does not satisfy stability, left-tail, and overlay drawdown/Calmar requirements together."
  209. )
  210. keep = [
  211. "name",
  212. "full_total_return",
  213. "full_annualized_return",
  214. "full_max_drawdown",
  215. "full_profit_factor",
  216. "full_trades",
  217. "3y_total_return",
  218. "1y_total_return",
  219. "6m_total_return",
  220. "3m_total_return",
  221. "return_2022",
  222. "return_2023",
  223. "worst_year",
  224. ]
  225. overlay_keep = [
  226. "overlay_weight",
  227. "horizon",
  228. "total_return",
  229. "max_drawdown",
  230. "calmar",
  231. "delta_total_return",
  232. "delta_max_drawdown",
  233. "delta_calmar",
  234. ]
  235. return "\n".join(
  236. [
  237. "# Trend Exhaustion Narrow Validation",
  238. "",
  239. "Run command: `rtk .venv/bin/python scripts/validate_trend_exhaustion_candidate.py`",
  240. "",
  241. "Output files:",
  242. *[f"- `{path}`" for path in paths],
  243. "",
  244. f"Candidate: `{CANDIDATE.name}`.",
  245. "Scope: local OKX `ETH-USDT-SWAP` and `BTC-USDT-SWAP` candles only. No live path touched.",
  246. "All filters use current or historical completed 1H candles; entries execute on the next open.",
  247. "",
  248. "## Small Parameter Neighborhood",
  249. "",
  250. f"Neighborhood pass count under fixed left-tail thresholds: {neighborhood_pass}/{len(neighborhood)}.",
  251. "",
  252. markdown_table(neighborhood.sort_values(["full_total_return", "left_tail_2022_2023"], ascending=[False, False]).head(12)[keep]),
  253. "",
  254. "## Structural Filters",
  255. "",
  256. "Filter objective: improve 2022/2023 left tail without using future candles.",
  257. "",
  258. markdown_table(filters.head(12)[keep]),
  259. "",
  260. "Unfiltered 2022/2023:",
  261. f"- 2022 `{base['return_2022']:.4f}`, 2023 `{base['return_2023']:.4f}`",
  262. "",
  263. "## Conservative Portfolio Overlay",
  264. "",
  265. f"Overlay source: `{CANDIDATE.name}` filtered by `{selected_name}`.",
  266. "",
  267. markdown_table(overlay[overlay["horizon"].isin(["full", "3y", "1y"])][overlay_keep]),
  268. "",
  269. "## Verdict",
  270. "",
  271. verdict,
  272. "",
  273. ]
  274. )
  275. def main() -> int:
  276. parser = argparse.ArgumentParser()
  277. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  278. parser.add_argument("--baseline-equity", type=Path, default=BASELINE_EQUITY)
  279. parser.add_argument("--baseline-portfolio", default=BASELINE_PORTFOLIO)
  280. args = parser.parse_args()
  281. eth = load_frame("ETH-USDT-SWAP")
  282. btc = load_frame("BTC-USDT-SWAP")
  283. frame = joined_frames(resample(eth, "1H"), resample(btc, "1H"))
  284. neighborhood_rows = []
  285. best_equity: pd.Series | None = None
  286. selected_name = ""
  287. for spec in neighborhood_specs():
  288. equity, trades = run_spec(spec, frame)
  289. row = row_for_spec(spec.name, equity, trades)
  290. neighborhood_rows.append(row)
  291. if spec == CANDIDATE:
  292. best_equity = equity
  293. selected_name = spec.name
  294. neighborhood = pd.DataFrame(neighborhood_rows)
  295. filter_rows = []
  296. filter_equities: dict[str, pd.Series] = {}
  297. equity, trades = run_spec(CANDIDATE, frame)
  298. filter_rows.append(row_for_spec("unfiltered", equity, trades))
  299. filter_equities["unfiltered"] = equity
  300. for name, mask in feature_filters(frame).items():
  301. equity, trades = run_spec(CANDIDATE, frame, mask)
  302. filter_rows.append(row_for_spec(name, equity, trades))
  303. filter_equities[name] = equity
  304. filters = pd.DataFrame(filter_rows).sort_values(
  305. ["left_tail_2022_2023", "full_max_drawdown", "full_total_return"],
  306. ascending=[False, True, False],
  307. )
  308. selected_filter_name = str(filters.iloc[0]["name"])
  309. selected_equity = filter_equities[selected_filter_name]
  310. if best_equity is None:
  311. best_equity = filter_equities["unfiltered"]
  312. selected_name = CANDIDATE.name
  313. overlay = overlay_rows(baseline_equity(args.baseline_equity, args.baseline_portfolio), selected_equity)
  314. args.output_dir.mkdir(parents=True, exist_ok=True)
  315. neighborhood_path = args.output_dir / f"{PREFIX}-neighborhood.csv"
  316. filters_path = args.output_dir / f"{PREFIX}-filters.csv"
  317. overlay_path = args.output_dir / f"{PREFIX}-overlay.csv"
  318. report_path = args.output_dir / f"{PREFIX}.md"
  319. neighborhood.to_csv(neighborhood_path, index=False)
  320. filters.to_csv(filters_path, index=False)
  321. overlay.to_csv(overlay_path, index=False)
  322. report_path.write_text(
  323. report_text(
  324. [neighborhood_path, filters_path, overlay_path, report_path],
  325. neighborhood,
  326. filters,
  327. overlay,
  328. selected_filter_name,
  329. ),
  330. encoding="utf-8",
  331. )
  332. print(report_path)
  333. print(filters.head(8).to_string(index=False))
  334. print(overlay[overlay["horizon"] == "full"].to_string(index=False))
  335. return 0
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())