explore_eth_bidir_fusion_candidates.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. from __future__ import annotations
  2. import argparse
  3. import sys
  4. from pathlib import Path
  5. import pandas as pd
  6. sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
  7. from scripts.search_live_bb_squeeze_exit_variants import (
  8. Variant as LiveVariant,
  9. _load_candles as load_live_candles,
  10. cost_equity_frame as live_cost_equity_frame,
  11. run_variant as run_live_variant,
  12. )
  13. OUTPUT_DIR = Path("reports/eth-exploration")
  14. OUT_PREFIX = "eth-bidir-fusion-candidates"
  15. LIVE_NAME = "live_bb_squeeze_mxbuf0.0005"
  16. LIVE_VARIANT = LiveVariant(0.0005, 1)
  17. HORIZONS = (
  18. ("full", None),
  19. ("3y", pd.DateOffset(years=3)),
  20. ("1y", pd.DateOffset(years=1)),
  21. ("6m", pd.DateOffset(months=6)),
  22. ("3m", pd.DateOffset(months=3)),
  23. )
  24. def metrics(series: pd.Series) -> dict[str, float]:
  25. years = (series.index[-1] - series.index[0]).total_seconds() / 31_536_000
  26. total = float(series.iloc[-1] / series.iloc[0] - 1.0)
  27. annualized = (1.0 + total) ** (1.0 / years) - 1.0 if total > -1.0 and years > 0.0 else 0.0
  28. drawdown = float(((series.cummax() - series) / series.cummax()).max())
  29. return {
  30. "total_return": total,
  31. "annualized_return": annualized,
  32. "max_drawdown": drawdown,
  33. "calmar": annualized / drawdown if drawdown else 0.0,
  34. }
  35. def horizon_metrics(series: pd.Series) -> dict[str, float]:
  36. out: dict[str, float] = {}
  37. end = series.index[-1]
  38. for label, offset in HORIZONS:
  39. part = series if offset is None else series[series.index >= end - offset]
  40. if len(part) < 2:
  41. part = series
  42. values = metrics(part)
  43. for key, value in values.items():
  44. out[f"{label}_{key}"] = value
  45. return out
  46. def recent_return(series: pd.Series, days: int) -> float:
  47. cutoff = series.index[-1] - pd.Timedelta(days=days)
  48. part = series[series.index >= cutoff]
  49. if len(part) < 2:
  50. return 0.0
  51. return float(part.iloc[-1] / part.iloc[0] - 1.0)
  52. def trade_count(trades: list[dict[str, object]], days: int, end: pd.Timestamp) -> int:
  53. cutoff = end - pd.Timedelta(days=days)
  54. return sum(pd.Timestamp(trade["entry_time"], tz="UTC") >= cutoff for trade in trades)
  55. def live_row() -> dict[str, object]:
  56. candles = load_live_candles("ETH-USDT-SWAP", "15m")
  57. result = run_live_variant(candles, LIVE_VARIANT)
  58. frame = live_cost_equity_frame(result, 0.0021).set_index("ts")["equity"].sort_index()
  59. end = pd.to_datetime(candles[-1].ts, unit="ms", utc=True)
  60. row: dict[str, object] = {
  61. "source": "current_live",
  62. "name": LIVE_NAME,
  63. "kind": "live_bb_squeeze",
  64. "direction": "bidir",
  65. "full_trades": len(result.trades),
  66. "trades_30d": trade_count(result.trades, 30, end),
  67. "trades_14d": trade_count(result.trades, 14, end),
  68. "return_30d": recent_return(frame, 30),
  69. "return_14d": recent_return(frame, 14),
  70. "last_time": end.strftime("%Y-%m-%d %H:%M"),
  71. }
  72. row.update(horizon_metrics(frame))
  73. return row
  74. def fusion_rows() -> list[dict[str, object]]:
  75. path = Path("reports/long-short-fusion/fusion-total.csv")
  76. if not path.exists():
  77. return []
  78. frame = pd.read_csv(path)
  79. frame = frame.sort_values(
  80. ["h1y_return", "h6m_return", "h3m_return", "max_drawdown"],
  81. ascending=[False, False, False, True],
  82. ).head(200)
  83. rows: list[dict[str, object]] = []
  84. for _, source in frame.iterrows():
  85. row = {
  86. "source": "long_short_fusion",
  87. "name": source["name"],
  88. "kind": "fusion",
  89. "direction": "long+short",
  90. "full_total_return": float(source["total_return"]),
  91. "full_annualized_return": float(source["annualized_return"]),
  92. "full_max_drawdown": float(source["max_drawdown"]),
  93. "full_calmar": float(source["calmar"]),
  94. "3y_total_return": float(source["h3y_return"]),
  95. "1y_total_return": float(source["h1y_return"]),
  96. "6m_total_return": float(source["h6m_return"]),
  97. "3m_total_return": float(source["h3m_return"]),
  98. "full_trades": int(source["trades"]),
  99. "trades_30d": 0,
  100. "trades_14d": 0,
  101. "return_30d": 0.0,
  102. "return_14d": 0.0,
  103. "last_time": "",
  104. "long_weight": float(source["long_weight"]),
  105. "short_exposure": float(source["short_exposure"]),
  106. "recent_trigger_source": "not_available_in_existing_fusion_outputs",
  107. }
  108. rows.append(row)
  109. return rows
  110. def report_candidate_rows(path: Path, source: str) -> list[dict[str, object]]:
  111. if not path.exists():
  112. return []
  113. frame = pd.read_csv(path)
  114. rows: list[dict[str, object]] = []
  115. for _, source_row in frame.iterrows():
  116. row = {
  117. "source": source,
  118. "name": source_row["name"],
  119. "kind": source_row["family"],
  120. "direction": "bidir" if "bidir" in str(source_row["family"]) else "short",
  121. "last_time": source_row["last_time"],
  122. "return_30d": float(source_row.get("1m_total_return", 0.0)),
  123. "return_14d": float(source_row.get("2w_total_return", 0.0)),
  124. "trades_30d": int(source_row.get("1m_trades", 0)),
  125. "trades_14d": int(source_row.get("2w_trades", 0)),
  126. }
  127. for label in ("full", "3y", "1y", "3m"):
  128. for key in ("total_return", "annualized_return", "max_drawdown", "calmar"):
  129. row[f"{label}_{key}"] = float(source_row.get(f"{label}_{key}", 0.0))
  130. row["6m_total_return"] = 0.0
  131. row["6m_annualized_return"] = 0.0
  132. row["6m_max_drawdown"] = 0.0
  133. row["6m_calmar"] = 0.0
  134. row["full_trades"] = int(source_row.get("full_trades", 0))
  135. row["recent_trigger_source"] = "1m_2w_report_fields"
  136. rows.append(row)
  137. return rows
  138. def normalize_rows(rows: list[dict[str, object]]) -> pd.DataFrame:
  139. frame = pd.DataFrame(rows)
  140. for column in (
  141. "full_total_return",
  142. "3y_total_return",
  143. "1y_total_return",
  144. "6m_total_return",
  145. "3m_total_return",
  146. "return_30d",
  147. "return_14d",
  148. "full_max_drawdown",
  149. ):
  150. if column not in frame:
  151. frame[column] = 0.0
  152. frame["all_horizons_nonnegative"] = (
  153. (frame["full_total_return"] >= 0.0)
  154. & (frame["3y_total_return"] >= 0.0)
  155. & (frame["1y_total_return"] >= 0.0)
  156. & (frame["6m_total_return"] >= 0.0)
  157. & (frame["3m_total_return"] >= 0.0)
  158. )
  159. frame["recent_active"] = (frame["trades_30d"] >= 4) & (frame["trades_14d"] >= 2)
  160. frame["passes_rule"] = frame["all_horizons_nonnegative"] & frame["recent_active"]
  161. frame["candidate"] = (frame["source"] != "current_live") & frame["passes_rule"]
  162. frame["score"] = (
  163. frame["1y_total_return"]
  164. + 0.5 * frame["6m_total_return"]
  165. + 0.25 * frame["3m_total_return"]
  166. + frame["trades_30d"].clip(upper=20) * 0.005
  167. - frame["full_max_drawdown"]
  168. )
  169. return frame.sort_values(["candidate", "score"], ascending=[False, False])
  170. def markdown_table(frame: pd.DataFrame) -> str:
  171. def cell(value: object) -> str:
  172. if isinstance(value, float):
  173. return f"{value:.4f}"
  174. return str(value).replace("|", "\\|")
  175. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  176. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  177. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  178. def report_text(command: str, paths: list[Path], frame: pd.DataFrame) -> str:
  179. cols = [
  180. "source",
  181. "name",
  182. "kind",
  183. "direction",
  184. "full_total_return",
  185. "3y_total_return",
  186. "1y_total_return",
  187. "6m_total_return",
  188. "3m_total_return",
  189. "return_30d",
  190. "return_14d",
  191. "trades_30d",
  192. "trades_14d",
  193. "full_max_drawdown",
  194. "passes_rule",
  195. ]
  196. live = frame[frame["source"] == "current_live"][cols]
  197. selected = frame[frame["candidate"]][cols].head(20)
  198. fusion_near = frame[(frame["source"] == "long_short_fusion") & frame["all_horizons_nonnegative"]][cols].head(12)
  199. recent_fail = frame[
  200. (frame["source"] != "current_live")
  201. & frame["recent_active"]
  202. & ~frame["all_horizons_nonnegative"]
  203. ][cols].sort_values(["trades_30d", "trades_14d"], ascending=[False, False]).head(12)
  204. return "\n".join(
  205. [
  206. "# ETH Bidirectional Fusion Candidate Exploration",
  207. "",
  208. f"Run command: `{command}`",
  209. "",
  210. "Scope: offline research only. The script reads local candle cache plus existing CSV reports and does not touch live executor, deployment, credentials, or order submission.",
  211. "",
  212. "Selection rule: full/3y/1y/6m/3m total returns all nonnegative, plus at least 4 trades in 30d and 2 trades in 14d.",
  213. "",
  214. "Output files:",
  215. *[f"- `{path}`" for path in paths],
  216. "",
  217. "## Current Live BB Squeeze",
  218. "",
  219. markdown_table(live),
  220. "",
  221. "## Replacement Candidates Passing Rule",
  222. "",
  223. markdown_table(selected) if len(selected) else "No candidate passed the full rule.",
  224. "",
  225. "## Long-Horizon Fusion Near Misses",
  226. "",
  227. markdown_table(fusion_near),
  228. "",
  229. "## Recent-Active Horizon Failures",
  230. "",
  231. markdown_table(recent_fail),
  232. "",
  233. "## Conclusion",
  234. "",
  235. "The long/short fusion family is structurally stronger than recent high-frequency short/bidirectional searches on the long horizons, but the existing fusion outputs do not include 30d/14d trigger evidence. The high-frequency candidates have enough recent triggers but fail the long-horizon quality requirement. Current live BB squeeze remains the only row that passes both horizon and recent-trigger filters in this offline comparison.",
  236. "",
  237. "Recommendation: keep any fusion candidate in read-only observation. Do not replace the live BB squeeze from this run alone.",
  238. "",
  239. ]
  240. )
  241. def main() -> int:
  242. parser = argparse.ArgumentParser()
  243. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  244. args = parser.parse_args()
  245. rows = [live_row()]
  246. rows.extend(fusion_rows())
  247. rows.extend(report_candidate_rows(Path("reports/eth-exploration/eth-filtered-recent-short-bidir-candidates.csv"), "filtered_recent_short_bidir"))
  248. rows.extend(report_candidate_rows(Path("reports/ultrashort/eth-highfreq-short-bidir-candidates.csv"), "highfreq_short_bidir"))
  249. frame = normalize_rows(rows)
  250. args.output_dir.mkdir(parents=True, exist_ok=True)
  251. all_path = args.output_dir / f"{OUT_PREFIX}.csv"
  252. selected_path = args.output_dir / f"{OUT_PREFIX}-selected.csv"
  253. report_path = args.output_dir / f"{OUT_PREFIX}-report.md"
  254. frame.to_csv(all_path, index=False)
  255. frame[frame["candidate"]].to_csv(selected_path, index=False)
  256. paths = [all_path, selected_path, report_path]
  257. command = "rtk .venv/bin/python scripts/explore_eth_bidir_fusion_candidates.py"
  258. report_path.write_text(report_text(command, paths, frame), encoding="utf-8")
  259. print(report_path)
  260. print(frame.head(20).to_string(index=False))
  261. return 0
  262. if __name__ == "__main__":
  263. raise SystemExit(main())