# search_recent_squeeze_breakout_refine.py (15 KB)
  from __future__ import annotations

  import math
  from dataclasses import dataclass
  from itertools import product
  from pathlib import Path

  import pandas as pd
  # Root of the cached candle CSVs; load_frame reads <DATA_DIR>/<symbol>/15m.csv.
  DATA_DIR = Path("data/okx-candles")
  # Directory that receives the CSV and markdown report files.
  OUTPUT_DIR = Path("reports/recent-regime")
  # File-name prefix shared by every report artefact.
  PREFIX = "recent-squeeze-refine"
  # Starting equity of every simulated candidate.
  INITIAL_EQUITY = 10_000.0
  # Cost subtracted from the return of every closed trade (0.21%).
  ROUNDTRIP_COST = 0.0021
  # (label, offset) pairs; each offset is subtracted from the last equity
  # timestamp to obtain the start of a reporting window.
  HORIZONS = (
      ("3y", pd.DateOffset(years=3)),
      ("1y", pd.DateOffset(years=1)),
      ("6m", pd.DateOffset(months=6)),
      ("90d", pd.DateOffset(days=90)),
      ("30d", pd.DateOffset(days=30)),
      ("14d", pd.DateOffset(days=14)),
      ("7d", pd.DateOffset(days=7)),
  )
  @dataclass(frozen=True)
  class Candidate:
      """Immutable parameter set describing one squeeze-breakout variant.

      Instances are frozen (hashable, comparable by value). ``name`` renders
      every field into a single identifier string.
      """

      # Quantile of the rolling band-width distribution that counts as a squeeze.
      quantile: float
      # Number of bars over which the breakout move is measured.
      trigger_bars: int
      # Minimum close-to-close move (fraction) required to trigger a signal.
      trigger_pct: float
      # Stop-loss distance from the entry price, as a fraction.
      stop_pct: float
      # Take-profit distance from the entry price, as a fraction.
      take_pct: float
      # Number of bars after which an open position is closed on time.
      hold_bars: int
      # Directions allowed to trade: "long", "short" or "both".
      side_mode: str
      # ETH/BTC ratio filter: "none", "weak-short", "strong-long" or "directional".
      ratio_filter: str
      # Bars to wait after an exit before a new signal is accepted.
      cooldown_bars: int

      @property
      def name(self) -> str:
          """Return the identifier built from all parameters (floats use ``:g``)."""
          return (
              f"sqref-q{self.quantile:g}-tb{self.trigger_bars}-tr{self.trigger_pct:g}"
              f"-sl{self.stop_pct:g}-tp{self.take_pct:g}-h{self.hold_bars}"
              f"-{self.side_mode}-{self.ratio_filter}-cd{self.cooldown_bars}"
          )
  def load_frame(symbol: str) -> pd.DataFrame:
      """Load the cached 15-minute candles for *symbol*, indexed by UTC time.

      Reads ``DATA_DIR/<symbol>/15m.csv``, converts the millisecond ``ts``
      column into a timezone-aware ``dt`` column, sorts by ``ts``, keeps the
      last row of any duplicated timestamp and returns the frame indexed by
      ``dt``.
      """
      candles = pd.read_csv(DATA_DIR / symbol / "15m.csv")
      candles["dt"] = pd.to_datetime(candles["ts"], unit="ms", utc=True)
      ordered = candles.sort_values("ts")
      # keep="last": a later row for the same timestamp replaces the earlier one.
      deduplicated = ordered.drop_duplicates("ts", keep="last")
      return deduplicated.set_index("dt")
  def aligned_frame() -> pd.DataFrame:
      """Return ETH and BTC candles joined on timestamp for the last three years.

      Columns are prefixed ``eth_`` / ``btc_``; a ``ratio`` column holds
      ``eth_close / btc_close``. Rows with any missing value are dropped, then
      the frame is cut to the three years preceding its last timestamp.

      Raises:
          ValueError: if the two series share no complete rows.
      """
      eth = load_frame("ETH-USDT-SWAP").add_prefix("eth_")
      btc = load_frame("BTC-USDT-SWAP").add_prefix("btc_")
      frame = eth.join(btc, how="inner")
      frame["ratio"] = frame["eth_close"] / frame["btc_close"]
      frame = frame.dropna()
      if frame.empty:
          # Without this guard frame.index[-1] fails with an opaque IndexError.
          raise ValueError("no overlapping ETH/BTC candles after alignment")
      cutoff = frame.index[-1] - pd.DateOffset(years=3)
      return frame[frame.index >= cutoff]
  def max_drawdown(equity: pd.Series) -> float:
      """Return the largest peak-to-trough decline of *equity* as a fraction.

      The decline at each point is measured against the running maximum.
      An empty series yields ``0.0``.
      """
      if not len(equity):
          return 0.0
      peak = equity.cummax()
      return float(((peak - equity) / peak).max())
  def annualized(total: float, start: pd.Timestamp, end: pd.Timestamp) -> float:
      """Convert a total return over ``[start, end]`` into a yearly compound rate.

      A year is 31,536,000 seconds (365 days).

      Returns:
          ``-1.0`` when *total* is -100% or worse, ``0.0`` when the window has
          no positive length, ``math.inf`` when compounding overflows a float
          (very short window with a positive return), otherwise
          ``(1 + total) ** (1 / years) - 1``.
      """
      if total <= -1.0:
          return -1.0
      years = (end - start).total_seconds() / 31_536_000
      if years <= 0.0:
          return 0.0
      try:
          return (1.0 + total) ** (1.0 / years) - 1.0
      except OverflowError:
          # Float exponentiation raises instead of saturating; report it as
          # unbounded growth rather than aborting the whole report.
          return math.inf
  def squeeze_series(frame: pd.DataFrame, quantile: float, window: int = 96, lookback: int = 960) -> pd.Series:
      """Flag bars whose relative band width is in the low tail of recent history.

      The width is ``4 * std / mean`` of ``eth_close`` over *window* bars
      (population standard deviation), i.e. the span of bands two standard
      deviations either side of the rolling mean, relative to that mean. A bar
      is flagged when its width is at or below the *quantile* of the widths
      over the last *lookback* bars, the current bar included.

      Bars without enough history compare against NaN and are therefore False.
      """
      close = frame["eth_close"]
      rolling_close = close.rolling(window)
      width = 4.0 * rolling_close.std(ddof=0) / rolling_close.mean()
      threshold = width.rolling(lookback).quantile(quantile)
      return width <= threshold
  def ratio_allows(frame: pd.DataFrame, index: int, side: str, ratio_filter: str, lookback: int = 96) -> bool:
      """Decide whether the ETH/BTC ratio trend permits a trade on *side*.

      The trend is the change of ``frame["ratio"]`` over the last *lookback*
      bars ending at positional *index*.

      Filters:
          ``"none"``         always allows.
          ``"weak-short"``   shorts only, ratio down more than 1%.
          ``"strong-long"``  longs only, ratio up more than 1%.
          ``"directional"``  shorts when the ratio is down more than 0.5%,
                             longs when it is up more than 0.5%.

      Raises:
          ValueError: for any other *ratio_filter*.
      """
      if ratio_filter == "none":
          return True
      if ratio_filter not in ("weak-short", "strong-long", "directional"):
          raise ValueError(f"unknown ratio_filter {ratio_filter}")
      if index < lookback:
          # A negative iloc offset would wrap to the end of the series and
          # compare against future data; treat missing history as "blocked".
          return False
      ratio = frame["ratio"]
      ratio_return = float(ratio.iloc[index] / ratio.iloc[index - lookback] - 1.0)
      if ratio_filter == "weak-short":
          return side == "short" and ratio_return < -0.01
      if ratio_filter == "strong-long":
          return side == "long" and ratio_return > 0.01
      return (side == "short" and ratio_return < -0.005) or (side == "long" and ratio_return > 0.005)
  def signal_side(candidate: Candidate, frame: pd.DataFrame, squeeze: pd.Series, index: int) -> str | None:
      """Return ``"long"``, ``"short"`` or ``None`` for the bar at *index*.

      A signal needs all of: the squeeze flag set on this bar, an ``eth_close``
      move over ``candidate.trigger_bars`` bars of at least
      ``candidate.trigger_pct`` in the signal direction, that direction being
      enabled by ``candidate.side_mode``, and ``ratio_allows`` approving it.
      Long is evaluated before short.
      """
      if not bool(squeeze.iloc[index]):
          return None
      if index < candidate.trigger_bars:
          # Not enough history: a negative iloc offset would wrap to the end
          # of the series and read future prices.
          return None
      close = frame["eth_close"]
      move = close.iloc[index] / close.iloc[index - candidate.trigger_bars] - 1.0
      mode = candidate.side_mode
      if (
          mode in ("long", "both")
          and move >= candidate.trigger_pct
          and ratio_allows(frame, index, "long", candidate.ratio_filter)
      ):
          return "long"
      if (
          mode in ("short", "both")
          and move <= -candidate.trigger_pct
          and ratio_allows(frame, index, "short", candidate.ratio_filter)
      ):
          return "short"
      return None
  def exit_return(side: str, entry: float, exit_price: float, cost: float | None = None) -> float:
      """Return the net fractional return of a closed trade.

      Args:
          side: ``"long"`` or ``"short"``.
          entry: fill price at entry.
          exit_price: fill price at exit.
          cost: round-trip cost to subtract; defaults to ``ROUNDTRIP_COST``.

      Raises:
          ValueError: if *side* is neither ``"long"`` nor ``"short"``.
      """
      if side == "long":
          gross = exit_price / entry - 1.0
      elif side == "short":
          # NOTE(review): the short leg is entry / exit - 1, i.e. measured
          # against the exit price rather than 1 - exit / entry. Confirm this
          # is the intended convention for the instrument being modelled.
          gross = entry / exit_price - 1.0
      else:
          raise ValueError(f"unknown side {side}")
      return gross - (ROUNDTRIP_COST if cost is None else cost)
  def mark_equity(equity: float, side: str, entry: float, mark: float) -> float:
      """Return *equity* marked to market for an open position, without costs.

      Args:
          equity: account equity before the open position's unrealised result.
          side: ``"long"`` or ``"short"``.
          entry: fill price at entry.
          mark: current price used for valuation.

      Raises:
          ValueError: if *side* is neither ``"long"`` nor ``"short"``.
      """
      if side == "long":
          gross = mark / entry - 1.0
      elif side == "short":
          # NOTE(review): same short convention as exit_return
          # (entry / mark - 1) - confirm it is intended.
          gross = entry / mark - 1.0
      else:
          raise ValueError(f"unknown side {side}")
      return equity * (1.0 + gross)
  def run_candidate(frame: pd.DataFrame, candidate: Candidate) -> tuple[pd.Series, pd.DataFrame]:
      """Simulate *candidate* bar by bar and return ``(equity_curve, trades)``.

      Mechanics, per bar after the warm-up:
        1. A signal raised on the previous bar is filled at this bar's
           ``eth_open``.
        2. An open position is checked against its stop, its target and the
           maximum holding time. When several trigger on the same bar the stop
           takes precedence, then the target, then the time exit at
           ``eth_close``.
        3. The equity point for the bar is recorded (marked to ``eth_close``
           while a position stays open).
        4. If flat, outside the cool-down and not on the last bar, a new
           signal is looked up for the next bar.

      Returns:
          The equity series indexed by ``dt`` and a frame with one row per
          closed trade (columns ``name, entry_time, exit_time, side, entry,
          exit, return, bars``). Both are empty, but well-formed, when the
          frame is not longer than the warm-up. A position still open on the
          last bar is marked to market but not recorded as a trade.
      """
      trade_columns = ["name", "entry_time", "exit_time", "side", "entry", "exit", "return", "bars"]
      squeeze = squeeze_series(frame, candidate.quantile)
      equity = INITIAL_EQUITY
      position: dict[str, object] | None = None
      pending_side: str | None = None
      cooldown_until = -1
      curve: list[dict[str, object]] = []
      trades: list[dict[str, object]] = []
      rows = list(frame.itertuples())
      last_index = len(rows) - 1
      # Skip bars for which the rolling windows (960 / 96) or the trigger
      # look-back cannot be evaluated yet.
      warmup = max(960, 96, candidate.trigger_bars) + 1

      for index in range(warmup, len(rows)):
          row = rows[index]
          ts = frame.index[index]

          # Fill the signal raised on the previous bar at this bar's open.
          if pending_side is not None and position is None:
              position = {
                  "side": pending_side,
                  "entry": float(row.eth_open),
                  "entry_index": index,
                  "entry_time": ts,
              }
              pending_side = None

          current = equity
          if position is not None:
              side = str(position["side"])
              entry = float(position["entry"])
              bars_held = index - int(position["entry_index"])
              high = float(row.eth_high)
              low = float(row.eth_low)
              if side == "long":
                  stop_price = entry * (1.0 - candidate.stop_pct)
                  take_price = entry * (1.0 + candidate.take_pct)
                  stop_hit = low <= stop_price
                  take_hit = high >= take_price
              else:
                  stop_price = entry * (1.0 + candidate.stop_pct)
                  take_price = entry * (1.0 - candidate.take_pct)
                  stop_hit = high >= stop_price
                  take_hit = low <= take_price
              hold_hit = bars_held >= candidate.hold_bars

              if stop_hit or take_hit or hold_hit:
                  # Stop wins over target when both lie inside the bar's range.
                  if stop_hit:
                      exit_price = stop_price
                  elif take_hit:
                      exit_price = take_price
                  else:
                      exit_price = float(row.eth_close)
                  ret = exit_return(side, entry, exit_price)
                  equity *= 1.0 + ret
                  trades.append(
                      {
                          "name": candidate.name,
                          "entry_time": position["entry_time"],
                          "exit_time": ts,
                          "side": side,
                          "entry": entry,
                          "exit": exit_price,
                          "return": ret,
                          "bars": bars_held,
                      }
                  )
                  current = equity
                  position = None
                  cooldown_until = index + candidate.cooldown_bars
              else:
                  current = mark_equity(equity, side, entry, float(row.eth_close))

          curve.append({"dt": ts, "equity": current})

          # No new signal on the last bar (nothing left to fill it), while a
          # position is open, or during the cool-down after an exit.
          if index == last_index or position is not None or index < cooldown_until:
              continue
          pending_side = signal_side(candidate, frame, squeeze, index)

      trades_frame = pd.DataFrame(trades, columns=trade_columns)
      if not curve:
          # Frame not longer than the warm-up: return an empty, typed curve
          # instead of failing on set_index("dt") of a column-less frame.
          empty_curve = pd.Series([], index=frame.index[:0].rename("dt"), dtype=float, name="equity")
          return empty_curve, trades_frame
      equity_series = pd.DataFrame(curve).set_index("dt")["equity"]
      return equity_series, trades_frame
  def metrics(equity: pd.Series, trades: pd.DataFrame, start: pd.Timestamp | None = None) -> dict[str, object]:
      """Summarise an equity curve and its trades, optionally from *start* on.

      Args:
          equity: equity curve indexed by timestamp.
          trades: closed trades with at least ``entry_time`` and ``return``.
          start: when given, only equity points at or after it and trades
              entered at or after it are used. If fewer than two equity points
              remain, the full curve is used instead (the trade filter still
              applies).

      Returns:
          A dict with ``start``, ``end``, ``total_return``,
          ``annualized_return``, ``max_drawdown``, ``calmar``, ``trades``,
          ``win_rate``, ``profit_factor`` and ``payoff_ratio``.
          ``profit_factor`` is 999.0 when there are profits and no losses.
      """
      scoped = equity if start is None else equity[equity.index >= start]
      if len(scoped) < 2:
          scoped = equity

      if start is None or trades.empty:
          scoped_trades = trades
      else:
          entry_times = pd.to_datetime(trades["entry_time"], utc=True)
          scoped_trades = trades[entry_times >= start]
      trade_returns = scoped_trades["return"] if len(scoped_trades) else pd.Series(dtype=float)
      wins = trade_returns[trade_returns > 0.0]
      losses = trade_returns[trade_returns < 0.0]

      first_ts = scoped.index[0]
      last_ts = scoped.index[-1]
      total = float(scoped.iloc[-1] / scoped.iloc[0] - 1.0)
      # Computed once and reused for both the reported value and calmar.
      annual = annualized(total, first_ts, last_ts)
      drawdown = max_drawdown(scoped)

      gross_profit = float(wins.sum()) if len(wins) else 0.0
      gross_loss = abs(float(losses.sum())) if len(losses) else 0.0
      avg_win = float(wins.mean()) if len(wins) else 0.0
      avg_loss = abs(float(losses.mean())) if len(losses) else 0.0
      trade_count = len(trade_returns)

      if gross_loss:
          profit_factor = gross_profit / gross_loss
      else:
          profit_factor = 999.0 if gross_profit else 0.0

      return {
          "start": first_ts.strftime("%Y-%m-%d %H:%M"),
          "end": last_ts.strftime("%Y-%m-%d %H:%M"),
          "total_return": total,
          "annualized_return": annual,
          "max_drawdown": drawdown,
          "calmar": annual / drawdown if drawdown else 0.0,
          "trades": int(trade_count),
          "win_rate": float(len(wins) / trade_count) if trade_count else 0.0,
          "profit_factor": profit_factor,
          "payoff_ratio": avg_win / avg_loss if avg_loss else 0.0,
      }
  def candidates() -> list[Candidate]:
      """Return the parameter grid to evaluate, as a list of ``Candidate``.

      The grid is the Cartesian product of the option tuples below, iterated
      with the right-most option varying fastest; every candidate uses a
      cool-down of 8 bars.
      """
      quantiles = (0.15, 0.20)
      trigger_bars_options = (4,)
      trigger_pcts = (0.004,)
      # Stop and target are varied together as (stop_pct, take_pct) pairs.
      stop_take_pairs = ((0.006, 0.012), (0.008, 0.016))
      hold_bars_options = (48,)
      side_modes = ("long", "short", "both")
      ratio_filters = ("none", "directional")
      cooldown_bars = 8

      grid = product(
          quantiles,
          trigger_bars_options,
          trigger_pcts,
          stop_take_pairs,
          hold_bars_options,
          side_modes,
          ratio_filters,
      )
      return [
          Candidate(
              quantile,
              trigger_bars,
              trigger_pct,
              stop_pct,
              take_pct,
              hold_bars,
              side_mode,
              ratio_filter,
              cooldown_bars,
          )
          for quantile, trigger_bars, trigger_pct, (stop_pct, take_pct), hold_bars, side_mode, ratio_filter in grid
      ]
  def monthly_rows(name: str, equity: pd.Series) -> pd.DataFrame:
      """Return one row per calendar month with start/end equity and return.

      ``end_equity`` is the last equity value of the month; ``start_equity`` is
      the previous month's end, or the first equity value for the first month.
      An empty curve yields an empty frame with the same columns.
      """
      columns = ["name", "month", "start_equity", "end_equity", "return"]
      if equity.empty:
          return pd.DataFrame(columns=columns)
      try:
          monthly = equity.resample("ME").last()
      except ValueError:
          # pandas releases before 2.2 only accept "M" for month-end.
          monthly = equity.resample("M").last()
      frame = pd.DataFrame(
          {
              "name": name,
              "month": monthly.index.strftime("%Y-%m"),
              "start_equity": monthly.shift(1).fillna(equity.iloc[0]).to_numpy(),
              "end_equity": monthly.to_numpy(),
          }
      )
      frame["return"] = frame["end_equity"] / frame["start_equity"] - 1.0
      return frame
  def format_cell(value: object) -> str:
      """Render *value* as the text of one markdown table cell.

      Floats are shortened to six significant digits; everything else goes
      through ``str`` with ``|`` escaped so it cannot end the cell.
      """
      if isinstance(value, float):
          return f"{value:.6g}"
      return str(value).replace("|", "\\|")
  def markdown_table(frame: pd.DataFrame) -> str:
      """Render *frame* as a pipe-delimited markdown table.

      The first line holds the column names, the second the ``---`` separator,
      then one line per row. Missing values become empty cells; every cell is
      formatted with ``format_cell``.
      """
      header = [list(frame.columns), ["---" for _ in frame.columns]]
      # Cast to object first so the "" placeholder fits into numeric columns.
      body = frame.astype(object).where(pd.notna(frame), "").values.tolist()
      lines = []
      for row in header + body:
          cells = " | ".join(format_cell(value) for value in row)
          lines.append("| " + cells + " |")
      return "\n".join(lines)
  def main() -> int:
      """Run every candidate, write the CSV and markdown reports, return 0.

      Files written under ``OUTPUT_DIR`` (all prefixed with ``PREFIX``):
      ``-total.csv`` (one row per candidate, sorted), ``-qualified.csv``
      (rows passing the recent-performance filter), ``-horizons.csv`` (metrics
      per candidate and horizon), ``-monthly.csv`` (monthly equity of the
      reported rows) and ``-report.md``. The report path and the first eight
      reported rows are printed.
      """
      OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
      frame = aligned_frame()
      total_rows: list[dict[str, object]] = []
      horizon_rows: list[dict[str, object]] = []
      equity_by_name: dict[str, pd.Series] = {}

      for candidate in candidates():
          equity, trades = run_candidate(frame, candidate)
          full = metrics(equity, trades)
          # Each horizon window ends at the last equity timestamp.
          horizons = {label: metrics(equity, trades, equity.index[-1] - offset) for label, offset in HORIZONS}
          recent_returns = [horizons[label]["total_return"] for label in ("90d", "30d", "14d", "7d")]
          row = {
              "name": candidate.name,
              "side_mode": candidate.side_mode,
              "ratio_filter": candidate.ratio_filter,
              "quantile": candidate.quantile,
              "trigger_bars": candidate.trigger_bars,
              "trigger_pct": candidate.trigger_pct,
              "stop_pct": candidate.stop_pct,
              "take_pct": candidate.take_pct,
              "hold_bars": candidate.hold_bars,
              **full,
              "return_3y": horizons["3y"]["total_return"],
              "return_1y": horizons["1y"]["total_return"],
              "return_6m": horizons["6m"]["total_return"],
              "return_90d": horizons["90d"]["total_return"],
              "return_30d": horizons["30d"]["total_return"],
              "return_14d": horizons["14d"]["total_return"],
              "return_7d": horizons["7d"]["total_return"],
              "min_recent_return": min(float(value) for value in recent_returns),
              "recent_trades": sum(int(horizons[label]["trades"]) for label in ("30d", "14d", "7d")),
          }
          # Ranking score: recent windows weighted most, drawdown penalised.
          row["score"] = (
              3.0 * float(row["return_30d"])
              + 2.0 * float(row["return_14d"])
              + float(row["return_7d"])
              + float(row["return_90d"])
              - 0.5 * float(row["max_drawdown"])
          )
          total_rows.append(row)
          for label, values in horizons.items():
              horizon_rows.append({"name": candidate.name, "horizon": label, **values})
          equity_by_name[candidate.name] = equity

      total = pd.DataFrame(total_rows).sort_values(
          ["min_recent_return", "return_30d", "return_14d", "score"],
          ascending=[False, False, False, False],
      )
      qualified = total[
          (total["return_30d"] > 0.0)
          & (total["return_14d"] > 0.0)
          & (total["return_7d"] > 0.0)
          & (total["trades"] >= 30)
          & (total["profit_factor"] >= 1.0)
      ].copy()
      has_qualified = len(qualified) > 0
      # Fall back to the best-ranked rows when nothing passes the filter.
      top = qualified.head(12) if has_qualified else total.head(12)
      # The heading must not claim the rows are qualified when they are not.
      heading = "## Top Qualified" if has_qualified else "## Top Candidates (none qualified)"

      monthly_output = [monthly_rows(str(name), equity_by_name[str(name)]) for name in top["name"]]
      horizon_frame = pd.DataFrame(horizon_rows)
      if monthly_output:
          monthly = pd.concat(monthly_output, ignore_index=True)
      else:
          monthly = pd.DataFrame(columns=["name", "month", "start_equity", "end_equity", "return"])

      total_path = OUTPUT_DIR / f"{PREFIX}-total.csv"
      qualified_path = OUTPUT_DIR / f"{PREFIX}-qualified.csv"
      horizon_path = OUTPUT_DIR / f"{PREFIX}-horizons.csv"
      monthly_path = OUTPUT_DIR / f"{PREFIX}-monthly.csv"
      report_path = OUTPUT_DIR / f"{PREFIX}-report.md"
      total.to_csv(total_path, index=False)
      qualified.to_csv(qualified_path, index=False)
      horizon_frame.to_csv(horizon_path, index=False)
      monthly.to_csv(monthly_path, index=False)

      report_columns = [
          "name",
          "total_return",
          "max_drawdown",
          "profit_factor",
          "trades",
          "return_90d",
          "return_30d",
          "return_14d",
          "return_7d",
          "min_recent_return",
      ]
      report_lines = [
          "# Recent Squeeze Breakout Refine",
          "",
          "Scope: ETH 15m only, data through the local cache end, roundtrip cost 0.21% on margin.",
          "",
          heading,
          "",
          markdown_table(top[report_columns]),
          "",
          "## Interpretation",
          "",
          "Qualified rows require positive 30d/14d/7d returns, at least 30 full-period trades, and profit factor >= 1.0.",
          "If this table still has negative 90d rows, the pattern is recent-regime specific rather than robust across the full recent quarter.",
          "",
      ]
      report_path.write_text("\n".join(report_lines), encoding="utf-8")
      print(report_path)
      print(top.head(8).to_string(index=False))
      return 0
  # Script entry point: run the report and use main()'s return value as the
  # process exit status.
  if __name__ == "__main__":
      raise SystemExit(main())