# search_eth_false_breakout_reversal.py
  1. from __future__ import annotations
  2. import argparse
  3. from dataclasses import dataclass
  4. from pathlib import Path
  5. import pandas as pd
  6. DATA_DIR = Path("data/okx-candles")
  7. OUTPUT_DIR = Path("reports/eth-exploration")
  8. SYMBOLS = ("ETH-USDT-SWAP", "BTC-USDT-SWAP")
  9. INITIAL_EQUITY = 10_000.0
  10. FEE = 0.0004
  11. ROUNDTRIP_FEE = FEE * 2
  12. HORIZONS = (
  13. ("full", None),
  14. ("3y", pd.DateOffset(years=3)),
  15. ("1y", pd.DateOffset(years=1)),
  16. ("6m", pd.DateOffset(months=6)),
  17. ("3m", pd.DateOffset(months=3)),
  18. )
  19. @dataclass(frozen=True)
  20. class Spec:
  21. symbol: str
  22. bar: str
  23. side_mode: str
  24. range_lookback: int
  25. compression_window: int
  26. compression_quantile: float
  27. sweep_pct: float
  28. htf_slow: int
  29. htf_slope_lookback: int
  30. stop_pct: float
  31. take_pct: float
  32. hold: int
  33. @property
  34. def name(self) -> str:
  35. base = self.symbol.split("-")[0].lower()
  36. return (
  37. f"false_breakout_reversal-{base}-{self.bar}-{self.side_mode}"
  38. f"-rl{self.range_lookback}-cw{self.compression_window}-cq{self.compression_quantile:g}"
  39. f"-sw{self.sweep_pct:g}-hs{self.htf_slow}-hl{self.htf_slope_lookback}"
  40. f"-sl{self.stop_pct:g}-tp{self.take_pct:g}-h{self.hold}"
  41. )
  42. def load_frame(symbol: str) -> pd.DataFrame:
  43. frame = pd.read_csv(DATA_DIR / symbol / "15m.csv")
  44. frame["ts"] = pd.to_datetime(frame["ts"], unit="ms", utc=True)
  45. return frame.sort_values("ts").drop_duplicates("ts", keep="last").set_index("ts")
  46. def resample(frame: pd.DataFrame, bar: str) -> pd.DataFrame:
  47. rule = {"1H": "1h", "2H": "2h", "4H": "4h"}[bar]
  48. return (
  49. frame.resample(rule, label="left", closed="left")
  50. .agg(open=("open", "first"), high=("high", "max"), low=("low", "min"), close=("close", "last"), volume=("volume", "sum"))
  51. .dropna()
  52. )
  53. def signal_frame(spec: Spec, frame: pd.DataFrame) -> pd.DataFrame:
  54. close = frame["close"]
  55. open_ = frame["open"]
  56. prior_high = frame["high"].shift(1).rolling(spec.range_lookback).max()
  57. prior_low = frame["low"].shift(1).rolling(spec.range_lookback).min()
  58. mid = (prior_high + prior_low) / 2
  59. width = (prior_high - prior_low) / close
  60. width_cap = width.rolling(spec.compression_window).quantile(spec.compression_quantile)
  61. compressed = width <= width_cap
  62. htf_rule = {"1H": "4h", "2H": "4h", "4H": "1D"}[spec.bar]
  63. htf_close = close.resample(htf_rule, label="left", closed="left").last().dropna()
  64. htf_ma = htf_close.ewm(span=spec.htf_slow, adjust=False).mean()
  65. htf_slope = htf_ma / htf_ma.shift(spec.htf_slope_lookback) - 1
  66. htf = pd.DataFrame({"htf_ma": htf_ma, "htf_slope": htf_slope}).reindex(frame.index, method="ffill")
  67. short_entry = (
  68. compressed
  69. & (frame["high"] > prior_high * (1 + spec.sweep_pct))
  70. & (close < prior_high)
  71. & (close < open_)
  72. & (close <= htf["htf_ma"])
  73. & (htf["htf_slope"] <= 0)
  74. )
  75. long_entry = (
  76. compressed
  77. & (frame["low"] < prior_low * (1 - spec.sweep_pct))
  78. & (close > prior_low)
  79. & (close > open_)
  80. & (close >= htf["htf_ma"])
  81. & (htf["htf_slope"] >= 0)
  82. )
  83. if spec.side_mode == "short":
  84. long_entry = pd.Series(False, index=frame.index)
  85. return pd.DataFrame(
  86. {
  87. "short_entry": short_entry.fillna(False),
  88. "long_entry": long_entry.fillna(False),
  89. "short_exit": (close <= mid).fillna(False),
  90. "long_exit": (close >= mid).fillna(False),
  91. },
  92. index=frame.index,
  93. )
  94. def close_return(side: str, entry: float, exit_: float) -> float:
  95. gross = exit_ / entry - 1 if side == "long" else entry / exit_ - 1
  96. return gross - ROUNDTRIP_FEE
  97. def run_spec(spec: Spec, frame: pd.DataFrame) -> tuple[pd.Series, list[dict[str, object]]]:
  98. signals = signal_frame(spec, frame)
  99. warmup = max(spec.range_lookback + spec.compression_window, spec.htf_slow * 4 + spec.htf_slope_lookback * 4) + 2
  100. equity = INITIAL_EQUITY
  101. position: dict[str, object] | None = None
  102. pending_entry: str | None = None
  103. pending_exit = False
  104. trades: list[dict[str, object]] = []
  105. curve: list[tuple[pd.Timestamp, float]] = []
  106. rows = list(frame.itertuples())
  107. for index in range(warmup, len(rows)):
  108. row = rows[index]
  109. ts = frame.index[index]
  110. if pending_exit and position is not None:
  111. net = close_return(str(position["side"]), float(position["entry_price"]), float(row.open))
  112. equity *= max(0.0, 1 + net)
  113. trades.append({"side": position["side"], "entry_time": position["entry_time"], "exit_time": ts, "return": net})
  114. position = None
  115. pending_exit = False
  116. if pending_entry is not None and position is None and equity > 0:
  117. side = pending_entry
  118. position = {
  119. "side": side,
  120. "entry_time": ts,
  121. "entry_index": index,
  122. "entry_price": float(row.open),
  123. "stop": float(row.open) * (1 - spec.stop_pct if side == "long" else 1 + spec.stop_pct),
  124. "take": float(row.open) * (1 + spec.take_pct if side == "long" else 1 - spec.take_pct),
  125. }
  126. pending_entry = None
  127. mark = equity
  128. if position is not None:
  129. side = str(position["side"])
  130. stop_hit = row.low <= float(position["stop"]) if side == "long" else row.high >= float(position["stop"])
  131. take_hit = row.high >= float(position["take"]) if side == "long" else row.low <= float(position["take"])
  132. if stop_hit or take_hit:
  133. price = float(position["stop"] if stop_hit else position["take"])
  134. net = close_return(side, float(position["entry_price"]), price)
  135. equity *= max(0.0, 1 + net)
  136. trades.append({"side": side, "entry_time": position["entry_time"], "exit_time": ts, "return": net})
  137. position = None
  138. mark = equity
  139. else:
  140. gross = row.close / float(position["entry_price"]) - 1 if side == "long" else float(position["entry_price"]) / row.close - 1
  141. mark = equity * (1 + gross - FEE)
  142. curve.append((ts, mark))
  143. if index == len(rows) - 1 or equity <= 0:
  144. continue
  145. if position is not None:
  146. side = str(position["side"])
  147. held = index - int(position["entry_index"])
  148. if bool(signals[f"{side}_exit"].iloc[index]) or held >= spec.hold:
  149. pending_exit = True
  150. elif bool(signals["short_entry"].iloc[index]):
  151. pending_entry = "short"
  152. elif bool(signals["long_entry"].iloc[index]):
  153. pending_entry = "long"
  154. series = pd.Series({ts: value for ts, value in curve}).sort_index()
  155. daily = series.resample("1D").last().ffill()
  156. daily = pd.concat([pd.Series([INITIAL_EQUITY], index=[daily.index[0].normalize()]), daily]).sort_index()
  157. return daily.groupby(level=0).last(), trades
  158. def period_metrics(equity: pd.Series, trades: list[dict[str, object]], offset: pd.DateOffset | None) -> dict[str, object]:
  159. start = equity.index[0] if offset is None else equity.index[-1] - offset
  160. scoped = equity[equity.index >= start]
  161. scoped_trades = [trade for trade in trades if pd.Timestamp(trade["entry_time"]) >= scoped.index[0]]
  162. total = float(scoped.iloc[-1] / scoped.iloc[0] - 1)
  163. years = (scoped.index[-1] - scoped.index[0]).total_seconds() / 86_400 / 365
  164. annual = (1 + total) ** (1 / years) - 1 if total > -1 and years > 0 else 0.0
  165. drawdown = float(((scoped.cummax() - scoped) / scoped.cummax()).max())
  166. returns = [float(trade["return"]) for trade in scoped_trades]
  167. wins = [value for value in returns if value > 0]
  168. losses = [value for value in returns if value < 0]
  169. profit_factor = sum(wins) / abs(sum(losses)) if losses else (999.0 if wins else 0.0)
  170. return {
  171. "total_return": total,
  172. "annualized_return": annual,
  173. "max_drawdown": drawdown,
  174. "win_rate": len(wins) / len(returns) if returns else 0.0,
  175. "profit_factor": profit_factor,
  176. "trades": len(returns),
  177. }
  178. def stability_rows(equity: pd.Series, trades: list[dict[str, object]]) -> pd.DataFrame:
  179. rows: list[dict[str, object]] = []
  180. for freq, label_name in (("YE", "year"), ("ME", "month")):
  181. sampled = equity.resample(freq).last().dropna()
  182. starts = equity.resample(freq).first().reindex(sampled.index)
  183. returns = sampled / starts - 1
  184. period_freq = "Y" if freq == "YE" else "M"
  185. periods = sampled.index.tz_localize(None).to_period(period_freq)
  186. for period, value in zip(periods.astype(str), returns):
  187. scoped = [
  188. trade
  189. for trade in trades
  190. if pd.Timestamp(trade["entry_time"]).tz_localize(None).to_period(period_freq) == pd.Period(period)
  191. ]
  192. rows.append({"bucket": label_name, "period": period, "return": float(value), "trades": len(scoped)})
  193. return pd.DataFrame(rows)
  194. def build_specs() -> list[Spec]:
  195. specs: list[Spec] = []
  196. for symbol in SYMBOLS:
  197. for bar in ("1H", "2H", "4H"):
  198. for side_mode in ("short", "bidir"):
  199. for range_lookback in (12, 24, 36):
  200. for compression_quantile in (0.15, 0.25, 0.35):
  201. for sweep_pct in (0.001, 0.002, 0.0035):
  202. specs.append(
  203. Spec(
  204. symbol=symbol,
  205. bar=bar,
  206. side_mode=side_mode,
  207. range_lookback=range_lookback,
  208. compression_window=240,
  209. compression_quantile=compression_quantile,
  210. sweep_pct=sweep_pct,
  211. htf_slow=80,
  212. htf_slope_lookback=6,
  213. stop_pct=0.025,
  214. take_pct=0.035,
  215. hold=36,
  216. )
  217. )
  218. return specs
  219. def row_for_spec(spec: Spec, equity: pd.Series, trades: list[dict[str, object]]) -> dict[str, object]:
  220. row: dict[str, object] = {"name": spec.name, "symbol": spec.symbol, "bar": spec.bar, "side_mode": spec.side_mode}
  221. for label, offset in HORIZONS:
  222. metrics = period_metrics(equity, trades, offset)
  223. for key, value in metrics.items():
  224. row[f"{label}_{key}"] = value
  225. return row
  226. def markdown_table(frame: pd.DataFrame) -> str:
  227. def cell(value: object) -> str:
  228. if isinstance(value, float):
  229. return f"{value:.4f}"
  230. return str(value).replace("|", "\\|")
  231. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  232. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  233. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  234. def markdown_report(totals: pd.DataFrame, selected: pd.Series, stability: pd.DataFrame) -> str:
  235. metric_rows = []
  236. for label, _ in HORIZONS:
  237. metric_rows.append(
  238. {
  239. "period": label,
  240. "total_return": selected[f"{label}_total_return"],
  241. "annualized_return": selected[f"{label}_annualized_return"],
  242. "max_drawdown": selected[f"{label}_max_drawdown"],
  243. "win_rate": selected[f"{label}_win_rate"],
  244. "profit_factor": selected[f"{label}_profit_factor"],
  245. "trades": selected[f"{label}_trades"],
  246. }
  247. )
  248. years = stability[stability["bucket"] == "year"]
  249. months = stability[stability["bucket"] == "month"]
  250. active_months = months[months["trades"] > 0]
  251. losing_years = int((years["return"] < 0).sum())
  252. losing_active_months = int((active_months["return"] < 0).sum())
  253. active_month_ratio = len(active_months) / len(months) if len(months) else 0.0
  254. verdict = "not worth continuing"
  255. if (
  256. selected["full_profit_factor"] >= 1.15
  257. and selected["full_max_drawdown"] <= 0.25
  258. and selected["3y_total_return"] > 0
  259. and selected["1y_total_return"] > 0
  260. and selected["6m_total_return"] > 0
  261. and selected["3m_total_return"] > 0
  262. and selected["1y_trades"] >= 10
  263. and active_month_ratio >= 0.4
  264. and losing_years <= 2
  265. ):
  266. verdict = "worth continuing with a narrower robustness pass"
  267. keep = [
  268. "name",
  269. "symbol",
  270. "bar",
  271. "side_mode",
  272. "full_total_return",
  273. "full_annualized_return",
  274. "full_max_drawdown",
  275. "full_win_rate",
  276. "full_profit_factor",
  277. "full_trades",
  278. "3y_total_return",
  279. "1y_total_return",
  280. "6m_total_return",
  281. "3m_total_return",
  282. ]
  283. short_only = totals[totals["side_mode"] == "short"].head(5)
  284. return (
  285. "# ETH/BTC False Breakout Reversal Search\n\n"
  286. "Scope: local OKX ETH/BTC candle CSV only; no live trading. "
  287. "Excluded: staged entry, ETH/BTC relative momentum, crash-follow, calendar/time buckets, trend_exhaustion.\n\n"
  288. "First-principles signal: a narrow rolling range means recent price agreement is compressed. "
  289. "If price sweeps the upper boundary but closes back inside while the higher-timeframe EMA slope is non-positive, "
  290. "the breakout has failed and a short targets reversion toward the range center. "
  291. "The bidirectional variant mirrors this for lower-boundary failures only for comparison.\n\n"
  292. f"Selected candidate: `{selected['name']}`.\n\n"
  293. f"Verdict: {verdict}.\n\n"
  294. "## Selected metrics\n\n"
  295. f"{markdown_table(pd.DataFrame(metric_rows))}\n\n"
  296. "## Stability\n\n"
  297. f"Years: {len(years)}, losing years: {losing_years}. "
  298. f"Months: {len(months)}, active months: {len(active_months)}, losing active months: {losing_active_months}, "
  299. f"active month ratio: {active_month_ratio:.4f}.\n\n"
  300. f"{markdown_table(years[['period', 'return', 'trades']])}\n\n"
  301. "Worst active months:\n\n"
  302. f"{markdown_table(active_months.sort_values('return').head(12)[['period', 'return', 'trades']])}\n\n"
  303. "## Best short-only variants\n\n"
  304. f"{markdown_table(short_only[keep])}\n\n"
  305. "## Top 10\n\n"
  306. f"{markdown_table(totals.head(10)[keep])}\n"
  307. )
  308. def main() -> int:
  309. parser = argparse.ArgumentParser()
  310. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  311. args = parser.parse_args()
  312. raw_frames = {symbol: load_frame(symbol) for symbol in SYMBOLS}
  313. frames = {(symbol, bar): resample(raw_frames[symbol], bar) for symbol in SYMBOLS for bar in ("1H", "2H", "4H")}
  314. rows = []
  315. curves: dict[str, pd.Series] = {}
  316. trade_sets: dict[str, list[dict[str, object]]] = {}
  317. for index, spec in enumerate(build_specs(), start=1):
  318. equity, trades = run_spec(spec, frames[(spec.symbol, spec.bar)])
  319. rows.append(row_for_spec(spec, equity, trades))
  320. curves[spec.name] = equity
  321. trade_sets[spec.name] = trades
  322. if index % 100 == 0:
  323. print(f"done {index}", flush=True)
  324. totals = pd.DataFrame(rows).sort_values(
  325. ["full_total_return", "3y_total_return", "1y_total_return", "full_profit_factor"],
  326. ascending=[False, False, False, False],
  327. )
  328. viable = totals[
  329. (totals["full_trades"] >= 30)
  330. & (totals["full_profit_factor"] > 1)
  331. & (totals["3y_total_return"] > 0)
  332. & (totals["1y_total_return"] > 0)
  333. & (totals["6m_total_return"] > 0)
  334. & (totals["3m_total_return"] > 0)
  335. ]
  336. selected = (viable if len(viable) else totals).iloc[0]
  337. stability = stability_rows(curves[str(selected["name"])], trade_sets[str(selected["name"])])
  338. args.output_dir.mkdir(parents=True, exist_ok=True)
  339. totals_path = args.output_dir / "eth-btc-false-breakout-reversal-totals.csv"
  340. stability_path = args.output_dir / "eth-btc-false-breakout-reversal-stability.csv"
  341. report_path = args.output_dir / "eth-btc-false-breakout-reversal-report.md"
  342. totals.to_csv(totals_path, index=False)
  343. stability.to_csv(stability_path, index=False)
  344. report_path.write_text(markdown_report(totals, selected, stability), encoding="utf-8")
  345. print(f"selected {selected['name']}")
  346. print(f"wrote {totals_path}, {stability_path}, {report_path}")
  347. return 0
  348. if __name__ == "__main__":
  349. raise SystemExit(main())