# search_eth_bearish_failure_confirmation.py
from __future__ import annotations

import argparse
from collections import Counter
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
  6. DATA_DIR = Path("data/okx-candles")
  7. OUTPUT_DIR = Path("reports/eth-exploration")
  8. SYMBOL = "ETH-USDT-SWAP"
  9. BTC_SYMBOL = "BTC-USDT-SWAP"
  10. INITIAL_EQUITY = 10_000.0
  11. FEE = 0.0004
  12. ROUNDTRIP_FEE = FEE * 2
  13. HORIZONS = (
  14. ("full", None),
  15. ("3y", pd.DateOffset(years=3)),
  16. ("1y", pd.DateOffset(years=1)),
  17. ("6m", pd.DateOffset(months=6)),
  18. ("3m", pd.DateOffset(months=3)),
  19. )
  20. @dataclass(frozen=True)
  21. class Spec:
  22. family: str
  23. bar: str
  24. fast: int
  25. slow: int
  26. lookback: int
  27. threshold: float
  28. stop: float
  29. take: float
  30. hold: int
  31. gate: str
  32. @property
  33. def name(self) -> str:
  34. return (
  35. f"{self.family}-{self.bar}-f{self.fast}-s{self.slow}-lb{self.lookback}"
  36. f"-th{self.threshold:g}-sl{self.stop:g}-tp{self.take:g}-h{self.hold}-{self.gate}"
  37. )
  38. def load_frame(symbol: str) -> pd.DataFrame:
  39. frame = pd.read_csv(DATA_DIR / symbol / "15m.csv")
  40. frame["ts"] = pd.to_datetime(frame["ts"], unit="ms", utc=True)
  41. return frame.sort_values("ts").drop_duplicates("ts", keep="last").set_index("ts")
  42. def resample(frame: pd.DataFrame, bar: str) -> pd.DataFrame:
  43. rule = {"15m": "15min", "1H": "1h", "4H": "4h"}[bar]
  44. return (
  45. frame.resample(rule, label="left", closed="left")
  46. .agg(
  47. open=("open", "first"),
  48. high=("high", "max"),
  49. low=("low", "min"),
  50. close=("close", "last"),
  51. volume=("volume", "sum"),
  52. )
  53. .dropna()
  54. )
  55. def rsi(close: pd.Series, length: int) -> pd.Series:
  56. diff = close.diff()
  57. gain = diff.clip(lower=0).ewm(alpha=1 / length, adjust=False).mean()
  58. loss = (-diff.clip(upper=0)).ewm(alpha=1 / length, adjust=False).mean()
  59. return 100 - 100 / (1 + gain / loss)
  60. def joined_frames(eth: pd.DataFrame, btc: pd.DataFrame) -> pd.DataFrame:
  61. return eth.join(btc[["close"]].rename(columns={"close": "btc_close"}), how="inner")
  62. def risk_gate(frame: pd.DataFrame, gate: str) -> pd.Series:
  63. if gate == "none":
  64. return pd.Series(True, index=frame.index)
  65. btc = frame["btc_close"]
  66. btc_slow = btc.rolling(160).mean()
  67. btc_return = btc / btc.shift(24) - 1
  68. btc_vol = btc.pct_change().rolling(48).std()
  69. if gate == "btc_riskoff":
  70. return (btc < btc_slow) & (btc_return < -0.01)
  71. if gate == "btc_riskoff_vol":
  72. return (btc < btc_slow) & (btc_return < 0) & (btc_vol > btc_vol.rolling(240).median())
  73. raise ValueError(gate)
  74. def signals(spec: Spec, frame: pd.DataFrame) -> tuple[pd.Series, pd.Series]:
  75. close = frame["close"]
  76. high = frame["high"]
  77. low = frame["low"]
  78. open_ = frame["open"]
  79. fast = close.ewm(span=spec.fast, adjust=False).mean()
  80. slow = close.ewm(span=spec.slow, adjust=False).mean()
  81. rsi14 = rsi(close, 14)
  82. ret = close / close.shift(spec.lookback) - 1
  83. body = (close - open_) / open_
  84. range_pct = (high - low) / close
  85. range_rank = range_pct.rolling(200).rank(pct=True)
  86. volume_rank = frame["volume"].rolling(200).rank(pct=True)
  87. gate = risk_gate(frame, spec.gate)
  88. if spec.family == "mr_failure":
  89. prior_oversold = rsi14.shift(2).rolling(spec.lookback).min() < 34
  90. rebound_to_fast = high >= fast
  91. failed_reclaim = (close < fast) & (close < open_) & (ret > spec.threshold * 0.25)
  92. entry = gate & (close < slow) & prior_oversold & rebound_to_fast & failed_reclaim
  93. exit_ = (close > fast) | (rsi14 < 32)
  94. elif spec.family == "vol_second_confirm":
  95. prior_expansion = (
  96. (range_rank.shift(1) > 0.82)
  97. & (volume_rank.shift(1) > 0.60)
  98. & (body.shift(1) < -spec.threshold)
  99. )
  100. second_fail = (high < high.shift(1)) & (close < (open_.shift(1) + close.shift(1)) / 2) & (close < open_)
  101. entry = gate & (close < slow) & prior_expansion & second_fail
  102. exit_ = (close > fast) | (rsi14 < 30)
  103. elif spec.family == "trend_exhaustion":
  104. downtrend = (fast < slow) & (slow < slow.shift(spec.lookback))
  105. relief = close / close.rolling(spec.lookback * 2).min() - 1
  106. rejection = (high > fast) & (close < fast) & (close < open_) & (rsi14 > 45)
  107. entry = gate & downtrend & (relief > spec.threshold) & rejection
  108. exit_ = (close > slow) | (rsi14 < 35)
  109. else:
  110. raise ValueError(spec.family)
  111. return entry.fillna(False), exit_.fillna(False)
  112. def close_return(entry: float, exit_: float) -> float:
  113. return entry / exit_ - 1 - ROUNDTRIP_FEE
  114. def run_spec(spec: Spec, frame: pd.DataFrame) -> tuple[pd.Series, list[dict[str, object]]]:
  115. entry, exit_ = signals(spec, frame)
  116. warmup = max(spec.slow, 260, spec.lookback * 3) + 2
  117. equity = INITIAL_EQUITY
  118. position: dict[str, object] | None = None
  119. pending_entry = False
  120. pending_exit = False
  121. trades: list[dict[str, object]] = []
  122. curve: list[tuple[pd.Timestamp, float]] = []
  123. rows = list(frame.itertuples())
  124. for index in range(warmup, len(rows)):
  125. candle = rows[index]
  126. ts = frame.index[index]
  127. if pending_exit and position is not None:
  128. net = close_return(float(position["entry_price"]), float(candle.open))
  129. equity *= 1 + net
  130. trades.append({"entry_time": position["entry_time"], "exit_time": ts, "return": net})
  131. position = None
  132. pending_exit = False
  133. if pending_entry and position is None and equity > 0:
  134. position = {
  135. "entry_time": ts,
  136. "entry_index": index,
  137. "entry_price": float(candle.open),
  138. "stop": float(candle.open) * (1 + spec.stop),
  139. "take": float(candle.open) * (1 - spec.take),
  140. }
  141. pending_entry = False
  142. mark = equity
  143. if position is not None:
  144. stop_hit = candle.high >= float(position["stop"])
  145. take_hit = candle.low <= float(position["take"])
  146. if stop_hit or take_hit:
  147. price = float(position["stop"] if stop_hit else position["take"])
  148. net = close_return(float(position["entry_price"]), price)
  149. equity *= 1 + net
  150. trades.append({"entry_time": position["entry_time"], "exit_time": ts, "return": net})
  151. position = None
  152. mark = equity
  153. else:
  154. gross = float(position["entry_price"]) / candle.close - 1
  155. mark = equity * (1 + gross - FEE)
  156. curve.append((ts, mark))
  157. if index == len(rows) - 1 or equity <= 0:
  158. continue
  159. if position is None and bool(entry.iloc[index]):
  160. pending_entry = True
  161. elif position is not None and (bool(exit_.iloc[index]) or index - int(position["entry_index"]) >= spec.hold):
  162. pending_exit = True
  163. series = pd.Series({ts: value for ts, value in curve}).sort_index()
  164. daily = series.resample("1D").last().ffill()
  165. daily = pd.concat([pd.Series([INITIAL_EQUITY], index=[daily.index[0].normalize()]), daily]).sort_index()
  166. return daily.groupby(level=0).last(), trades
  167. def period_metrics(equity: pd.Series, trades: list[dict[str, object]], offset: pd.DateOffset | None) -> dict[str, object]:
  168. start = equity.index[0] if offset is None else equity.index[-1] - offset
  169. scoped = equity[equity.index >= start]
  170. scoped_trades = [trade for trade in trades if pd.Timestamp(trade["entry_time"]) >= scoped.index[0]]
  171. total = float(scoped.iloc[-1] / scoped.iloc[0] - 1)
  172. years = (scoped.index[-1] - scoped.index[0]).total_seconds() / 86_400 / 365
  173. annual = (1 + total) ** (1 / years) - 1 if total > -1 and years > 0 else 0.0
  174. drawdown = float(((scoped.cummax() - scoped) / scoped.cummax()).max())
  175. returns = [float(trade["return"]) for trade in scoped_trades]
  176. wins = [value for value in returns if value > 0]
  177. losses = [value for value in returns if value < 0]
  178. profit_factor = sum(wins) / abs(sum(losses)) if losses else (999.0 if wins else 0.0)
  179. return {
  180. "total_return": total,
  181. "annualized_return": annual,
  182. "max_drawdown": drawdown,
  183. "win_rate": len(wins) / len(returns) if returns else 0.0,
  184. "profit_factor": profit_factor,
  185. "trades": len(returns),
  186. }
  187. def stability_rows(equity: pd.Series, trades: list[dict[str, object]]) -> pd.DataFrame:
  188. rows: list[dict[str, object]] = []
  189. for freq, label_name in (("YE", "year"), ("ME", "month")):
  190. sampled = equity.resample(freq).last().dropna()
  191. starts = equity.resample(freq).first().reindex(sampled.index)
  192. returns = sampled / starts - 1
  193. period_freq = "Y" if freq == "YE" else "M"
  194. periods = sampled.index.tz_localize(None).to_period(period_freq)
  195. for period, value in zip(periods.astype(str), returns):
  196. scoped = [
  197. trade
  198. for trade in trades
  199. if pd.Timestamp(trade["entry_time"]).tz_localize(None).to_period(period_freq) == pd.Period(period)
  200. ]
  201. rows.append({"bucket": label_name, "period": period, "return": float(value), "trades": len(scoped)})
  202. return pd.DataFrame(rows)
  203. def build_specs() -> list[Spec]:
  204. specs: list[Spec] = []
  205. for bar in ("1H", "4H"):
  206. for fast, slow in ((20, 120), (34, 180), (50, 240)):
  207. for gate in ("none", "btc_riskoff", "btc_riskoff_vol"):
  208. for lookback in (8, 16, 24):
  209. for threshold in (0.012, 0.02, 0.032):
  210. specs.append(Spec("mr_failure", bar, fast, slow, lookback, threshold, 0.025, 0.04, 48, gate))
  211. specs.append(Spec("trend_exhaustion", bar, fast, slow, lookback, threshold, 0.03, 0.045, 72, gate))
  212. for threshold in (0.008, 0.014, 0.02):
  213. specs.append(Spec("vol_second_confirm", bar, fast, slow, 8, threshold, 0.025, 0.045, 48, gate))
  214. return specs
  215. def row_for_spec(spec: Spec, equity: pd.Series, trades: list[dict[str, object]]) -> dict[str, object]:
  216. row: dict[str, object] = {"name": spec.name, "family": spec.family, "bar": spec.bar, "gate": spec.gate}
  217. for label, offset in HORIZONS:
  218. metrics = period_metrics(equity, trades, offset)
  219. for key, value in metrics.items():
  220. row[f"{label}_{key}"] = value
  221. return row
  222. def markdown_table(frame: pd.DataFrame) -> str:
  223. def cell(value: object) -> str:
  224. if isinstance(value, float):
  225. return f"{value:.4f}"
  226. return str(value).replace("|", "\\|")
  227. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  228. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  229. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  230. def markdown_report(totals: pd.DataFrame, selected: pd.Series, stability: pd.DataFrame) -> str:
  231. metric_rows = []
  232. for label, _ in HORIZONS:
  233. metric_rows.append(
  234. {
  235. "period": label,
  236. "total_return": selected[f"{label}_total_return"],
  237. "annualized_return": selected[f"{label}_annualized_return"],
  238. "max_drawdown": selected[f"{label}_max_drawdown"],
  239. "win_rate": selected[f"{label}_win_rate"],
  240. "profit_factor": selected[f"{label}_profit_factor"],
  241. "trades": selected[f"{label}_trades"],
  242. }
  243. )
  244. keep = [
  245. "name",
  246. "family",
  247. "bar",
  248. "gate",
  249. "full_total_return",
  250. "full_annualized_return",
  251. "full_max_drawdown",
  252. "full_win_rate",
  253. "full_profit_factor",
  254. "full_trades",
  255. "3y_total_return",
  256. "1y_total_return",
  257. "6m_total_return",
  258. "3m_total_return",
  259. ]
  260. years = stability[stability["bucket"] == "year"]
  261. months = stability[stability["bucket"] == "month"]
  262. losing_years = int((years["return"] < 0).sum())
  263. losing_months = int((months["return"] < 0).sum())
  264. active_months = months[months["trades"] > 0]
  265. verdict = "not worth continuing"
  266. if (
  267. selected["full_profit_factor"] > 1.12
  268. and selected["3y_total_return"] > 0
  269. and selected["1y_total_return"] > 0
  270. and selected["6m_total_return"] > 0
  271. and selected["3m_total_return"] > 0
  272. and losing_years <= 2
  273. ):
  274. verdict = "worth a narrow follow-up, but only after reducing drawdown"
  275. return (
  276. "# ETH Bearish Failure/Confirmation Search\n\n"
  277. "Scope: local OKX ETH/BTC candle CSV only; ETH short-only entries; BTC absolute risk-off filters only. "
  278. "Excluded: staged entry, ETH/BTC relative momentum, crash-follow, calendar/time buckets.\n\n"
  279. "Families: counter-trend mean-reversion failure, volatility expansion second confirmation, and trend exhaustion.\n\n"
  280. f"Selected candidate: `{selected['name']}`.\n\n"
  281. f"Verdict: {verdict}.\n\n"
  282. "## Selected metrics\n\n"
  283. f"{markdown_table(pd.DataFrame(metric_rows))}\n\n"
  284. "## Stability\n\n"
  285. f"Years: {len(years)}, losing years: {losing_years}. "
  286. f"Months: {len(months)}, active months: {len(active_months)}, losing months: {losing_months}.\n\n"
  287. f"{markdown_table(years[['period', 'return', 'trades']])}\n\n"
  288. "Worst active months:\n\n"
  289. f"{markdown_table(active_months.sort_values('return').head(12)[['period', 'return', 'trades']])}\n\n"
  290. "## Top 10\n\n"
  291. f"{markdown_table(totals.head(10)[keep])}\n"
  292. )
  293. def main() -> int:
  294. parser = argparse.ArgumentParser()
  295. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  296. args = parser.parse_args()
  297. eth_15m = load_frame(SYMBOL)
  298. btc_15m = load_frame(BTC_SYMBOL)
  299. frames = {bar: joined_frames(resample(eth_15m, bar), resample(btc_15m, bar)) for bar in ("1H", "4H")}
  300. rows = []
  301. curves: dict[str, pd.Series] = {}
  302. trade_sets: dict[str, list[dict[str, object]]] = {}
  303. specs = build_specs()
  304. for index, spec in enumerate(specs, start=1):
  305. equity, trades = run_spec(spec, frames[spec.bar])
  306. rows.append(row_for_spec(spec, equity, trades))
  307. curves[spec.name] = equity
  308. trade_sets[spec.name] = trades
  309. if index % 100 == 0:
  310. print(f"done {index}/{len(specs)}", flush=True)
  311. totals = pd.DataFrame(rows).sort_values(
  312. ["full_total_return", "3y_total_return", "1y_total_return", "full_profit_factor"],
  313. ascending=[False, False, False, False],
  314. )
  315. viable = totals[
  316. (totals["full_trades"] >= 25)
  317. & (totals["full_profit_factor"] > 1)
  318. & (totals["3y_total_return"] > 0)
  319. & (totals["1y_total_return"] > 0)
  320. & (totals["6m_total_return"] > 0)
  321. & (totals["3m_total_return"] > 0)
  322. ]
  323. selected = (viable if len(viable) else totals).iloc[0]
  324. stability = stability_rows(curves[str(selected["name"])], trade_sets[str(selected["name"])])
  325. args.output_dir.mkdir(parents=True, exist_ok=True)
  326. totals_path = args.output_dir / "eth-bearish-failure-confirmation-totals.csv"
  327. stability_path = args.output_dir / "eth-bearish-failure-confirmation-stability.csv"
  328. report_path = args.output_dir / "eth-bearish-failure-confirmation-report.md"
  329. totals.to_csv(totals_path, index=False)
  330. stability.to_csv(stability_path, index=False)
  331. report_path.write_text(markdown_report(totals, selected, stability), encoding="utf-8")
  332. print(markdown_table(pd.DataFrame([selected]).drop(columns=["name"]).iloc[:, :12]))
  333. print(f"selected {selected['name']}")
  334. print(f"wrote {totals_path}, {stability_path}, {report_path}")
  335. return 0
  336. if __name__ == "__main__":
  337. raise SystemExit(main())