revalidate_eth_high_freq_short_bidir_candidates.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. from __future__ import annotations
  2. import argparse
  3. import json
  4. from dataclasses import dataclass
  5. from pathlib import Path
  6. import pandas as pd
  7. DATA_DIR = Path("data/okx-candles")
  8. OUT_DIR = Path("reports/ultrashort")
  9. SYMBOL = "ETH-USDT-SWAP"
  10. INITIAL_EQUITY = 10_000.0
  11. LEVERAGE = 3.0
  12. TAKER_FEE = 0.0004
  13. HORIZONS = (
  14. ("full", None),
  15. ("3y", pd.DateOffset(years=3)),
  16. ("1y", pd.DateOffset(years=1)),
  17. ("6m", pd.DateOffset(months=6)),
  18. ("3m", pd.DateOffset(months=3)),
  19. ("30d", pd.DateOffset(days=30)),
  20. ("14d", pd.DateOffset(days=14)),
  21. )
  22. @dataclass(frozen=True)
  23. class Candidate:
  24. family: str
  25. bar: str
  26. params: dict[str, float | int | str]
  27. @property
  28. def name(self) -> str:
  29. body = "-".join(f"{key}{value:g}" if isinstance(value, float) else f"{key}{value}" for key, value in self.params.items())
  30. return f"{self.family}-{self.bar}-{body}"
  31. def load_frame(bar: str) -> pd.DataFrame:
  32. frame = pd.read_csv(DATA_DIR / SYMBOL / f"{bar}.csv")
  33. frame["dt"] = pd.to_datetime(frame["ts"], unit="ms", utc=True)
  34. return frame.sort_values("ts").drop_duplicates("ts", keep="last").reset_index(drop=True)
  35. def load_cache_summary(bars: list[str]) -> pd.DataFrame:
  36. rows = []
  37. for bar in bars:
  38. meta_path = DATA_DIR / SYMBOL / f"{bar}.meta.json"
  39. meta = json.loads(meta_path.read_text(encoding="utf-8"))
  40. rows.append(
  41. {
  42. "bar": bar,
  43. "rows": int(meta["rows"]),
  44. "first_time": pd.to_datetime(int(meta["first_ts"]), unit="ms", utc=True).strftime("%Y-%m-%d %H:%M"),
  45. "last_time": pd.to_datetime(int(meta["last_ts"]), unit="ms", utc=True).strftime("%Y-%m-%d %H:%M"),
  46. "history_exhausted": bool(meta["history_exhausted"]),
  47. }
  48. )
  49. return pd.DataFrame(rows)
  50. def rsi(close: pd.Series, length: int) -> pd.Series:
  51. delta = close.diff()
  52. gain = delta.clip(lower=0.0).rolling(length).mean()
  53. loss = (-delta.clip(upper=0.0)).rolling(length).mean()
  54. return 100.0 - 100.0 / (1.0 + gain / loss)
  55. def build_candidates(bars: list[str]) -> list[Candidate]:
  56. candidates: list[Candidate] = []
  57. for bar in bars:
  58. for window in (48, 96):
  59. base = {"window": window, "entry_z": 1.5, "exit_z": 0.20, "stop": 0.006, "take": 0.009, "hold": 12}
  60. candidates.append(Candidate("vwap_bidir", bar, base))
  61. candidates.append(Candidate("vwap_short", bar, base))
  62. for trend in (96, 192):
  63. candidates.append(
  64. Candidate(
  65. "rsi_short",
  66. bar,
  67. {"trend": trend, "entry": 90, "exit": 45, "stop": 0.0075, "take": 0.010, "hold": 12},
  68. )
  69. )
  70. candidates.append(
  71. Candidate(
  72. "rsi_bidir",
  73. bar,
  74. {"trend": trend, "entry": 10, "exit": 55, "stop": 0.0075, "take": 0.010, "hold": 12},
  75. )
  76. )
  77. for lookback in (48, 96):
  78. candidates.append(
  79. Candidate(
  80. "breakdown_short",
  81. bar,
  82. {"lookback": lookback, "stop": 0.006, "take": 0.012, "hold": 12},
  83. )
  84. )
  85. return candidates
  86. def signal_columns(frame: pd.DataFrame, candidate: Candidate) -> tuple[pd.Series, pd.Series]:
  87. close = frame["close"]
  88. params = candidate.params
  89. if candidate.family in ("vwap_bidir", "vwap_short"):
  90. window = int(params["window"])
  91. volume = frame["volume"]
  92. vwap = (close * volume).rolling(window).sum() / volume.rolling(window).sum()
  93. stdev = close.rolling(window).std(ddof=0)
  94. zscore = (close - vwap) / stdev
  95. entry = pd.Series("", index=frame.index, dtype=object)
  96. entry.loc[zscore >= float(params["entry_z"])] = "short"
  97. if candidate.family == "vwap_bidir":
  98. entry.loc[zscore <= -float(params["entry_z"])] = "long"
  99. return entry, zscore.abs() <= float(params["exit_z"])
  100. if candidate.family in ("rsi_short", "rsi_bidir"):
  101. trend = close.rolling(int(params["trend"])).mean()
  102. value = rsi(close, 2)
  103. entry = pd.Series("", index=frame.index, dtype=object)
  104. entry.loc[(close < trend) & (value >= float(params["entry"]))] = "short"
  105. if candidate.family == "rsi_bidir":
  106. entry.loc[(close > trend) & (value <= float(params["entry"]))] = "long"
  107. return entry, (value <= 100.0 - float(params["exit"])) | (value >= float(params["exit"]))
  108. lookback = int(params["lookback"])
  109. prior_low = frame["low"].shift(1).rolling(lookback).min()
  110. entry = pd.Series("", index=frame.index, dtype=object)
  111. entry.loc[close < prior_low] = "short"
  112. return entry, pd.Series(False, index=frame.index)
  113. def close_return(side: str, entry: float, exit_price: float) -> float:
  114. price_return = exit_price / entry - 1.0 if side == "long" else entry / exit_price - 1.0
  115. return LEVERAGE * price_return - LEVERAGE * TAKER_FEE * (1.0 + exit_price / entry)
  116. def mark_return(side: str, entry: float, close: float) -> float:
  117. price_return = close / entry - 1.0 if side == "long" else entry / close - 1.0
  118. return LEVERAGE * price_return - LEVERAGE * TAKER_FEE
  119. def backtest(frame: pd.DataFrame, candidate: Candidate) -> tuple[pd.Series, pd.DataFrame]:
  120. entry_signal, exit_signal = signal_columns(frame, candidate)
  121. warmup = max(int(value) for key, value in candidate.params.items() if key in {"window", "trend", "lookback"}) + 2
  122. equity = INITIAL_EQUITY
  123. position: dict[str, object] | None = None
  124. pending_entry = ""
  125. pending_exit = False
  126. curve: list[tuple[pd.Timestamp, float]] = []
  127. trades: list[dict[str, object]] = []
  128. rows = list(frame.itertuples(index=False))
  129. for index in range(warmup, len(rows)):
  130. candle = rows[index]
  131. if pending_exit and position is not None:
  132. net = close_return(str(position["side"]), float(position["entry"]), float(candle.open))
  133. equity *= 1.0 + net
  134. trades.append({"entry_time": position["entry_time"], "exit_time": candle.dt, "side": position["side"], "return": net})
  135. position = None
  136. pending_exit = False
  137. if pending_entry and position is None and equity > 0.0:
  138. position = {"side": pending_entry, "entry": float(candle.open), "entry_index": index, "entry_time": candle.dt}
  139. pending_entry = ""
  140. mark = equity
  141. if position is not None:
  142. side = str(position["side"])
  143. entry = float(position["entry"])
  144. stop = float(candidate.params["stop"])
  145. take = float(candidate.params["take"])
  146. stop_price = entry * (1.0 - stop if side == "long" else 1.0 + stop)
  147. take_price = entry * (1.0 + take if side == "long" else 1.0 - take)
  148. stop_hit = candle.low <= stop_price if side == "long" else candle.high >= stop_price
  149. take_hit = candle.high >= take_price if side == "long" else candle.low <= take_price
  150. if stop_hit or take_hit:
  151. exit_price = stop_price if stop_hit else take_price
  152. net = close_return(side, entry, exit_price)
  153. equity *= 1.0 + net
  154. trades.append({"entry_time": position["entry_time"], "exit_time": candle.dt, "side": side, "return": net})
  155. position = None
  156. mark = equity
  157. else:
  158. mark = equity * (1.0 + mark_return(side, entry, float(candle.close)))
  159. curve.append((candle.dt, mark))
  160. if index == len(rows) - 1 or equity <= 0.0:
  161. continue
  162. next_entry = str(entry_signal.iloc[index])
  163. if position is not None:
  164. reverse = bool(next_entry) and next_entry != position["side"]
  165. stale = index - int(position["entry_index"]) >= int(candidate.params["hold"])
  166. if bool(exit_signal.iloc[index]) or reverse or stale:
  167. pending_exit = True
  168. pending_entry = next_entry if reverse else ""
  169. elif next_entry:
  170. pending_entry = next_entry
  171. if position is not None:
  172. final = rows[-1]
  173. net = close_return(str(position["side"]), float(position["entry"]), float(final.close))
  174. equity *= 1.0 + net
  175. trades.append({"entry_time": position["entry_time"], "exit_time": final.dt, "side": position["side"], "return": net})
  176. curve.append((final.dt, equity))
  177. return pd.Series(dict(curve)).sort_index(), pd.DataFrame(trades)
  178. def scoped(equity: pd.Series, trades: pd.DataFrame, offset: pd.DateOffset | None) -> tuple[pd.Series, pd.DataFrame]:
  179. if offset is None:
  180. return equity, trades
  181. start = equity.index[-1] - offset
  182. scoped_equity = equity[equity.index >= start]
  183. scoped_trades = trades[trades["entry_time"] >= scoped_equity.index[0]] if len(trades) else trades
  184. return scoped_equity, scoped_trades
  185. def metrics(equity: pd.Series, trades: pd.DataFrame) -> dict[str, float | int]:
  186. total = float(equity.iloc[-1] / equity.iloc[0] - 1.0)
  187. years = (equity.index[-1] - equity.index[0]).total_seconds() / 31_536_000
  188. annual = (1.0 + total) ** (1.0 / years) - 1.0 if total > -1.0 and years > 0 else 0.0
  189. drawdown = float(((equity.cummax() - equity) / equity.cummax()).max())
  190. returns = trades["return"] if len(trades) else pd.Series(dtype=float)
  191. wins = returns[returns > 0.0]
  192. losses = returns[returns < 0.0]
  193. return {
  194. "total_return": total,
  195. "annualized_return": annual,
  196. "max_drawdown": drawdown,
  197. "calmar": annual / drawdown if drawdown else 0.0,
  198. "trades": int(len(trades)),
  199. "short_trades": int((trades["side"] == "short").sum()) if len(trades) else 0,
  200. "long_trades": int((trades["side"] == "long").sum()) if len(trades) else 0,
  201. "profit_factor": float(wins.sum() / abs(losses.sum())) if len(losses) else (999.0 if len(wins) else 0.0),
  202. "win_rate": float(len(wins) / len(returns)) if len(returns) else 0.0,
  203. }
  204. def summarize(candidate: Candidate, equity: pd.Series, trades: pd.DataFrame) -> dict[str, object]:
  205. row: dict[str, object] = {
  206. "symbol": SYMBOL,
  207. "bar": candidate.bar,
  208. "family": candidate.family,
  209. "name": candidate.name,
  210. "params_json": json.dumps(candidate.params, separators=(",", ":")),
  211. "first_time": equity.index[0].strftime("%Y-%m-%d %H:%M"),
  212. "last_time": equity.index[-1].strftime("%Y-%m-%d %H:%M"),
  213. }
  214. for label, offset in HORIZONS:
  215. part_equity, part_trades = scoped(equity, trades, offset)
  216. for key, value in metrics(part_equity, part_trades).items():
  217. row[f"{label}_{key}"] = value
  218. row["recent_trigger_score"] = int(row["3m_trades"]) + int(row["30d_trades"]) * 2 + int(row["14d_trades"]) * 4
  219. observe = (
  220. int(row["3m_trades"]) >= 12
  221. and int(row["30d_trades"]) >= 4
  222. and int(row["14d_trades"]) >= 1
  223. and float(row["full_total_return"]) > 0.0
  224. and float(row["3y_total_return"]) > 0.0
  225. and float(row["1y_total_return"]) > 0.0
  226. and float(row["3y_max_drawdown"]) <= 0.35
  227. and float(row["1y_max_drawdown"]) <= 0.25
  228. and float(row["1y_profit_factor"]) >= 1.05
  229. )
  230. row["readonly_observe"] = "yes" if observe else "no"
  231. return row
  232. def horizon_summary(totals: pd.DataFrame) -> pd.DataFrame:
  233. rows = []
  234. for label, _ in HORIZONS:
  235. returns = totals[f"{label}_total_return"]
  236. drawdowns = totals[f"{label}_max_drawdown"]
  237. trades = totals[f"{label}_trades"]
  238. best_index = returns.idxmax()
  239. rows.append(
  240. {
  241. "horizon": label,
  242. "positive_candidates": int((returns > 0.0).sum()),
  243. "non_disaster_candidates": int(((returns > -0.50) & (drawdowns < 0.60)).sum()),
  244. "best_total_return": float(returns.max()),
  245. "median_total_return": float(returns.median()),
  246. "worst_total_return": float(returns.min()),
  247. "median_max_drawdown": float(drawdowns.median()),
  248. "max_trades": int(trades.max()),
  249. "best_name": str(totals.loc[best_index, "name"]),
  250. }
  251. )
  252. return pd.DataFrame(rows)
  253. def markdown_table(frame: pd.DataFrame) -> str:
  254. def cell(value: object) -> str:
  255. if isinstance(value, float):
  256. return f"{value:.4f}"
  257. return str(value).replace("|", "\\|")
  258. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  259. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  260. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  261. def write_report(totals: pd.DataFrame, summary: pd.DataFrame, cache: pd.DataFrame, paths: list[Path], command: str) -> str:
  262. selected = totals[totals["readonly_observe"] == "yes"].head(12)
  263. least_bad = totals.sort_values(
  264. ["full_total_return", "3y_total_return", "1y_total_return", "6m_total_return", "3m_total_return"],
  265. ascending=[False, False, False, False, False],
  266. ).head(12)
  267. recent = totals.sort_values(["recent_trigger_score", "3y_calmar", "1y_calmar"], ascending=[False, False, False]).head(12)
  268. cols = [
  269. "family",
  270. "bar",
  271. "name",
  272. "full_total_return",
  273. "full_max_drawdown",
  274. "full_trades",
  275. "3y_total_return",
  276. "1y_total_return",
  277. "6m_total_return",
  278. "3m_total_return",
  279. "30d_total_return",
  280. "14d_total_return",
  281. "3m_trades",
  282. "30d_trades",
  283. "14d_trades",
  284. "readonly_observe",
  285. ]
  286. observation = (
  287. "No read-only observation candidates passed the rule."
  288. if not len(selected)
  289. else "At least one read-only observation candidate passed the rule."
  290. )
  291. disaster = (
  292. "Long-term status: still disastrous. The full, 3y, and 1y windows have zero positive candidates under the original high-frequency short/bidir candidate set."
  293. if all(int(summary.loc[summary["horizon"] == label, "positive_candidates"].iloc[0]) == 0 for label in ("full", "3y", "1y"))
  294. else "Long-term status: not uniformly disastrous; at least one of full/3y/1y has a positive candidate."
  295. )
  296. return "\n".join(
  297. [
  298. "# ETH high-frequency short/bidirectional revalidation",
  299. "",
  300. f"Run command: `{command}`",
  301. "Scope: offline only; local refreshed 3m/5m/15m OKX ETH candle cache; no live executor, deployment, private API, or order path touched.",
  302. f"Cost model: taker fee `{TAKER_FEE}` each side on `{LEVERAGE:g}x` notional; entries execute on next open.",
  303. "",
  304. "Output files:",
  305. *[f"- `{path}`" for path in paths],
  306. "",
  307. "Cache used:",
  308. "",
  309. markdown_table(cache),
  310. "",
  311. "Windows compared: full, 3y, 1y, 6m, 3m, 30d, 14d.",
  312. "Read-only observation rule: 3m >= 12 trades, 30d >= 4 trades, 14d >= 1 trade, positive full/3y/1y return, 3y MDD <= 35%, 1y MDD <= 25%, and 1y profit factor >= 1.05.",
  313. "",
  314. f"Conclusion: {disaster} {observation}",
  315. "",
  316. "## Horizon Summary",
  317. "",
  318. markdown_table(summary),
  319. "",
  320. "## Read-only Observation Candidates",
  321. "",
  322. markdown_table(selected[cols]) if len(selected) else "No candidates passed the read-only observation rule.",
  323. "",
  324. "## Least-bad Long-term Rows",
  325. "",
  326. markdown_table(least_bad[cols]),
  327. "",
  328. "## Most Recently Active Rows",
  329. "",
  330. markdown_table(recent[cols]),
  331. ]
  332. ) + "\n"
  333. def main() -> int:
  334. parser = argparse.ArgumentParser()
  335. parser.add_argument("--bars", nargs="+", default=["3m", "5m", "15m"])
  336. parser.add_argument("--output-dir", type=Path, default=OUT_DIR)
  337. args = parser.parse_args()
  338. rows: list[dict[str, object]] = []
  339. frames = {bar: load_frame(bar) for bar in args.bars}
  340. for candidate in build_candidates(args.bars):
  341. equity, trades = backtest(frames[candidate.bar], candidate)
  342. if len(equity) >= 2:
  343. rows.append(summarize(candidate, equity, trades))
  344. totals = pd.DataFrame(rows).sort_values(
  345. ["readonly_observe", "full_total_return", "3y_total_return", "1y_total_return", "recent_trigger_score"],
  346. ascending=[False, False, False, False, False],
  347. )
  348. summary = horizon_summary(totals)
  349. cache = load_cache_summary(args.bars)
  350. args.output_dir.mkdir(parents=True, exist_ok=True)
  351. totals_path = args.output_dir / "eth-highfreq-short-bidir-revalidation-candidates.csv"
  352. summary_path = args.output_dir / "eth-highfreq-short-bidir-revalidation-summary.csv"
  353. report_path = args.output_dir / "eth-highfreq-short-bidir-revalidation-report.md"
  354. paths = [totals_path, summary_path, report_path]
  355. totals.to_csv(totals_path, index=False)
  356. summary.to_csv(summary_path, index=False)
  357. command = f"rtk .venv/bin/python {Path(__file__).as_posix()} --bars {' '.join(args.bars)}"
  358. report_path.write_text(write_report(totals, summary, cache, paths, command), encoding="utf-8")
  359. print(summary.to_string(index=False))
  360. return 0
  361. if __name__ == "__main__":
  362. raise SystemExit(main())