# search_eth_btc_calendar_carry.py
  1. from __future__ import annotations
  2. from dataclasses import dataclass
  3. from pathlib import Path
  4. import pandas as pd
  5. DATA_DIR = Path("data/okx-candles")
  6. OUT_DIR = Path("reports/eth-exploration")
  7. PREFIX = "eth-btc-calendar-carry"
  8. SYMBOLS = ("ETH-USDT-SWAP", "BTC-USDT-SWAP")
  9. INITIAL_EQUITY = 10_000.0
  10. FEE = 0.0004
  11. ROUNDTRIP_FEE = FEE * 2
  12. HORIZONS = (
  13. ("full", None),
  14. ("3y", pd.DateOffset(years=3)),
  15. ("1y", pd.DateOffset(years=1)),
  16. ("6m", pd.DateOffset(months=6)),
  17. ("3m", pd.DateOffset(months=3)),
  18. )
  19. WEEKDAY_SETS = {
  20. "all": set(range(7)),
  21. "weekday": set(range(5)),
  22. "weekend": {5, 6},
  23. }
  24. @dataclass(frozen=True)
  25. class Spec:
  26. symbol: str
  27. bar: str
  28. side: str
  29. hour: int
  30. weekdays: str
  31. hold: int
  32. vol_gate: str
  33. @property
  34. def name(self) -> str:
  35. token = self.symbol.split("-")[0].lower()
  36. return f"{token}-{self.bar}-{self.side}-h{self.hour:02d}-{self.weekdays}-hold{self.hold}-vol{self.vol_gate}"
  37. def load_frame(symbol: str) -> pd.DataFrame:
  38. frame = pd.read_csv(DATA_DIR / symbol / "15m.csv")
  39. frame["ts"] = pd.to_datetime(frame["ts"], unit="ms", utc=True)
  40. return frame.sort_values("ts").drop_duplicates("ts", keep="last").set_index("ts")
  41. def resample(frame: pd.DataFrame, bar: str) -> pd.DataFrame:
  42. rule = {"15m": "15min", "1h": "1h", "4h": "4h"}[bar]
  43. return (
  44. frame.resample(rule, label="left", closed="left")
  45. .agg(open=("open", "first"), high=("high", "max"), low=("low", "min"), close=("close", "last"), volume=("volume", "sum"))
  46. .dropna()
  47. )
  48. def build_specs() -> list[Spec]:
  49. specs: list[Spec] = []
  50. for symbol in SYMBOLS:
  51. for bar, hours, holds in (
  52. ("1h", range(24), (2, 4, 8)),
  53. ("4h", range(0, 24, 4), (1, 2, 4)),
  54. ):
  55. for side in ("short", "long"):
  56. for hour in hours:
  57. for weekdays in WEEKDAY_SETS:
  58. for hold in holds:
  59. for vol_gate in ("none", "calm", "active"):
  60. specs.append(Spec(symbol, bar, side, hour, weekdays, hold, vol_gate))
  61. return specs
  62. def trade_return(side: str, entry: float, exit_: float) -> float:
  63. gross = exit_ / entry - 1.0 if side == "long" else entry / exit_ - 1.0
  64. return gross - ROUNDTRIP_FEE
  65. def marked_equity(equity: float, side: str, entry: float, mark: float) -> float:
  66. gross = mark / entry - 1.0 if side == "long" else entry / mark - 1.0
  67. return equity * (1.0 + gross - FEE)
  68. def run_spec(spec: Spec, frame: pd.DataFrame) -> tuple[pd.Series, pd.DataFrame]:
  69. returns = frame["close"].pct_change()
  70. vol = returns.rolling(96).std(ddof=0)
  71. vol_rank = vol.rolling(720).rank(pct=True)
  72. allowed_weekdays = WEEKDAY_SETS[spec.weekdays]
  73. entry_signal = (frame.index.hour == spec.hour) & pd.Series(frame.index.weekday, index=frame.index).isin(allowed_weekdays)
  74. if spec.vol_gate == "calm":
  75. entry_signal &= vol_rank <= 0.5
  76. elif spec.vol_gate == "active":
  77. entry_signal &= vol_rank >= 0.5
  78. trades: list[dict[str, object]] = []
  79. warmup = 800
  80. opens = frame["open"].to_numpy(dtype=float)
  81. signal_indices = [int(value) for value in entry_signal.iloc[warmup:].to_numpy().nonzero()[0] + warmup]
  82. last_exit_index = -1
  83. equity = INITIAL_EQUITY
  84. equity_points: list[tuple[pd.Timestamp, float]] = [(frame.index[warmup].normalize(), equity)]
  85. for signal_index in signal_indices:
  86. entry_index = signal_index + 1
  87. exit_index = entry_index + spec.hold
  88. if entry_index <= last_exit_index or exit_index >= len(frame):
  89. continue
  90. entry_price = opens[entry_index]
  91. exit_price = opens[exit_index]
  92. net = trade_return(spec.side, entry_price, exit_price)
  93. before = equity
  94. equity *= 1.0 + net
  95. trades.append(
  96. {
  97. "entry_time": frame.index[entry_index],
  98. "exit_time": frame.index[exit_index],
  99. "side": spec.side,
  100. "return": net,
  101. "pnl": equity - before,
  102. }
  103. )
  104. equity_points.append((frame.index[exit_index], equity))
  105. last_exit_index = exit_index
  106. series = pd.Series({ts: value for ts, value in equity_points}).sort_index()
  107. daily_index = pd.date_range(frame.index[warmup].normalize(), frame.index[-1].normalize(), freq="1D", tz="UTC")
  108. daily = series.reindex(daily_index.union(series.index)).sort_index().ffill().reindex(daily_index)
  109. return daily, pd.DataFrame(trades)
  110. def metrics(equity: pd.Series, trades: pd.DataFrame, offset: pd.DateOffset | None) -> dict[str, float | int]:
  111. start = equity.index[0] if offset is None else equity.index[-1] - offset
  112. scoped = equity[equity.index >= start]
  113. scoped_trades = trades[trades["entry_time"] >= scoped.index[0]] if len(trades) else trades
  114. total = float(scoped.iloc[-1] / scoped.iloc[0] - 1.0)
  115. years = (scoped.index[-1] - scoped.index[0]).total_seconds() / 31_536_000
  116. annual = (1.0 + total) ** (1.0 / years) - 1.0 if total > -1.0 and years > 0.0 else -1.0
  117. drawdown = float(((scoped.cummax() - scoped) / scoped.cummax()).max())
  118. rets = scoped_trades["return"].astype(float).tolist() if len(scoped_trades) else []
  119. wins = [value for value in rets if value > 0.0]
  120. losses = [value for value in rets if value < 0.0]
  121. pf = sum(wins) / abs(sum(losses)) if losses else (999.0 if wins else 0.0)
  122. return {
  123. "total_return": total,
  124. "annualized_return": annual,
  125. "max_drawdown": drawdown,
  126. "win_rate": len(wins) / len(rets) if rets else 0.0,
  127. "profit_factor": pf,
  128. "trades": len(rets),
  129. }
  130. def row_for(spec: Spec, equity: pd.Series, trades: pd.DataFrame) -> dict[str, object]:
  131. row: dict[str, object] = {
  132. "name": spec.name,
  133. "symbol": spec.symbol,
  134. "bar": spec.bar,
  135. "side": spec.side,
  136. "hour": spec.hour,
  137. "weekdays": spec.weekdays,
  138. "hold": spec.hold,
  139. "vol_gate": spec.vol_gate,
  140. }
  141. for label, offset in HORIZONS:
  142. for key, value in metrics(equity, trades, offset).items():
  143. row[f"{label}_{key}"] = value
  144. return row
  145. def monthly_stability(equity: pd.Series) -> pd.DataFrame:
  146. month_end = equity.resample("ME").last()
  147. month_start = equity.resample("ME").first()
  148. monthly = month_end / month_start - 1.0
  149. rows = []
  150. for year, values in monthly.groupby(monthly.index.year):
  151. clean = values.dropna()
  152. if len(clean) == 0:
  153. continue
  154. rows.append(
  155. {
  156. "year": int(year),
  157. "months": int(len(clean)),
  158. "total_return": float((1.0 + clean).prod() - 1.0),
  159. "positive_month_rate": float((clean > 0.0).mean()),
  160. "worst_month": float(clean.min()),
  161. "best_month": float(clean.max()),
  162. }
  163. )
  164. return pd.DataFrame(rows)
  165. def markdown_table(frame: pd.DataFrame) -> str:
  166. def cell(value: object) -> str:
  167. if isinstance(value, float):
  168. return f"{value:.4f}"
  169. return str(value).replace("|", "\\|")
  170. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  171. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  172. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  173. def report(totals: pd.DataFrame, selected: pd.Series, selected_metrics: pd.DataFrame, stability: pd.DataFrame) -> str:
  174. positive = totals[
  175. (totals["full_total_return"] > 0.0)
  176. & (totals["3y_total_return"] > 0.0)
  177. & (totals["1y_total_return"] > 0.0)
  178. & (totals["6m_total_return"] > 0.0)
  179. & (totals["3m_total_return"] > 0.0)
  180. ]
  181. verdict = "not worth continuing"
  182. if (
  183. selected["full_profit_factor"] >= 1.08
  184. and selected["full_trades"] >= 80
  185. and selected["3m_trades"] >= 5
  186. and float(stability["positive_month_rate"].mean()) >= 0.50
  187. ):
  188. verdict = "worth continuing only as a small calendar-anomaly research branch; not suitable as a short-biased or bidirectional promotion yet"
  189. keep = [
  190. "name",
  191. "symbol",
  192. "bar",
  193. "side",
  194. "hour",
  195. "weekdays",
  196. "hold",
  197. "vol_gate",
  198. "full_total_return",
  199. "full_annualized_return",
  200. "full_max_drawdown",
  201. "full_win_rate",
  202. "full_profit_factor",
  203. "full_trades",
  204. "3y_total_return",
  205. "1y_total_return",
  206. "6m_total_return",
  207. "3m_total_return",
  208. ]
  209. directional = (
  210. positive.sort_values(["calmar", "full_profit_factor"], ascending=[False, False])
  211. .groupby(["symbol", "side"], as_index=False)
  212. .head(1)
  213. )
  214. directional_table = markdown_table(directional[keep]) if len(directional) else "No all-window-positive directional candidates."
  215. short_table = markdown_table(positive[positive["side"] == "short"].sort_values(["calmar", "full_profit_factor"], ascending=[False, False])[keep])
  216. if not len(positive[positive["side"] == "short"]):
  217. short_table = "No short all-window-positive candidates."
  218. return (
  219. "# ETH/BTC Calendar Carry Search\n\n"
  220. "Scope: local OKX candles only; no live path; single-entry fixed-hold time bucket rules; no crash-follow, no ETH/BTC relative momentum, no staged entry.\n\n"
  221. f"Selected candidate: `{selected['name']}`.\n\n"
  222. f"All-window-positive candidates: {len(positive)} / {len(totals)}.\n\n"
  223. f"Verdict: {verdict}.\n\n"
  224. "## Selected Metrics\n\n"
  225. f"{markdown_table(selected_metrics)}\n\n"
  226. "## Year Stability\n\n"
  227. f"{markdown_table(stability)}\n\n"
  228. "## Directional Check\n\n"
  229. f"{directional_table}\n\n"
  230. "## Short Candidates\n\n"
  231. f"{short_table}\n\n"
  232. "## Top 10 Candidates\n\n"
  233. f"{markdown_table(totals.head(10)[keep])}\n"
  234. )
  235. def main() -> int:
  236. frames = {
  237. (symbol, bar): resample(load_frame(symbol), bar)
  238. for symbol in SYMBOLS
  239. for bar in ("1h", "4h")
  240. }
  241. rows = []
  242. selected_equity: pd.Series | None = None
  243. selected_trades: pd.DataFrame | None = None
  244. selected_spec: Spec | None = None
  245. equities: dict[str, tuple[Spec, pd.Series, pd.DataFrame]] = {}
  246. specs = build_specs()
  247. for index, spec in enumerate(specs, 1):
  248. equity, trades = run_spec(spec, frames[(spec.symbol, spec.bar)])
  249. row = row_for(spec, equity, trades)
  250. rows.append(row)
  251. equities[spec.name] = (spec, equity, trades)
  252. if index % 1000 == 0:
  253. print(f"done {index}/{len(specs)}", flush=True)
  254. totals = pd.DataFrame(rows)
  255. totals["calmar"] = totals["full_annualized_return"] / totals["full_max_drawdown"].replace(0.0, pd.NA)
  256. eligible = totals[
  257. (totals["full_total_return"] > 0.0)
  258. & (totals["3y_total_return"] > 0.0)
  259. & (totals["1y_total_return"] > 0.0)
  260. & (totals["6m_total_return"] > 0.0)
  261. & (totals["3m_total_return"] > 0.0)
  262. & (totals["full_trades"] >= 50)
  263. ]
  264. ranked = (eligible if len(eligible) else totals).sort_values(
  265. ["calmar", "full_profit_factor", "3m_total_return", "full_trades"],
  266. ascending=[False, False, False, False],
  267. )
  268. totals = totals.sort_values(["calmar", "full_profit_factor", "3m_total_return"], ascending=[False, False, False])
  269. selected_name = str(ranked.iloc[0]["name"])
  270. selected_spec, selected_equity, selected_trades = equities[selected_name]
  271. selected = totals[totals["name"] == selected_name].iloc[0]
  272. selected_rows = []
  273. for label, offset in HORIZONS:
  274. row = {"period": label}
  275. row.update(metrics(selected_equity, selected_trades, offset))
  276. selected_rows.append(row)
  277. selected_metrics = pd.DataFrame(selected_rows)
  278. stability = monthly_stability(selected_equity)
  279. OUT_DIR.mkdir(parents=True, exist_ok=True)
  280. totals_path = OUT_DIR / f"{PREFIX}-totals.csv"
  281. stability_path = OUT_DIR / f"{PREFIX}-stability.csv"
  282. report_path = OUT_DIR / f"{PREFIX}-report.md"
  283. totals.to_csv(totals_path, index=False)
  284. stability.to_csv(stability_path, index=False)
  285. report_path.write_text(report(totals, selected, selected_metrics, stability), encoding="utf-8")
  286. print(selected_metrics.to_string(index=False))
  287. print(f"wrote {totals_path}, {stability_path}, {report_path}")
  288. return 0
# When run as a script, exit with main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())