# search_recent_regime_mean_reversion.py
  1. from __future__ import annotations
  2. import argparse
  3. from dataclasses import dataclass
  4. from pathlib import Path
  5. import pandas as pd
  6. DATA_DIR = Path("data/okx-candles")
  7. OUTPUT_DIR = Path("reports/recent-regime")
  8. PREFIX = "recent-regime-mean-reversion"
  9. SYMBOLS = ("ETH-USDT-SWAP", "BTC-USDT-SWAP")
  10. BARS = ("3m", "5m", "15m")
  11. INITIAL_EQUITY = 10_000.0
  12. ROUNDTRIP_FEE = 0.0008
  13. HORIZONS = (
  14. ("7d", pd.DateOffset(days=7)),
  15. ("14d", pd.DateOffset(days=14)),
  16. ("30d", pd.DateOffset(days=30)),
  17. ("90d", pd.DateOffset(days=90)),
  18. ("6m", pd.DateOffset(months=6)),
  19. ("1y", pd.DateOffset(years=1)),
  20. ("3y", pd.DateOffset(years=3)),
  21. )
  22. @dataclass(frozen=True)
  23. class Spec:
  24. symbol: str
  25. bar: str
  26. side_mode: str
  27. range_lookback: int
  28. compression_window: int
  29. compression_quantile: float
  30. sweep_pct: float
  31. stop_pct: float
  32. take_pct: float
  33. hold: int
  34. @property
  35. def name(self) -> str:
  36. base = self.symbol.split("-")[0].lower()
  37. return (
  38. f"{base}-{self.bar}-fbmr-{self.side_mode}"
  39. f"-rl{self.range_lookback}-cw{self.compression_window}-cq{self.compression_quantile:g}"
  40. f"-sw{self.sweep_pct:g}-sl{self.stop_pct:g}-tp{self.take_pct:g}-h{self.hold}"
  41. )
  42. def load_frame(symbol: str, bar: str, months: int) -> pd.DataFrame:
  43. frame = pd.read_csv(DATA_DIR / symbol / f"{bar}.csv")
  44. frame["ts"] = pd.to_datetime(frame["ts"], unit="ms", utc=True)
  45. frame = frame.sort_values("ts").drop_duplicates("ts", keep="last").set_index("ts")
  46. start = frame.index[-1] - pd.DateOffset(months=months)
  47. return frame[frame.index >= start].copy()
  48. def signal_frame(frame: pd.DataFrame, spec: Spec) -> pd.DataFrame:
  49. prior_high = frame["high"].shift(1).rolling(spec.range_lookback).max()
  50. prior_low = frame["low"].shift(1).rolling(spec.range_lookback).min()
  51. midpoint = (prior_high + prior_low) / 2.0
  52. width = (prior_high - prior_low) / frame["close"]
  53. width_cap = width.rolling(spec.compression_window).quantile(spec.compression_quantile)
  54. compressed = width <= width_cap
  55. upper_fake = (
  56. compressed
  57. & (frame["high"] >= prior_high * (1.0 + spec.sweep_pct))
  58. & (frame["close"] < prior_high)
  59. & (frame["close"] < frame["open"])
  60. )
  61. lower_fake = (
  62. compressed
  63. & (frame["low"] <= prior_low * (1.0 - spec.sweep_pct))
  64. & (frame["close"] > prior_low)
  65. & (frame["close"] > frame["open"])
  66. )
  67. if spec.side_mode == "short":
  68. lower_fake = pd.Series(False, index=frame.index)
  69. elif spec.side_mode == "long":
  70. upper_fake = pd.Series(False, index=frame.index)
  71. return pd.DataFrame(
  72. {
  73. "long_entry": lower_fake.fillna(False),
  74. "short_entry": upper_fake.fillna(False),
  75. "long_midpoint": midpoint,
  76. "short_midpoint": midpoint,
  77. },
  78. index=frame.index,
  79. )
  80. def trade_return(side: str, entry_price: float, exit_price: float) -> float:
  81. gross = exit_price / entry_price - 1.0 if side == "long" else entry_price / exit_price - 1.0
  82. return gross - ROUNDTRIP_FEE
  83. def exit_price(position: dict[str, object], row: object) -> float | None:
  84. side = str(position["side"])
  85. stop = float(position["stop"])
  86. take = float(position["take"])
  87. midpoint = float(position["midpoint"])
  88. if side == "long":
  89. if float(row.open) <= stop or float(row.open) >= take:
  90. return float(row.open)
  91. if float(row.low) <= stop:
  92. return stop
  93. if float(row.high) >= take:
  94. return take
  95. if float(row.high) >= midpoint:
  96. return midpoint
  97. else:
  98. if float(row.open) >= stop or float(row.open) <= take:
  99. return float(row.open)
  100. if float(row.high) >= stop:
  101. return stop
  102. if float(row.low) <= take:
  103. return take
  104. if float(row.low) <= midpoint:
  105. return midpoint
  106. return None
  107. def run_spec(frame: pd.DataFrame, spec: Spec) -> tuple[pd.Series, list[dict[str, object]]]:
  108. signals = signal_frame(frame, spec)
  109. warmup = spec.range_lookback + spec.compression_window + 2
  110. trades: list[dict[str, object]] = []
  111. rows = list(frame.itertuples())
  112. short_indices = set(signals.index[signals["short_entry"]].to_series().map(frame.index.get_loc).astype(int))
  113. long_indices = set(signals.index[signals["long_entry"]].to_series().map(frame.index.get_loc).astype(int))
  114. index = warmup
  115. while index < len(rows) - 1:
  116. side = "short" if index in short_indices else "long" if index in long_indices else ""
  117. if not side:
  118. index += 1
  119. continue
  120. entry_index = index + 1
  121. entry_row = rows[entry_index]
  122. entry = float(entry_row.open)
  123. position = {
  124. "side": side,
  125. "entry_time": frame.index[entry_index],
  126. "entry_index": entry_index,
  127. "entry_price": entry,
  128. "stop": entry * (1.0 - spec.stop_pct if side == "long" else 1.0 + spec.stop_pct),
  129. "take": entry * (1.0 + spec.take_pct if side == "long" else 1.0 - spec.take_pct),
  130. "midpoint": float(signals[f"{side}_midpoint"].iloc[index]),
  131. }
  132. exit_index = min(entry_index + spec.hold, len(rows) - 1)
  133. price = float(rows[exit_index].close)
  134. for scan_index in range(entry_index, exit_index + 1):
  135. found = exit_price(position, rows[scan_index])
  136. if found is not None:
  137. exit_index = scan_index
  138. price = found
  139. break
  140. trades.append(
  141. {
  142. "side": side,
  143. "entry_time": position["entry_time"],
  144. "exit_time": frame.index[exit_index],
  145. "return": trade_return(side, entry, price),
  146. }
  147. )
  148. index = exit_index + 1
  149. daily_index = pd.date_range(frame.index[0].normalize(), frame.index[-1].normalize(), freq="1D", tz="UTC")
  150. if not trades:
  151. return pd.Series(INITIAL_EQUITY, index=daily_index), trades
  152. returns = pd.DataFrame(
  153. {
  154. "date": [pd.Timestamp(trade["exit_time"]).normalize() for trade in trades],
  155. "return": [float(trade["return"]) for trade in trades],
  156. }
  157. )
  158. daily_returns = returns.groupby("date")["return"].apply(lambda values: (1.0 + values).prod() - 1.0)
  159. daily_returns = daily_returns.reindex(daily_index, fill_value=0.0)
  160. daily = INITIAL_EQUITY * (1.0 + daily_returns).cumprod()
  161. daily.iloc[0] = INITIAL_EQUITY
  162. return daily, trades
  163. def max_drawdown(series: pd.Series) -> float:
  164. return float(((series.cummax() - series) / series.cummax()).max())
  165. def metrics(series: pd.Series, trades: list[dict[str, object]], start: pd.Timestamp) -> dict[str, object]:
  166. scoped = series[series.index >= start]
  167. scoped_trades = [trade for trade in trades if pd.Timestamp(trade["entry_time"]) >= scoped.index[0]]
  168. years = max((scoped.index[-1] - scoped.index[0]).total_seconds() / 86_400.0 / 365.0, 1e-9)
  169. total_return = float(scoped.iloc[-1] / scoped.iloc[0] - 1.0)
  170. annualized = (1.0 + total_return) ** (1.0 / years) - 1.0 if total_return > -1.0 else -1.0
  171. returns = [float(trade["return"]) for trade in scoped_trades]
  172. wins = [value for value in returns if value > 0.0]
  173. losses = [value for value in returns if value < 0.0]
  174. gross_profit = sum(wins)
  175. gross_loss = abs(sum(losses))
  176. avg_win = gross_profit / len(wins) if wins else 0.0
  177. avg_loss = gross_loss / len(losses) if losses else 0.0
  178. drawdown = max_drawdown(scoped)
  179. return {
  180. "start": scoped.index[0].strftime("%Y-%m-%d"),
  181. "end": scoped.index[-1].strftime("%Y-%m-%d"),
  182. "total_return": total_return,
  183. "annualized": annualized,
  184. "max_drawdown": drawdown,
  185. "calmar": annualized / drawdown if drawdown else 0.0,
  186. "trades": len(returns),
  187. "win_rate": len(wins) / len(returns) if returns else 0.0,
  188. "profit_factor": gross_profit / gross_loss if gross_loss else 0.0,
  189. "payoff_ratio": avg_win / avg_loss if avg_loss else 0.0,
  190. }
  191. def total_row(spec: Spec, series: pd.Series, trades: list[dict[str, object]]) -> dict[str, object]:
  192. row = {
  193. "name": spec.name,
  194. "symbol": spec.symbol,
  195. "bar": spec.bar,
  196. "side_mode": spec.side_mode,
  197. "range_lookback": spec.range_lookback,
  198. "compression_window": spec.compression_window,
  199. "compression_quantile": spec.compression_quantile,
  200. "sweep_pct": spec.sweep_pct,
  201. "stop_pct": spec.stop_pct,
  202. "take_pct": spec.take_pct,
  203. "hold": spec.hold,
  204. }
  205. row.update(metrics(series, trades, series.index[0]))
  206. return row
  207. def horizon_rows(spec: Spec, series: pd.Series, trades: list[dict[str, object]]) -> list[dict[str, object]]:
  208. rows = []
  209. for label, offset in HORIZONS:
  210. start = max(series.index[0], series.index[-1] - offset)
  211. row = {
  212. "name": spec.name,
  213. "symbol": spec.symbol,
  214. "bar": spec.bar,
  215. "side_mode": spec.side_mode,
  216. "horizon": label,
  217. }
  218. row.update(metrics(series, trades, start))
  219. rows.append(row)
  220. return rows
  221. def monthly_rows(spec: Spec, series: pd.Series, trades: list[dict[str, object]]) -> pd.DataFrame:
  222. monthly = series.resample("ME").last()
  223. frame = pd.DataFrame(
  224. {
  225. "name": spec.name,
  226. "symbol": spec.symbol,
  227. "bar": spec.bar,
  228. "side_mode": spec.side_mode,
  229. "month": monthly.index.strftime("%Y-%m"),
  230. "start_equity": monthly.shift(1).fillna(series.iloc[0]).to_numpy(),
  231. "end_equity": monthly.to_numpy(),
  232. }
  233. )
  234. frame["total_return"] = frame["end_equity"] / frame["start_equity"] - 1.0
  235. trade_months = pd.Series([pd.Timestamp(trade["entry_time"]).strftime("%Y-%m") for trade in trades], dtype=object)
  236. counts = trade_months.value_counts() if len(trade_months) else pd.Series(dtype=int)
  237. frame["trades"] = frame["month"].map(counts).fillna(0).astype(int)
  238. return frame
  239. def build_specs() -> list[Spec]:
  240. specs: list[Spec] = []
  241. bar_holds = {"3m": 30, "5m": 18, "15m": 10}
  242. for symbol in SYMBOLS:
  243. for bar in BARS:
  244. for side_mode in ("short", "long", "bidir"):
  245. for range_lookback in (24, 48):
  246. for compression_quantile in (0.20, 0.35):
  247. for sweep_pct in (0.0008, 0.0016):
  248. specs.append(
  249. Spec(
  250. symbol=symbol,
  251. bar=bar,
  252. side_mode=side_mode,
  253. range_lookback=range_lookback,
  254. compression_window=range_lookback * 6,
  255. compression_quantile=compression_quantile,
  256. sweep_pct=sweep_pct,
  257. stop_pct=0.006,
  258. take_pct=0.008,
  259. hold=bar_holds[bar],
  260. )
  261. )
  262. return specs
  263. def markdown_table(frame: pd.DataFrame) -> str:
  264. def cell(value: object) -> str:
  265. if isinstance(value, float):
  266. return f"{value:.4f}"
  267. return str(value).replace("|", "\\|")
  268. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  269. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  270. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  271. def report_text(totals: pd.DataFrame, horizons: pd.DataFrame, monthly: pd.DataFrame, selected_name: str) -> str:
  272. selected_horizons = horizons[horizons["name"] == selected_name]
  273. selected_monthly = monthly[monthly["name"] == selected_name]
  274. top_cols = [
  275. "name",
  276. "symbol",
  277. "bar",
  278. "side_mode",
  279. "total_return",
  280. "annualized",
  281. "max_drawdown",
  282. "calmar",
  283. "trades",
  284. "win_rate",
  285. "profit_factor",
  286. "payoff_ratio",
  287. ]
  288. active_months = selected_monthly[selected_monthly["trades"] > 0]
  289. return "\n".join(
  290. [
  291. "# Recent Regime False Breakout Mean Reversion",
  292. "",
  293. "Scope: ETH/BTC perpetual swap local OKX candles, 3m/5m/15m, most recent 36 months only. No network and no live executor changes.",
  294. "",
  295. "Signal definition: compressed rolling range, sweep beyond the prior range, close back inside, enter the opposite side on next open, exit at range midpoint, stop, take-profit, or max hold.",
  296. "",
  297. f"Selected by Calmar then annualized return among candidates with at least 30 trades: `{selected_name}`.",
  298. "",
  299. "## Selected Horizons",
  300. "",
  301. markdown_table(selected_horizons[["horizon", "total_return", "annualized", "max_drawdown", "calmar", "trades", "win_rate", "profit_factor", "payoff_ratio"]]),
  302. "",
  303. "## Selected Monthly Summary",
  304. "",
  305. f"Months: {len(selected_monthly)}, active months: {len(active_months)}, positive active months: {int((active_months['total_return'] > 0.0).sum())}.",
  306. "",
  307. "Worst active months:",
  308. "",
  309. markdown_table(active_months.sort_values("total_return").head(10)[["month", "total_return", "trades"]]),
  310. "",
  311. "## Top Candidates",
  312. "",
  313. markdown_table(totals.head(15)[top_cols]),
  314. "",
  315. "## Output Files",
  316. "",
  317. f"- `{PREFIX}-total.csv`",
  318. f"- `{PREFIX}-horizons.csv`",
  319. f"- `{PREFIX}-monthly.csv`",
  320. f"- `{PREFIX}-report.md`",
  321. "",
  322. ]
  323. )
  324. def main() -> int:
  325. parser = argparse.ArgumentParser()
  326. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  327. parser.add_argument("--months", type=int, default=36)
  328. args = parser.parse_args()
  329. frames = {(symbol, bar): load_frame(symbol, bar, args.months) for symbol in SYMBOLS for bar in BARS}
  330. total_data: list[dict[str, object]] = []
  331. horizon_data: list[dict[str, object]] = []
  332. monthly_parts: list[pd.DataFrame] = []
  333. for index, spec in enumerate(build_specs(), start=1):
  334. series, trades = run_spec(frames[(spec.symbol, spec.bar)], spec)
  335. total_data.append(total_row(spec, series, trades))
  336. horizon_data.extend(horizon_rows(spec, series, trades))
  337. monthly_parts.append(monthly_rows(spec, series, trades))
  338. if index % 100 == 0:
  339. print(f"done {index}", flush=True)
  340. totals = pd.DataFrame(total_data).sort_values(["calmar", "annualized", "trades"], ascending=[False, False, False])
  341. horizons = pd.DataFrame(horizon_data)
  342. monthly = pd.concat(monthly_parts, ignore_index=True)
  343. trade_eligible = totals[totals["trades"] >= 30]
  344. selected = (trade_eligible if len(trade_eligible) else totals).iloc[0]
  345. args.output_dir.mkdir(parents=True, exist_ok=True)
  346. total_path = args.output_dir / f"{PREFIX}-total.csv"
  347. horizon_path = args.output_dir / f"{PREFIX}-horizons.csv"
  348. monthly_path = args.output_dir / f"{PREFIX}-monthly.csv"
  349. report_path = args.output_dir / f"{PREFIX}-report.md"
  350. totals.to_csv(total_path, index=False)
  351. horizons.to_csv(horizon_path, index=False)
  352. monthly.to_csv(monthly_path, index=False)
  353. report_path.write_text(report_text(totals, horizons, monthly, str(selected["name"])), encoding="utf-8")
  354. print(totals.head(10).to_string(index=False))
  355. print(f"wrote {total_path}, {horizon_path}, {monthly_path}, {report_path}")
  356. return 0
  357. if __name__ == "__main__":
  358. raise SystemExit(main())