# search_eth_strategy_portfolio_10y.py (20 KB)
  1. from __future__ import annotations
  2. import argparse
  3. import sys
  4. from dataclasses import dataclass
  5. from itertools import combinations
  6. from pathlib import Path
  7. import pandas as pd
  8. sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
  9. from scripts import explore_ultrashort as explore
  10. from scripts import search_eth_price_twap_variants as price_twap
  11. OUTPUT_DIR = Path("reports/eth-exploration")
  12. YEARS = 10.0
  13. PRIMARY_COST = "maker_taker"
  14. COSTS = {
  15. "maker_maker": 0.0012,
  16. "maker_taker": 0.0021,
  17. "taker_taker": 0.0030,
  18. }
  19. HORIZONS = (
  20. ("3y", pd.DateOffset(years=3)),
  21. ("1y", pd.DateOffset(years=1)),
  22. ("6m", pd.DateOffset(months=6)),
  23. ("3m", pd.DateOffset(months=3)),
  24. )
  25. @dataclass(frozen=True)
  26. class Strategy:
  27. family: str
  28. bar: str
  29. candidate: object
  30. pair: bool
  31. def twap_candidate_name(candidate: object) -> str:
  32. if isinstance(candidate, dict):
  33. return str(candidate["name"])
  34. return str(candidate.name)
  35. def build_strategies() -> list[Strategy]:
  36. return [
  37. Strategy(
  38. "eth_price_twap_deep",
  39. "15m",
  40. {
  41. "name": "rsi2-long-guarded-price-twap-o0.0030-0.0060-0.0090-v2-t160-l5.0-x50.0-sl0.012-mh48",
  42. "spec": {
  43. "trend_sma": 160,
  44. "rsi_threshold": 5.0,
  45. "exit_rsi": 50.0,
  46. "stop_loss_pct": 0.012,
  47. "max_hold_bars": 48,
  48. "entry_offsets": (0.003, 0.006, 0.009),
  49. "entry_valid_bars": 2,
  50. "fill_buffer": 0.0,
  51. },
  52. },
  53. False,
  54. ),
  55. Strategy(
  56. "eth_price_twap_mid",
  57. "15m",
  58. {
  59. "name": "rsi2-long-guarded-price-twap-o0.0010-0.0030-0.0050-v2-t160-l5.0-x55.0-sl0.008-mh48",
  60. "spec": {
  61. "trend_sma": 160,
  62. "rsi_threshold": 5.0,
  63. "exit_rsi": 55.0,
  64. "stop_loss_pct": 0.008,
  65. "max_hold_bars": 48,
  66. "entry_offsets": (0.001, 0.003, 0.005),
  67. "entry_valid_bars": 2,
  68. "fill_buffer": 0.0,
  69. },
  70. },
  71. False,
  72. ),
  73. Strategy(
  74. "eth_rsi2_market",
  75. "15m",
  76. explore.build_rsi2_long_guarded_candidate(240, 5.0, 45.0, 0.006, 48),
  77. False,
  78. ),
  79. Strategy(
  80. "eth_btc_rsi_filter",
  81. "15m",
  82. explore.build_eth_btc_rsi_filter_candidate(50, 3.0, 55.0, 120, 240, 0.0),
  83. True,
  84. ),
  85. Strategy(
  86. "eth_btc_rsi_filter",
  87. "15m",
  88. explore.build_eth_btc_rsi_filter_candidate(50, 3.0, 55.0, 480, 240, 0.0),
  89. True,
  90. ),
  91. Strategy(
  92. "btc_lead_eth_lag_15m",
  93. "15m",
  94. explore.build_btc_lead_eth_lag_candidate(8, 0.018, 0.006, 8, 0.006, 0.018),
  95. True,
  96. ),
  97. Strategy(
  98. "btc_lead_eth_lag_15m",
  99. "15m",
  100. explore.build_btc_lead_eth_lag_candidate(16, 0.024, 0.006, 32, 0.006, 0.018),
  101. True,
  102. ),
  103. Strategy(
  104. "btc_lead_eth_lag_5m",
  105. "5m",
  106. explore.build_btc_lead_eth_lag_candidate(16, 0.012, 0.006, 32, 0.006, 0.018),
  107. True,
  108. ),
  109. Strategy(
  110. "btc_lead_eth_lag_5m",
  111. "5m",
  112. explore.build_btc_lead_eth_lag_candidate(16, 0.012, 0.006, 8, 0.006, 0.018),
  113. True,
  114. ),
  115. ]
  116. def load_candles(symbol: str, bar: str, years: float) -> list[explore.Candle]:
  117. candles, exhausted = explore.load_cached_candles(explore.CANDLE_CACHE_DIR, symbol, bar)
  118. if not candles:
  119. raise FileNotFoundError(f"missing cached candles for {symbol} {bar}")
  120. requested = explore.history_bars_for_years(bar, years)
  121. return candles[-requested:] if len(candles) > requested else candles
  122. def run_strategy(strategy: Strategy, data: dict[tuple[str, str], list[explore.Candle]]) -> explore.SegmentResult:
  123. eth = data[("ETH-USDT-SWAP", strategy.bar)]
  124. if isinstance(strategy.candidate, dict):
  125. return price_twap.run_price_twap_segment(
  126. candles=eth,
  127. spec=strategy.candidate["spec"],
  128. roundtrip_cost_on_margin=0.0,
  129. )
  130. if strategy.pair:
  131. btc = data[("BTC-USDT-SWAP", strategy.bar)]
  132. eth, btc = explore.align_pair_candles(eth, btc)
  133. return strategy.candidate.run(
  134. eth_candles=eth,
  135. btc_candles=btc,
  136. leverage=explore.LEVERAGE,
  137. warmup_bars=strategy.candidate.warmup_bars,
  138. )
  139. return strategy.candidate.run(
  140. candles=eth,
  141. leverage=explore.LEVERAGE,
  142. warmup_bars=strategy.candidate.warmup_bars,
  143. )
  144. def daily_equity(frame: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
  145. series = frame.set_index("ts")["equity"].sort_index()
  146. index = pd.date_range(start.normalize(), end.normalize(), freq="1D", tz="UTC")
  147. return series.reindex(index.union(series.index)).sort_index().ffill().reindex(index).ffill()
  148. def metrics_from_daily_equity(series: pd.Series) -> dict[str, float]:
  149. years = (series.index[-1] - series.index[0]).total_seconds() / 86_400 / 365
  150. total_return = float(series.iloc[-1] / series.iloc[0] - 1.0)
  151. annualized_return = (1.0 + total_return) ** (1.0 / years) - 1.0 if total_return > -1.0 and years > 0.0 else 0.0
  152. max_drawdown = explore.max_drawdown_from_equity([float(value) for value in series])
  153. returns = series.pct_change().dropna()
  154. daily_std = float(returns.std(ddof=1)) if len(returns) > 1 else 0.0
  155. sharpe = float(returns.mean()) / daily_std * (365 ** 0.5) if daily_std else 0.0
  156. return {
  157. "net_total_return": total_return,
  158. "net_annualized_return": annualized_return,
  159. "net_max_drawdown": max_drawdown,
  160. "net_calmar": annualized_return / max_drawdown if max_drawdown else 0.0,
  161. "net_sharpe_daily": sharpe,
  162. }
  163. def horizon_rows(name: str, series: pd.Series) -> list[dict[str, object]]:
  164. rows: list[dict[str, object]] = []
  165. end_time = series.index[-1]
  166. for label, offset in HORIZONS:
  167. cutoff = end_time - offset
  168. horizon = series[series.index >= cutoff]
  169. if len(horizon) < 2:
  170. horizon = series
  171. rows.append(
  172. {
  173. "portfolio": name,
  174. "horizon": label,
  175. "horizon_start": horizon.index[0].strftime("%Y-%m-%d"),
  176. "horizon_end": horizon.index[-1].strftime("%Y-%m-%d"),
  177. **metrics_from_daily_equity(horizon),
  178. }
  179. )
  180. return rows
  181. def combine_daily_returns(
  182. *,
  183. name: str,
  184. legs: tuple[str, ...],
  185. mode: str,
  186. daily: dict[str, pd.Series],
  187. strategy_metrics: dict[str, dict[str, float]],
  188. ) -> pd.Series:
  189. returns = pd.DataFrame({leg: daily[leg].pct_change().fillna(0.0) for leg in legs}).dropna()
  190. if mode == "equal":
  191. weights = pd.Series(1.0 / len(legs), index=legs)
  192. else:
  193. raw = pd.Series({leg: 1.0 / max(strategy_metrics[leg]["net_max_drawdown"], 0.01) for leg in legs})
  194. weights = raw / raw.sum()
  195. equity = explore.INITIAL_EQUITY * (1.0 + returns.mul(weights, axis=1).sum(axis=1)).cumprod()
  196. equity.name = name
  197. return equity
  198. def monthly_rows(portfolio: str, series: pd.Series) -> pd.DataFrame:
  199. monthly = series.resample("ME").last()
  200. frame = pd.DataFrame(
  201. {
  202. "portfolio": portfolio,
  203. "month": monthly.index.strftime("%Y-%m"),
  204. "start_equity": monthly.shift(1).fillna(series.iloc[0]).to_numpy(),
  205. "end_equity": monthly.to_numpy(),
  206. }
  207. )
  208. frame["return"] = frame["end_equity"] / frame["start_equity"] - 1.0
  209. return frame
  210. def markdown_table(frame: pd.DataFrame) -> str:
  211. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  212. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  213. return "\n".join("| " + " | ".join(format_cell(value) for value in row) + " |" for row in rows)
  214. def format_cell(value: object) -> str:
  215. if isinstance(value, float):
  216. return f"{value:.6g}"
  217. return str(value).replace("|", "\\|")
  218. def markdown_report(
  219. *,
  220. command: str,
  221. paths: list[Path],
  222. strategy_total: pd.DataFrame,
  223. portfolio_total: pd.DataFrame,
  224. horizon: pd.DataFrame,
  225. worst_months: pd.DataFrame,
  226. correlation: pd.DataFrame,
  227. ) -> str:
  228. primary_total = portfolio_total[portfolio_total["cost_model"] == PRIMARY_COST].copy()
  229. top = primary_total.head(10)
  230. deep = strategy_total[
  231. (strategy_total["cost_model"] == PRIMARY_COST)
  232. & (strategy_total["family"] == "eth_price_twap_deep")
  233. ].iloc[0]
  234. best = top.iloc[0]
  235. recent = horizon[
  236. (horizon["cost_model"] == PRIMARY_COST)
  237. & (horizon["portfolio"] == best["portfolio"])
  238. ]
  239. recent_ok = bool((recent[recent["horizon"].isin(["6m", "3m"])]["net_total_return"] > 0.0).all())
  240. worth_small = bool(
  241. best["net_max_drawdown"] < deep["net_max_drawdown"]
  242. and best["net_annualized_return"] > 0.0
  243. and recent_ok
  244. )
  245. lines = [
  246. "# ETH strategy portfolio 10y exploration",
  247. "",
  248. f"Run command: `{command}`",
  249. "",
  250. "Output files:",
  251. *[f"- `{path}`" for path in paths],
  252. "",
  253. "Scope: requested 10 years from cached continuous OKX candles; actual coverage is shown in the CSV files.",
  254. f"Baseline: ETH price-TWAP deep maker_taker annualized {deep['net_annualized_return']:.4f}, max DD {deep['net_max_drawdown']:.4f}.",
  255. "",
  256. "## Top 10 maker_taker portfolios",
  257. "",
  258. markdown_table(
  259. top[
  260. [
  261. "portfolio",
  262. "mode",
  263. "leg_count",
  264. "legs",
  265. "net_annualized_return",
  266. "net_max_drawdown",
  267. "net_calmar",
  268. "net_sharpe_daily",
  269. "worst_month_return",
  270. "avg_pair_corr",
  271. "lower_dd_than_deep",
  272. ]
  273. ]
  274. ),
  275. "",
  276. "## Recent horizons for top portfolio",
  277. "",
  278. markdown_table(
  279. recent[
  280. [
  281. "horizon",
  282. "horizon_start",
  283. "horizon_end",
  284. "net_total_return",
  285. "net_annualized_return",
  286. "net_max_drawdown",
  287. "net_calmar",
  288. ]
  289. ]
  290. ),
  291. "",
  292. "## Worst months",
  293. "",
  294. markdown_table(worst_months[worst_months["cost_model"] == PRIMARY_COST].head(10)),
  295. "",
  296. "## Strategy return correlation",
  297. "",
  298. markdown_table(correlation),
  299. "",
  300. "## Live small allocation judgment",
  301. "",
  302. (
  303. "Yes: the best maker_taker portfolio is more suitable than ETH price-TWAP deep for a small live allocation under this test."
  304. if worth_small
  305. else "No: the best maker_taker portfolio does not clear the drawdown plus recent-validity bar against ETH price-TWAP deep under this test."
  306. ),
  307. ]
  308. return "\n".join(lines) + "\n"
  309. def main() -> int:
  310. parser = argparse.ArgumentParser()
  311. parser.add_argument("--years", type=float, default=YEARS)
  312. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  313. parser.add_argument("--max-leg-count", type=int, default=5)
  314. args = parser.parse_args()
  315. strategies = build_strategies()
  316. bars = sorted({strategy.bar for strategy in strategies})
  317. data = {
  318. (symbol, bar): load_candles(symbol, bar, args.years)
  319. for bar in bars
  320. for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP")
  321. }
  322. results: dict[str, tuple[Strategy, explore.SegmentResult]] = {}
  323. for index, strategy in enumerate(strategies, start=1):
  324. key = f"{strategy.family}:{strategy.bar}:{twap_candidate_name(strategy.candidate)}"
  325. results[key] = (strategy, run_strategy(strategy, data))
  326. print(f"done {index}/{len(strategies)} {key}")
  327. start = max(
  328. pd.to_datetime(result.equity_curve[0]["ts"], unit="ms", utc=True)
  329. for _, result in results.values()
  330. )
  331. end = min(
  332. pd.to_datetime(result.equity_curve[-1]["ts"], unit="ms", utc=True)
  333. for _, result in results.values()
  334. )
  335. strategy_rows: list[dict[str, object]] = []
  336. daily_by_cost: dict[str, dict[str, pd.Series]] = {cost: {} for cost in COSTS}
  337. metrics_by_cost: dict[str, dict[str, dict[str, float]]] = {cost: {} for cost in COSTS}
  338. for key, (strategy, result) in results.items():
  339. for cost_name, cost_value in COSTS.items():
  340. frame = explore.cost_adjusted_trade_equity_frame(result, cost_value)
  341. daily = daily_equity(frame, start, end)
  342. metrics = metrics_from_daily_equity(daily)
  343. daily_by_cost[cost_name][key] = daily
  344. metrics_by_cost[cost_name][key] = metrics
  345. strategy_rows.append(
  346. {
  347. "strategy_key": key,
  348. "family": strategy.family,
  349. "bar": strategy.bar,
  350. "name": twap_candidate_name(strategy.candidate),
  351. "cost_model": cost_name,
  352. "roundtrip_cost_on_margin": cost_value,
  353. "first_candle": start.strftime("%Y-%m-%d %H:%M"),
  354. "last_candle": end.strftime("%Y-%m-%d %H:%M"),
  355. "years": (end - start).total_seconds() / 86_400 / 365,
  356. "trades": result.trade_count,
  357. "gross_total_return": result.total_return,
  358. "gross_max_drawdown_mark_to_market": result.max_drawdown,
  359. **metrics,
  360. }
  361. )
  362. strategy_total = pd.DataFrame(strategy_rows)
  363. portfolio_rows: list[dict[str, object]] = []
  364. horizon_output: list[dict[str, object]] = []
  365. equity_frames: list[pd.DataFrame] = []
  366. month_frames: list[pd.DataFrame] = []
  367. keys = list(results.keys())
  368. deep_key = next(key for key, (strategy, _) in results.items() if strategy.family == "eth_price_twap_deep")
  369. combo_index = 0
  370. for cost_name, daily in daily_by_cost.items():
  371. deep_dd = metrics_by_cost[cost_name][deep_key]["net_max_drawdown"]
  372. for leg_count in range(2, min(args.max_leg_count, len(keys)) + 1):
  373. for legs in combinations(keys, leg_count):
  374. if len({results[leg][0].family for leg in legs}) != leg_count:
  375. continue
  376. for mode in ("equal", "risk"):
  377. combo_index += 1
  378. portfolio = f"{mode}-{leg_count}-c{combo_index:03d}-" + "+".join(results[leg][0].family for leg in legs)
  379. series = combine_daily_returns(
  380. name=portfolio,
  381. legs=legs,
  382. mode=mode,
  383. daily=daily,
  384. strategy_metrics=metrics_by_cost[cost_name],
  385. )
  386. metrics = metrics_from_daily_equity(series)
  387. monthly = monthly_rows(portfolio, series)
  388. worst_month = float(monthly["return"].min())
  389. returns = pd.DataFrame({leg: daily[leg].pct_change() for leg in legs}).dropna()
  390. corr = returns.corr()
  391. pair_corrs = [float(corr.loc[left, right]) for left, right in combinations(legs, 2)]
  392. avg_corr = float(pd.Series(pair_corrs).mean()) if pair_corrs else 0.0
  393. portfolio_rows.append(
  394. {
  395. "portfolio": portfolio,
  396. "cost_model": cost_name,
  397. "mode": mode,
  398. "leg_count": leg_count,
  399. "legs": ";".join(legs),
  400. "first_candle": start.strftime("%Y-%m-%d %H:%M"),
  401. "last_candle": end.strftime("%Y-%m-%d %H:%M"),
  402. "years": (end - start).total_seconds() / 86_400 / 365,
  403. "worst_month_return": worst_month,
  404. "avg_pair_corr": avg_corr,
  405. "max_leg_drawdown": max(metrics_by_cost[cost_name][leg]["net_max_drawdown"] for leg in legs),
  406. "lower_dd_than_deep": metrics["net_max_drawdown"] < deep_dd,
  407. **metrics,
  408. }
  409. )
  410. for row in horizon_rows(portfolio, series):
  411. horizon_output.append({"cost_model": cost_name, **row})
  412. equity_frames.append(
  413. pd.DataFrame(
  414. {
  415. "portfolio": portfolio,
  416. "cost_model": cost_name,
  417. "date": series.index.strftime("%Y-%m-%d"),
  418. "equity": series.to_numpy(),
  419. }
  420. )
  421. )
  422. month_frames.append(monthly.assign(cost_model=cost_name))
  423. portfolio_total = pd.DataFrame(portfolio_rows)
  424. portfolio_total = portfolio_total.sort_values(
  425. ["cost_model", "lower_dd_than_deep", "net_calmar", "net_annualized_return", "worst_month_return"],
  426. ascending=[True, False, False, False, False],
  427. )
  428. primary = portfolio_total[portfolio_total["cost_model"] == PRIMARY_COST]
  429. other = portfolio_total[portfolio_total["cost_model"] != PRIMARY_COST]
  430. portfolio_total = pd.concat([primary, other], ignore_index=True)
  431. top_names = set(primary.head(10)["portfolio"])
  432. horizon = pd.DataFrame(horizon_output)
  433. horizon["horizon"] = pd.Categorical(horizon["horizon"], categories=["3y", "1y", "6m", "3m"], ordered=True)
  434. horizon = horizon[horizon["portfolio"].isin(top_names)].sort_values(["cost_model", "portfolio", "horizon"])
  435. equity = pd.concat(equity_frames, ignore_index=True)
  436. equity = equity[equity["portfolio"].isin(top_names)]
  437. monthly = pd.concat(month_frames, ignore_index=True)
  438. worst_months = monthly[monthly["portfolio"].isin(top_names)].sort_values("return").head(50)
  439. primary_daily = pd.DataFrame({key: daily_by_cost[PRIMARY_COST][key].pct_change() for key in keys}).dropna()
  440. correlation = primary_daily.corr().reset_index().rename(columns={"index": "strategy_key"})
  441. args.output_dir.mkdir(parents=True, exist_ok=True)
  442. strategy_path = args.output_dir / "eth-strategy-portfolio-10y-strategies.csv"
  443. total_path = args.output_dir / "eth-strategy-portfolio-10y-total.csv"
  444. top10_path = args.output_dir / "eth-strategy-portfolio-10y-top10.csv"
  445. horizon_path = args.output_dir / "eth-strategy-portfolio-10y-horizon.csv"
  446. corr_path = args.output_dir / "eth-strategy-portfolio-10y-correlation.csv"
  447. worst_path = args.output_dir / "eth-strategy-portfolio-10y-worst-months.csv"
  448. equity_path = args.output_dir / "eth-strategy-portfolio-10y-equity.csv"
  449. report_path = args.output_dir / "eth-strategy-portfolio-10y-report.md"
  450. strategy_total.to_csv(strategy_path, index=False)
  451. portfolio_total.to_csv(total_path, index=False)
  452. primary.head(10).to_csv(top10_path, index=False)
  453. horizon.to_csv(horizon_path, index=False)
  454. correlation.to_csv(corr_path, index=False)
  455. worst_months.to_csv(worst_path, index=False)
  456. equity.to_csv(equity_path, index=False)
  457. command = f"rtk .venv/bin/python {Path(__file__).as_posix()} --years {args.years} --max-leg-count {args.max_leg_count}"
  458. report_path.write_text(
  459. markdown_report(
  460. command=command,
  461. paths=[strategy_path, total_path, top10_path, horizon_path, corr_path, worst_path, equity_path, report_path],
  462. strategy_total=strategy_total,
  463. portfolio_total=portfolio_total,
  464. horizon=horizon,
  465. worst_months=worst_months,
  466. correlation=correlation,
  467. ),
  468. encoding="utf-8",
  469. )
  470. print(primary.head(10).to_string(index=False))
  471. return 0
  472. if __name__ == "__main__":
  473. raise SystemExit(main())