# search_eth_focused_portfolio_conservative.py (20 KB)
  1. from __future__ import annotations
  2. import argparse
  3. import sys
  4. from dataclasses import dataclass
  5. from itertools import combinations
  6. from pathlib import Path
  7. import pandas as pd
  8. ROOT = Path(__file__).resolve().parents[1]
  9. sys.path.insert(0, str(ROOT))
  10. from scripts import explore_ultrashort as explore
  11. from scripts import search_eth_robust_twap_fill_slippage as robust_twap
  12. OUTPUT_DIR = Path("reports/eth-exploration")
  13. PREFIX = "eth-focused-portfolio-conservative"
  14. YEARS = 10.0
  15. COSTS = {
  16. "maker_taker": 0.0021,
  17. "taker_taker": 0.0030,
  18. }
  19. HORIZONS = (
  20. ("3y", pd.DateOffset(years=3)),
  21. ("1y", pd.DateOffset(years=1)),
  22. ("6m", pd.DateOffset(months=6)),
  23. ("3m", pd.DateOffset(months=3)),
  24. )
  25. TARGET_DD = 0.20
  26. TARGET_WORST_MONTH = -0.08
  27. @dataclass(frozen=True)
  28. class Leg:
  29. family: str
  30. bar: str
  31. candidate: object
  32. pair: bool
  33. maker_dependent: bool
  34. def leg_name(leg: Leg) -> str:
  35. if isinstance(leg.candidate, dict):
  36. return robust_twap.strategy_name(leg.candidate)
  37. return str(leg.candidate.name)
  38. def leg_key(leg: Leg) -> str:
  39. return f"{leg.family}:{leg.bar}:{leg_name(leg)}"
  40. def robust_twap_specs() -> list[dict[str, object]]:
  41. return [
  42. {
  43. **robust_twap.BASE_SPEC,
  44. "fill_buffer": 0.0,
  45. "price_slippage": 0.0,
  46. "maker_miss_ratio": 0.25,
  47. },
  48. {
  49. **robust_twap.BASE_SPEC,
  50. "fill_buffer": 0.0002,
  51. "price_slippage": 0.0,
  52. "maker_miss_ratio": 0.25,
  53. },
  54. {
  55. **robust_twap.BASE_SPEC,
  56. "fill_buffer": 0.0002,
  57. "price_slippage": 0.0002,
  58. "maker_miss_ratio": 0.25,
  59. },
  60. {
  61. **robust_twap.BASE_SPEC,
  62. "fill_buffer": 0.0005,
  63. "price_slippage": 0.0,
  64. "maker_miss_ratio": 0.25,
  65. },
  66. ]
  67. def build_legs() -> list[Leg]:
  68. legs: list[Leg] = [
  69. Leg(
  70. "eth_btc_rsi_filter",
  71. "15m",
  72. explore.build_eth_btc_rsi_filter_candidate(50, 3.0, 55.0, 120, 240, 0.0),
  73. True,
  74. False,
  75. ),
  76. Leg(
  77. "eth_btc_rsi_filter",
  78. "15m",
  79. explore.build_eth_btc_rsi_filter_candidate(50, 3.0, 55.0, 480, 240, 0.0),
  80. True,
  81. False,
  82. ),
  83. Leg(
  84. "btc_lead_eth_lag_15m",
  85. "15m",
  86. explore.build_btc_lead_eth_lag_candidate(8, 0.018, 0.006, 8, 0.006, 0.018),
  87. True,
  88. False,
  89. ),
  90. Leg(
  91. "btc_lead_eth_lag_15m",
  92. "15m",
  93. explore.build_btc_lead_eth_lag_candidate(16, 0.024, 0.006, 32, 0.006, 0.018),
  94. True,
  95. False,
  96. ),
  97. Leg(
  98. "btc_lead_eth_lag_5m",
  99. "5m",
  100. explore.build_btc_lead_eth_lag_candidate(16, 0.012, 0.006, 32, 0.006, 0.018),
  101. True,
  102. False,
  103. ),
  104. Leg(
  105. "btc_lead_eth_lag_5m",
  106. "5m",
  107. explore.build_btc_lead_eth_lag_candidate(16, 0.012, 0.006, 8, 0.006, 0.018),
  108. True,
  109. False,
  110. ),
  111. ]
  112. legs.extend(Leg("eth_robust_twap", "15m", spec, False, True) for spec in robust_twap_specs())
  113. return legs
  114. def load_candles(symbol: str, bar: str, years: float) -> list[explore.Candle]:
  115. candles, _ = explore.load_cached_candles(explore.CANDLE_CACHE_DIR, symbol, bar)
  116. if not candles:
  117. raise FileNotFoundError(f"missing cached candles for {symbol} {bar}")
  118. requested = explore.history_bars_for_years(bar, years)
  119. return candles[-requested:] if len(candles) > requested else candles
  120. def run_leg(leg: Leg, data: dict[tuple[str, str], list[explore.Candle]]) -> explore.SegmentResult:
  121. eth = data[("ETH-USDT-SWAP", leg.bar)]
  122. if isinstance(leg.candidate, dict):
  123. return robust_twap.run_robust_twap_segment(eth, leg.candidate).result
  124. btc = data[("BTC-USDT-SWAP", leg.bar)]
  125. eth_aligned, btc_aligned = explore.align_pair_candles(eth, btc)
  126. return leg.candidate.run(
  127. eth_candles=eth_aligned,
  128. btc_candles=btc_aligned,
  129. leverage=explore.LEVERAGE,
  130. warmup_bars=leg.candidate.warmup_bars,
  131. )
  132. def daily_equity(frame: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
  133. series = frame.set_index("ts")["equity"].sort_index()
  134. index = pd.date_range(start.normalize(), end.normalize(), freq="1D", tz="UTC")
  135. return series.reindex(index.union(series.index)).sort_index().ffill().reindex(index).ffill()
  136. def metrics_from_daily_equity(series: pd.Series) -> dict[str, float]:
  137. years = (series.index[-1] - series.index[0]).total_seconds() / 86_400 / 365
  138. total_return = float(series.iloc[-1] / series.iloc[0] - 1.0)
  139. annualized_return = (1.0 + total_return) ** (1.0 / years) - 1.0 if total_return > -1.0 and years > 0.0 else 0.0
  140. max_drawdown = explore.max_drawdown_from_equity([float(value) for value in series])
  141. returns = series.pct_change().dropna()
  142. daily_std = float(returns.std(ddof=1)) if len(returns) > 1 else 0.0
  143. sharpe = float(returns.mean()) / daily_std * (365**0.5) if daily_std else 0.0
  144. return {
  145. "net_total_return": total_return,
  146. "net_annualized_return": annualized_return,
  147. "net_max_drawdown": max_drawdown,
  148. "net_calmar": annualized_return / max_drawdown if max_drawdown else 0.0,
  149. "net_sharpe_daily": sharpe,
  150. }
  151. def horizon_rows(portfolio: str, series: pd.Series) -> list[dict[str, object]]:
  152. rows: list[dict[str, object]] = []
  153. end_time = series.index[-1]
  154. for label, offset in HORIZONS:
  155. cutoff = end_time - offset
  156. horizon = series[series.index >= cutoff]
  157. if len(horizon) < 2:
  158. horizon = series
  159. rows.append(
  160. {
  161. "portfolio": portfolio,
  162. "horizon": label,
  163. "horizon_start": horizon.index[0].strftime("%Y-%m-%d"),
  164. "horizon_end": horizon.index[-1].strftime("%Y-%m-%d"),
  165. **metrics_from_daily_equity(horizon),
  166. }
  167. )
  168. return rows
  169. def monthly_rows(portfolio: str, series: pd.Series) -> pd.DataFrame:
  170. monthly = series.resample("ME").last()
  171. frame = pd.DataFrame(
  172. {
  173. "portfolio": portfolio,
  174. "month": monthly.index.strftime("%Y-%m"),
  175. "start_equity": monthly.shift(1).fillna(series.iloc[0]).to_numpy(),
  176. "end_equity": monthly.to_numpy(),
  177. }
  178. )
  179. frame["return"] = frame["end_equity"] / frame["start_equity"] - 1.0
  180. return frame
  181. def portfolio_equity(
  182. *,
  183. name: str,
  184. legs: tuple[str, ...],
  185. mode: str,
  186. daily: dict[str, pd.Series],
  187. leg_metrics: dict[str, dict[str, float]],
  188. ) -> tuple[pd.Series, pd.Series]:
  189. returns = pd.DataFrame({leg: daily[leg].pct_change().fillna(0.0) for leg in legs}).dropna()
  190. if mode == "equal":
  191. weights = pd.Series(1.0 / len(legs), index=legs)
  192. else:
  193. raw = pd.Series({leg: 1.0 / max(leg_metrics[leg]["net_max_drawdown"], 0.01) for leg in legs})
  194. weights = raw / raw.sum()
  195. equity = explore.INITIAL_EQUITY * (1.0 + returns.mul(weights, axis=1).sum(axis=1)).cumprod()
  196. equity.name = name
  197. return equity, weights
  198. def markdown_table(frame: pd.DataFrame) -> str:
  199. columns = list(frame.columns)
  200. rows = [columns, ["---" for _ in columns]]
  201. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  202. return "\n".join("| " + " | ".join(format_cell(value) for value in row) + " |" for row in rows)
  203. def format_cell(value: object) -> str:
  204. if isinstance(value, float):
  205. return f"{value:.6g}"
  206. return str(value).replace("|", "\\|")
  207. def markdown_report(
  208. *,
  209. command: str,
  210. output_files: list[Path],
  211. portfolio_total: pd.DataFrame,
  212. horizon: pd.DataFrame,
  213. worst_months: pd.DataFrame,
  214. leg_total: pd.DataFrame,
  215. ) -> str:
  216. qualified = portfolio_total[portfolio_total["qualified"]].copy()
  217. top = qualified.head(10) if len(qualified) else portfolio_total.head(10)
  218. best_text = "No portfolio met 3y/1y/6m/3m all positive, DD < 20%, and worst month > -8%."
  219. if len(qualified):
  220. best = qualified.iloc[0]
  221. best_return = qualified.sort_values("net_annualized_return", ascending=False).iloc[0]
  222. best_text = (
  223. f"Found {len(qualified)} qualified portfolios. Lowest-DD qualified `{best['portfolio']}` "
  224. f"under {best['cost_model']}: annualized={best['net_annualized_return']:.4f}, "
  225. f"DD={best['net_max_drawdown']:.4f}, worst_month={best['worst_month_return']:.4f}. "
  226. f"Highest-return qualified `{best_return['portfolio']}` under {best_return['cost_model']}: "
  227. f"annualized={best_return['net_annualized_return']:.4f}, DD={best_return['net_max_drawdown']:.4f}, "
  228. f"worst_month={best_return['worst_month_return']:.4f}."
  229. )
  230. counts = (
  231. portfolio_total.groupby(["cost_model", "scope"], as_index=False)
  232. .agg(portfolios=("portfolio", "count"), qualified=("qualified", "sum"))
  233. .sort_values(["cost_model", "scope"])
  234. )
  235. top_names = set(top["portfolio"])
  236. top_horizon = horizon[horizon["portfolio"].isin(top_names)].copy()
  237. lines = [
  238. "# ETH-focused conservative portfolio search",
  239. "",
  240. f"Run command: `{command}`",
  241. "",
  242. "Output files:",
  243. *[f"- `{path}`" for path in output_files],
  244. "",
  245. "Scope: one continuous cached ETH/BTC intersection, requested 10 years; 3y/1y/6m/3m are sliced from the same portfolio equity curves.",
  246. "Costs: maker_taker and taker_taker only.",
  247. "Candidates: ETH/BTC RSI filter, BTC lead ETH lag 5m/15m, ETH robust TWAP stress variants.",
  248. "Portfolio scopes: all_legs and no_maker_dependent.",
  249. "",
  250. f"Decision: {best_text}",
  251. "",
  252. "## Qualification counts",
  253. "",
  254. markdown_table(counts),
  255. "",
  256. "## Top portfolios by conservative sort",
  257. "",
  258. "Sort: qualified first, then cost/scope, then lowest DD, worst month, minimum horizon return, and annualized return.",
  259. "",
  260. markdown_table(
  261. top[
  262. [
  263. "qualified",
  264. "portfolio",
  265. "cost_model",
  266. "scope",
  267. "mode",
  268. "leg_count",
  269. "net_total_return",
  270. "net_annualized_return",
  271. "net_max_drawdown",
  272. "worst_month_return",
  273. "min_horizon_total_return",
  274. "max_horizon_drawdown",
  275. ]
  276. ]
  277. ),
  278. "",
  279. "## Horizon checks for top portfolios",
  280. "",
  281. markdown_table(
  282. top_horizon[
  283. [
  284. "portfolio",
  285. "cost_model",
  286. "horizon",
  287. "net_total_return",
  288. "net_annualized_return",
  289. "net_max_drawdown",
  290. ]
  291. ]
  292. ),
  293. "",
  294. "## Worst months",
  295. "",
  296. markdown_table(worst_months.head(20)),
  297. "",
  298. "## Leg totals",
  299. "",
  300. markdown_table(
  301. leg_total[
  302. [
  303. "leg_key",
  304. "cost_model",
  305. "family",
  306. "bar",
  307. "maker_dependent",
  308. "trades",
  309. "net_total_return",
  310. "net_annualized_return",
  311. "net_max_drawdown",
  312. ]
  313. ]
  314. ),
  315. ]
  316. return "\n".join(lines) + "\n"
  317. def main() -> int:
  318. parser = argparse.ArgumentParser()
  319. parser.add_argument("--years", type=float, default=YEARS)
  320. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  321. parser.add_argument("--max-leg-count", type=int, default=4)
  322. args = parser.parse_args()
  323. legs = build_legs()
  324. bars = sorted({leg.bar for leg in legs})
  325. data = {
  326. (symbol, bar): load_candles(symbol, bar, args.years)
  327. for bar in bars
  328. for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP")
  329. }
  330. results: dict[str, tuple[Leg, explore.SegmentResult]] = {}
  331. for index, leg in enumerate(legs, start=1):
  332. key = leg_key(leg)
  333. results[key] = (leg, run_leg(leg, data))
  334. print(f"done {index}/{len(legs)} {key}", flush=True)
  335. start = max(pd.to_datetime(result.equity_curve[0]["ts"], unit="ms", utc=True) for _, result in results.values())
  336. end = min(pd.to_datetime(result.equity_curve[-1]["ts"], unit="ms", utc=True) for _, result in results.values())
  337. daily_by_cost: dict[str, dict[str, pd.Series]] = {cost: {} for cost in COSTS}
  338. metrics_by_cost: dict[str, dict[str, dict[str, float]]] = {cost: {} for cost in COSTS}
  339. leg_rows: list[dict[str, object]] = []
  340. for key, (leg, result) in results.items():
  341. for cost_model, cost_value in COSTS.items():
  342. frame = explore.cost_adjusted_trade_equity_frame(result, cost_value)
  343. daily = daily_equity(frame, start, end)
  344. metrics = metrics_from_daily_equity(daily)
  345. daily_by_cost[cost_model][key] = daily
  346. metrics_by_cost[cost_model][key] = metrics
  347. leg_rows.append(
  348. {
  349. "leg_key": key,
  350. "cost_model": cost_model,
  351. "roundtrip_cost_on_margin": cost_value,
  352. "family": leg.family,
  353. "bar": leg.bar,
  354. "maker_dependent": leg.maker_dependent,
  355. "name": leg_name(leg),
  356. "first_candle": start.strftime("%Y-%m-%d %H:%M"),
  357. "last_candle": end.strftime("%Y-%m-%d %H:%M"),
  358. "years": (end - start).total_seconds() / 86_400 / 365,
  359. "trades": result.trade_count,
  360. "gross_total_return": result.total_return,
  361. "gross_max_drawdown_mark_to_market": result.max_drawdown,
  362. **metrics,
  363. }
  364. )
  365. portfolio_rows: list[dict[str, object]] = []
  366. horizon_output: list[dict[str, object]] = []
  367. monthly_frames: list[pd.DataFrame] = []
  368. equity_frames: list[pd.DataFrame] = []
  369. all_keys = list(results.keys())
  370. combo_index = 0
  371. for cost_model, daily in daily_by_cost.items():
  372. for scope, keys in (
  373. ("all_legs", all_keys),
  374. ("no_maker_dependent", [key for key in all_keys if not results[key][0].maker_dependent]),
  375. ):
  376. max_leg_count = min(args.max_leg_count, len(keys))
  377. for leg_count in range(2, max_leg_count + 1):
  378. for selected in combinations(keys, leg_count):
  379. if len({results[key][0].family for key in selected}) != leg_count:
  380. continue
  381. for mode in ("equal", "risk"):
  382. combo_index += 1
  383. families = "+".join(results[key][0].family for key in selected)
  384. portfolio = f"{scope}-{mode}-{leg_count}-c{combo_index:04d}-{families}"
  385. series, weights = portfolio_equity(
  386. name=portfolio,
  387. legs=selected,
  388. mode=mode,
  389. daily=daily,
  390. leg_metrics=metrics_by_cost[cost_model],
  391. )
  392. metrics = metrics_from_daily_equity(series)
  393. monthly = monthly_rows(portfolio, series)
  394. worst_month = float(monthly["return"].min())
  395. current_horizons = horizon_rows(portfolio, series)
  396. horizon_min_return = min(float(row["net_total_return"]) for row in current_horizons)
  397. horizon_max_dd = max(float(row["net_max_drawdown"]) for row in current_horizons)
  398. all_horizons_positive = horizon_min_return > 0.0
  399. qualified = (
  400. all_horizons_positive
  401. and metrics["net_max_drawdown"] < TARGET_DD
  402. and worst_month > TARGET_WORST_MONTH
  403. )
  404. portfolio_rows.append(
  405. {
  406. "portfolio": portfolio,
  407. "cost_model": cost_model,
  408. "scope": scope,
  409. "mode": mode,
  410. "leg_count": leg_count,
  411. "legs": ";".join(selected),
  412. "weights": ";".join(f"{key}={weights[key]:.8f}" for key in selected),
  413. "first_candle": start.strftime("%Y-%m-%d %H:%M"),
  414. "last_candle": end.strftime("%Y-%m-%d %H:%M"),
  415. "years": (end - start).total_seconds() / 86_400 / 365,
  416. "worst_month_return": worst_month,
  417. "all_horizons_positive": all_horizons_positive,
  418. "min_horizon_total_return": horizon_min_return,
  419. "max_horizon_drawdown": horizon_max_dd,
  420. "qualified": qualified,
  421. **metrics,
  422. }
  423. )
  424. for row in current_horizons:
  425. horizon_output.append({"cost_model": cost_model, "scope": scope, **row})
  426. monthly_frames.append(monthly.assign(cost_model=cost_model, scope=scope))
  427. equity_frames.append(
  428. pd.DataFrame(
  429. {
  430. "portfolio": portfolio,
  431. "cost_model": cost_model,
  432. "scope": scope,
  433. "date": series.index.strftime("%Y-%m-%d"),
  434. "equity": series.to_numpy(),
  435. }
  436. )
  437. )
  438. leg_total = pd.DataFrame(leg_rows).sort_values(["cost_model", "net_calmar", "net_annualized_return"], ascending=[True, False, False])
  439. portfolio_total = pd.DataFrame(portfolio_rows).sort_values(
  440. [
  441. "qualified",
  442. "cost_model",
  443. "scope",
  444. "net_max_drawdown",
  445. "worst_month_return",
  446. "min_horizon_total_return",
  447. "net_annualized_return",
  448. ],
  449. ascending=[False, True, True, True, False, False, False],
  450. )
  451. top_names = set(portfolio_total.head(25)["portfolio"])
  452. horizon = pd.DataFrame(horizon_output)
  453. horizon["horizon"] = pd.Categorical(horizon["horizon"], categories=["3y", "1y", "6m", "3m"], ordered=True)
  454. horizon = horizon[horizon["portfolio"].isin(top_names)].sort_values(["cost_model", "portfolio", "horizon"])
  455. monthly = pd.concat(monthly_frames, ignore_index=True)
  456. worst_months = monthly[monthly["portfolio"].isin(top_names)].sort_values("return").head(100)
  457. equity = pd.concat(equity_frames, ignore_index=True)
  458. equity = equity[equity["portfolio"].isin(top_names)]
  459. args.output_dir.mkdir(parents=True, exist_ok=True)
  460. leg_path = args.output_dir / f"{PREFIX}-legs.csv"
  461. total_path = args.output_dir / f"{PREFIX}-total.csv"
  462. qualified_path = args.output_dir / f"{PREFIX}-qualified.csv"
  463. horizon_path = args.output_dir / f"{PREFIX}-horizon.csv"
  464. worst_path = args.output_dir / f"{PREFIX}-worst-months.csv"
  465. equity_path = args.output_dir / f"{PREFIX}-equity.csv"
  466. report_path = args.output_dir / f"{PREFIX}-report.md"
  467. leg_total.to_csv(leg_path, index=False)
  468. portfolio_total.to_csv(total_path, index=False)
  469. portfolio_total[portfolio_total["qualified"]].to_csv(qualified_path, index=False)
  470. horizon.to_csv(horizon_path, index=False)
  471. worst_months.to_csv(worst_path, index=False)
  472. equity.to_csv(equity_path, index=False)
  473. command = f"rtk .venv/bin/python {Path(__file__).as_posix()} --years {args.years} --max-leg-count {args.max_leg_count}"
  474. report_path.write_text(
  475. markdown_report(
  476. command=command,
  477. output_files=[leg_path, total_path, qualified_path, horizon_path, worst_path, equity_path, report_path],
  478. portfolio_total=portfolio_total,
  479. horizon=horizon,
  480. worst_months=worst_months,
  481. leg_total=leg_total,
  482. ),
  483. encoding="utf-8",
  484. )
  485. print(portfolio_total.head(20).to_string(index=False))
  486. return 0
  487. if __name__ == "__main__":
  488. raise SystemExit(main())