# search_cross_symbol_high_frequency_portfolios.py (32 KB)
from __future__ import annotations

import argparse
import json
import sys
from dataclasses import dataclass
from itertools import combinations
from pathlib import Path
from typing import Callable

import pandas as pd

# Put the parent of this file's directory on sys.path before the project
# imports below so they resolve when the file is executed directly.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from okx_codex_trader.bbmr_report import BBMRConfig, run_bbmr_segment
from okx_codex_trader.bbsb_report import BBSBConfig, run_bbsb_segment
from okx_codex_trader.models import Candle
from scripts import explore_ultrashort as explore
from scripts import search_eth_btc_nextgen_variants as nextgen
  16. OUTPUT_DIR = Path("reports/ultrashort")
  17. PREFIX = "high-frequency-portfolio"
  18. YEARS = 10.0
  19. COST_MODELS = {
  20. "maker_taker": 0.0021,
  21. "taker_taker": 0.0030,
  22. }
  23. PRIMARY_COST_MODEL = "maker_taker"
  24. MIN_TRADES_PER_MONTH = 15.0
  25. MAX_DRAWDOWN = 0.20
  26. PRIMARY_SYMBOLS = ("BTC-USDT-SWAP", "ETH-USDT-SWAP")
  27. HORIZONS = (
  28. ("full", None),
  29. ("3y", pd.DateOffset(years=3)),
  30. ("1y", pd.DateOffset(years=1)),
  31. ("6m", pd.DateOffset(months=6)),
  32. ("3m", pd.DateOffset(months=3)),
  33. )
  34. @dataclass(frozen=True)
  35. class LegSpec:
  36. key: str
  37. symbol: str
  38. family: str
  39. bar: str
  40. pair: bool
  41. warmup_bars: int
  42. run: Callable[[dict[tuple[str, str], list[Candle]]], explore.SegmentResult]
  43. @dataclass(frozen=True)
  44. class LegReturn:
  45. leg: str
  46. symbol: str
  47. family: str
  48. exit_time: pd.Timestamp
  49. value: float
  50. def load_candles(symbol: str, bar: str, years: float) -> list[Candle]:
  51. candles, _ = explore.load_cached_candles(explore.CANDLE_CACHE_DIR, symbol, bar)
  52. if not candles:
  53. raise FileNotFoundError(f"missing cached candles for {symbol} {bar}")
  54. requested = explore.history_bars_for_years(bar, years)
  55. return candles[-requested:] if len(candles) > requested else candles
  56. def single_leg(symbol: str, family: str, bar: str, candidate: explore.Candidate) -> LegSpec:
  57. key = f"{symbol}:{family}:{bar}:{candidate.name}"
  58. return LegSpec(
  59. key=key,
  60. symbol=symbol,
  61. family=family,
  62. bar=bar,
  63. pair=False,
  64. warmup_bars=candidate.warmup_bars,
  65. run=lambda data, symbol=symbol, bar=bar, candidate=candidate: candidate.run(
  66. candles=data[(symbol, bar)],
  67. leverage=explore.LEVERAGE,
  68. warmup_bars=candidate.warmup_bars,
  69. ),
  70. )
  71. def pair_leg(family: str, bar: str, candidate: explore.PairCandidate) -> LegSpec:
  72. key = f"ETH-USDT-SWAP:{family}:{bar}:{candidate.name}"
  73. return LegSpec(
  74. key=key,
  75. symbol="ETH-USDT-SWAP",
  76. family=family,
  77. bar=bar,
  78. pair=True,
  79. warmup_bars=candidate.warmup_bars,
  80. run=lambda data, bar=bar, candidate=candidate: run_pair_candidate(candidate, data, bar),
  81. )
  82. def run_pair_candidate(
  83. candidate: explore.PairCandidate,
  84. data: dict[tuple[str, str], list[Candle]],
  85. bar: str,
  86. ) -> explore.SegmentResult:
  87. eth, btc = explore.align_pair_candles(data[("ETH-USDT-SWAP", bar)], data[("BTC-USDT-SWAP", bar)])
  88. return candidate.run(
  89. eth_candles=eth,
  90. btc_candles=btc,
  91. leverage=explore.LEVERAGE,
  92. warmup_bars=candidate.warmup_bars,
  93. )
  94. def build_single_symbol_candidates() -> list[tuple[str, explore.Candidate]]:
  95. candidates: list[tuple[str, explore.Candidate]] = [
  96. (
  97. "bbmr",
  98. explore.Candidate(
  99. "bbmr-default",
  100. 69,
  101. lambda candles, leverage, warmup_bars: run_bbmr_segment(
  102. candles=candles,
  103. leverage=leverage,
  104. warmup_bars=warmup_bars,
  105. config=BBMRConfig(),
  106. ),
  107. ),
  108. ),
  109. ]
  110. for trend in (30, 50):
  111. for long_threshold, short_threshold in ((8.0, 92.0), (12.0, 88.0)):
  112. candidates.append(("rsi", explore.build_rsi2_side_candidate(trend, long_threshold, short_threshold, 50.0, "both")))
  113. for fast, slow in ((8, 21), (13, 34)):
  114. candidates.append(("ma", explore.build_ma_cross_candidate(fast, slow, "both")))
  115. for window in (24, 48):
  116. candidates.append(
  117. (
  118. "vwap",
  119. explore.Candidate(
  120. f"vwap-revert-w{window}-z1.5-sl0.006",
  121. window * 2,
  122. lambda candles, leverage, warmup_bars, window=window: explore.run_vwap_reversion_segment(
  123. candles=candles,
  124. leverage=leverage,
  125. warmup_bars=warmup_bars,
  126. window=window,
  127. entry_z=1.5,
  128. exit_z=0.2,
  129. stop_loss_pct=0.006,
  130. ),
  131. ),
  132. )
  133. )
  134. candidates.append(
  135. (
  136. "bbsb",
  137. explore.Candidate(
  138. "bbsb-default",
  139. 69,
  140. lambda candles, leverage, warmup_bars: run_bbsb_segment(
  141. candles=candles,
  142. leverage=leverage,
  143. warmup_bars=warmup_bars,
  144. config=BBSBConfig(),
  145. ),
  146. ),
  147. )
  148. )
  149. return candidates
  150. def build_legs() -> list[LegSpec]:
  151. legs: list[LegSpec] = []
  152. single_candidates = build_single_symbol_candidates()
  153. for symbol in PRIMARY_SYMBOLS:
  154. for family, candidate in single_candidates:
  155. legs.append(single_leg(symbol, family, "15m", candidate))
  156. nextgen_keep = {
  157. "btc_trend_eth_rsi:15m:eth-btc-rsi-filter-et50-l3.0-x55.0-bt480-bm240-br0.0",
  158. "btc_shock_guard_eth_rsi:15m:eth-btc-shock-filter-et50-l3.0-x55.0-bt480-bm240-br0.01-sw96-sv0.01-sd0.05",
  159. "btc_lead_eth_lag:5m:btc-lead-eth-lag-lb16-br0.012-gap0.006-mh8-sl0.006-tp0.018",
  160. "btc_lead_eth_lag:5m:btc-lead-eth-lag-lb16-br0.012-gap0.006-mh32-sl0.006-tp0.018",
  161. "btc_lead_eth_lag:15m:btc-lead-eth-lag-lb8-br0.018-gap0.006-mh8-sl0.006-tp0.018",
  162. "btc_lead_eth_lag:15m:btc-lead-eth-lag-lb16-br0.024-gap0.006-mh32-sl0.006-tp0.018",
  163. }
  164. for strategy in nextgen.build_strategies():
  165. key = f"{strategy.family}:{strategy.bar}:{strategy.candidate.name}"
  166. if key in nextgen_keep:
  167. legs.append(pair_leg(f"nextgen_{strategy.family}", strategy.bar, strategy.candidate))
  168. return legs
  169. def cost_trade_returns(spec: LegSpec, result: explore.SegmentResult, roundtrip_cost: float) -> list[LegReturn]:
  170. rows: list[LegReturn] = []
  171. for trade in result.trades:
  172. value = float(trade["return_pct"]) / 100.0 - roundtrip_cost * float(trade.get("cost_weight", 1.0))
  173. rows.append(
  174. LegReturn(
  175. leg=spec.key,
  176. symbol=spec.symbol,
  177. family=spec.family,
  178. exit_time=pd.to_datetime(str(trade["exit_time"]), utc=True),
  179. value=value,
  180. )
  181. )
  182. return rows
  183. def returns_to_daily_equity(
  184. name: str,
  185. returns: list[LegReturn],
  186. start: pd.Timestamp,
  187. end: pd.Timestamp,
  188. ) -> pd.Series:
  189. index = pd.date_range(start.normalize(), end.normalize(), freq="1D", tz="UTC")
  190. if not returns:
  191. series = pd.Series(explore.INITIAL_EQUITY, index=index, name=name, dtype=float)
  192. return series
  193. frame = pd.DataFrame({"date": [row.exit_time.normalize() for row in returns], "return": [row.value for row in returns]})
  194. daily_returns = frame.groupby("date")["return"].sum().reindex(index, fill_value=0.0)
  195. equity = explore.INITIAL_EQUITY * (1.0 + daily_returns).cumprod()
  196. equity.iloc[0] = explore.INITIAL_EQUITY
  197. equity.name = name
  198. return equity
  199. def metrics_from_daily(series: pd.Series) -> dict[str, float]:
  200. years = (series.index[-1] - series.index[0]).total_seconds() / 86_400 / 365
  201. total_return = float(series.iloc[-1] / series.iloc[0] - 1.0)
  202. annualized_return = (1.0 + total_return) ** (1.0 / years) - 1.0 if total_return > -1.0 and years > 0.0 else 0.0
  203. max_drawdown = explore.max_drawdown_from_equity([float(value) for value in series])
  204. daily_returns = series.pct_change().dropna()
  205. daily_std = float(daily_returns.std(ddof=1)) if len(daily_returns) > 1 else 0.0
  206. risk_reward = float(daily_returns.mean()) / daily_std * (365**0.5) if daily_std else 0.0
  207. return {
  208. "total_return": total_return,
  209. "annualized_return": annualized_return,
  210. "max_drawdown": max_drawdown,
  211. "calmar": annualized_return / max_drawdown if max_drawdown else 0.0,
  212. "risk_reward_ratio": risk_reward,
  213. }
  214. def trade_stats(returns: list[LegReturn], start: pd.Timestamp, end: pd.Timestamp) -> dict[str, float]:
  215. values = [row.value for row in returns if start <= row.exit_time <= end]
  216. wins = [value for value in values if value > 0.0]
  217. losses = [value for value in values if value < 0.0]
  218. avg_win = sum(wins) / len(wins) if wins else 0.0
  219. avg_loss_abs = abs(sum(losses) / len(losses)) if losses else 0.0
  220. gross_profit = sum(wins)
  221. gross_loss_abs = abs(sum(losses))
  222. months = max((end - start).total_seconds() / 86_400 / 30.4375, 1e-9)
  223. return {
  224. "trades": len(values),
  225. "trades_per_month": len(values) / months,
  226. "win_rate": len(wins) / len(values) if values else 0.0,
  227. "payoff_ratio": avg_win / avg_loss_abs if avg_loss_abs else 0.0,
  228. "profit_factor": gross_profit / gross_loss_abs if gross_loss_abs else 0.0,
  229. }
  230. def monthly_rows(name: str, series: pd.Series) -> pd.DataFrame:
  231. monthly = series.resample("ME").last()
  232. frame = pd.DataFrame(
  233. {
  234. "portfolio": name,
  235. "month": monthly.index.strftime("%Y-%m"),
  236. "start_equity": monthly.shift(1).fillna(series.iloc[0]).to_numpy(),
  237. "end_equity": monthly.to_numpy(),
  238. }
  239. )
  240. frame["return"] = frame["end_equity"] / frame["start_equity"] - 1.0
  241. return frame
  242. def monthly_stability(monthly: pd.DataFrame) -> dict[str, float | int]:
  243. positive_months = int((monthly["return"] > 0.0).sum()) if len(monthly) else 0
  244. negative_months = int((monthly["return"] < 0.0).sum()) if len(monthly) else 0
  245. return {
  246. "months": len(monthly),
  247. "positive_month_rate": positive_months / len(monthly) if len(monthly) else 0.0,
  248. "negative_months": negative_months,
  249. }
  250. def horizon_metrics(
  251. portfolio: str,
  252. cost_model: str,
  253. series: pd.Series,
  254. returns: list[LegReturn],
  255. monthly: pd.DataFrame,
  256. ) -> list[dict[str, object]]:
  257. output: list[dict[str, object]] = []
  258. end = series.index[-1]
  259. for label, offset in HORIZONS:
  260. horizon = series if offset is None else series[series.index >= end - offset]
  261. if len(horizon) < 2:
  262. horizon = series
  263. start = horizon.index[0]
  264. horizon_monthly = monthly[monthly["month"] >= start.strftime("%Y-%m")]
  265. worst = horizon_monthly.sort_values("return").iloc[0] if len(horizon_monthly) else None
  266. output.append(
  267. {
  268. "portfolio": portfolio,
  269. "cost_model": cost_model,
  270. "horizon": label,
  271. "horizon_start": start.strftime("%Y-%m-%d"),
  272. "horizon_end": end.strftime("%Y-%m-%d"),
  273. "worst_month": "" if worst is None else str(worst["month"]),
  274. "worst_month_return": 0.0 if worst is None else float(worst["return"]),
  275. **metrics_from_daily(horizon),
  276. **trade_stats(returns, start, end),
  277. **monthly_stability(horizon_monthly),
  278. }
  279. )
  280. return output
  281. def split_weighted_returns(legs: tuple[str, ...], weights: pd.Series, leg_returns: dict[str, list[LegReturn]]) -> list[LegReturn]:
  282. rows: list[LegReturn] = []
  283. for leg in legs:
  284. weight = float(weights[leg])
  285. for row in leg_returns[leg]:
  286. rows.append(LegReturn(row.leg, row.symbol, row.family, row.exit_time, row.value * weight))
  287. return rows
  288. def leg_contribution_rows(
  289. portfolio: str,
  290. cost_model: str,
  291. legs: tuple[str, ...],
  292. weights: pd.Series,
  293. leg_returns: dict[str, list[LegReturn]],
  294. end: pd.Timestamp,
  295. ) -> list[dict[str, object]]:
  296. rows: list[dict[str, object]] = []
  297. for label, offset in HORIZONS:
  298. start = pd.Timestamp.min.tz_localize("UTC") if offset is None else end - offset
  299. total = 0.0
  300. by_leg: dict[str, float] = {}
  301. for leg in legs:
  302. contribution = sum(row.value * float(weights[leg]) for row in leg_returns[leg] if row.exit_time >= start)
  303. by_leg[leg] = contribution
  304. total += contribution
  305. for leg in legs:
  306. rows.append(
  307. {
  308. "portfolio": portfolio,
  309. "cost_model": cost_model,
  310. "horizon": label,
  311. "leg": leg,
  312. "weight": float(weights[leg]),
  313. "contribution_return_sum": by_leg[leg],
  314. "contribution_share": by_leg[leg] / total if total else 0.0,
  315. }
  316. )
  317. return rows
  318. def add_cost_columns(row: dict[str, object], cost_model: str, roundtrip_cost: float) -> None:
  319. row["cost_model"] = cost_model
  320. row["roundtrip_cost_on_margin"] = roundtrip_cost
  321. row["net_total_return"] = row["total_return"]
  322. row["net_annualized_return"] = row["annualized_return"]
  323. row["net_max_drawdown"] = row["max_drawdown"]
  324. row["net_calmar"] = row["calmar"]
  325. def risk_qualified(frame: pd.DataFrame) -> pd.DataFrame:
  326. full = frame[frame["horizon"] == "full"].copy()
  327. recent = full
  328. for horizon in ("3y", "1y", "6m", "3m"):
  329. part = frame[frame["horizon"] == horizon][["cost_model", "portfolio", "total_return", "max_drawdown", "calmar"]].rename(
  330. columns={"total_return": f"ret_{horizon}", "max_drawdown": f"dd_{horizon}", "calmar": f"calmar_{horizon}"}
  331. )
  332. recent = recent.merge(part, on=["cost_model", "portfolio"], how="inner")
  333. qualified = recent[
  334. (recent["trades_per_month"] >= MIN_TRADES_PER_MONTH)
  335. & (recent["max_drawdown"] < MAX_DRAWDOWN)
  336. & (recent["ret_1y"] > 0.0)
  337. & (recent["ret_6m"] > 0.0)
  338. & (recent["ret_3m"] > 0.0)
  339. ].copy()
  340. qualified["risk_rank"] = (
  341. qualified["calmar"] * 2.0
  342. + qualified["annualized_return"]
  343. + qualified["ret_1y"] * 0.5
  344. - qualified["max_drawdown"]
  345. )
  346. return qualified.sort_values(["cost_model", "risk_rank"], ascending=[True, False])
  347. def robust_survivors(frame: pd.DataFrame) -> pd.DataFrame:
  348. full = frame[frame["horizon"] == "full"].copy()
  349. recent = full
  350. for horizon in ("3y", "1y", "6m", "3m"):
  351. part = frame[frame["horizon"] == horizon][["cost_model", "portfolio", "total_return", "calmar"]].rename(
  352. columns={"total_return": f"ret_{horizon}", "calmar": f"calmar_{horizon}"}
  353. )
  354. recent = recent.merge(part, on=["cost_model", "portfolio"], how="inner")
  355. return recent[
  356. (recent["trades_per_month"] >= MIN_TRADES_PER_MONTH)
  357. & (recent["total_return"] > 0.0)
  358. & (recent["calmar"] > 0.0)
  359. & (recent["ret_3y"] > 0.0)
  360. & (recent["ret_1y"] > 0.0)
  361. & (recent["ret_6m"] > 0.0)
  362. & (recent["ret_3m"] > 0.0)
  363. & (recent["calmar_3y"] > 0.0)
  364. & (recent["calmar_1y"] > 0.0)
  365. & (recent["calmar_6m"] > 0.0)
  366. & (recent["calmar_3m"] > 0.0)
  367. ].copy()
  368. def format_cell(value: object) -> str:
  369. if isinstance(value, float):
  370. return f"{value:.6g}"
  371. return str(value).replace("|", "\\|")
  372. def markdown_table(frame: pd.DataFrame) -> str:
  373. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  374. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  375. return "\n".join("| " + " | ".join(format_cell(value) for value in row) + " |" for row in rows)
  376. def write_report(
  377. command: str,
  378. output_files: list[Path],
  379. portfolio_total: pd.DataFrame,
  380. qualified: pd.DataFrame,
  381. robust: pd.DataFrame,
  382. horizon: pd.DataFrame,
  383. leg_contrib: pd.DataFrame,
  384. ) -> str:
  385. primary_total = portfolio_total[portfolio_total["cost_model"] == PRIMARY_COST_MODEL]
  386. primary_qualified = qualified[qualified["cost_model"] == PRIMARY_COST_MODEL]
  387. stress_qualified = qualified[qualified["cost_model"] == "taker_taker"]
  388. top = primary_total.head(10)
  389. top_qualified = primary_qualified.head(10)
  390. best_name = str(top.iloc[0]["portfolio"]) if len(top) else ""
  391. best_horizon = horizon[(horizon["cost_model"] == PRIMARY_COST_MODEL) & (horizon["portfolio"] == best_name)]
  392. best_contrib = leg_contrib[(leg_contrib["cost_model"] == PRIMARY_COST_MODEL) & (leg_contrib["portfolio"] == best_name)]
  393. lines = [
  394. "# Cross-symbol high-frequency portfolio ranking",
  395. "",
  396. f"Run command: `{command}`",
  397. "",
  398. "No exchange API, order placement, or live trading path is used. The search reads local BTC/ETH candle data and existing local strategy modules only.",
  399. "Primary ranking uses maker/taker roundtrip margin cost 0.21%. Taker/taker stress uses 0.30%. Funding and slippage remain excluded.",
  400. "",
  401. "Output files:",
  402. *[f"- `{path}`" for path in output_files],
  403. "",
  404. "Selection target: at least 15 trades/month, max DD below 20%, and positive 1y/6m/3m return.",
  405. f"Strict robust survivors with positive full/3y/1y/6m/3m net return and Calmar: {len(robust)}.",
  406. "",
  407. "## Top portfolios: maker/taker",
  408. "",
  409. markdown_table(
  410. top[
  411. [
  412. "cost_model",
  413. "portfolio",
  414. "mode",
  415. "leg_count",
  416. "symbols",
  417. "families",
  418. "total_return",
  419. "annualized_return",
  420. "max_drawdown",
  421. "calmar",
  422. "risk_reward_ratio",
  423. "trades_per_month",
  424. "worst_month_return",
  425. "positive_month_rate",
  426. "recent_positive",
  427. "qualified",
  428. ]
  429. ]
  430. ),
  431. "",
  432. "## Qualified portfolios: maker/taker",
  433. "",
  434. markdown_table(
  435. top_qualified[
  436. [
  437. "cost_model",
  438. "portfolio",
  439. "mode",
  440. "leg_count",
  441. "symbols",
  442. "families",
  443. "total_return",
  444. "annualized_return",
  445. "max_drawdown",
  446. "calmar",
  447. "trades_per_month",
  448. "worst_month_return",
  449. "positive_month_rate",
  450. "ret_3y",
  451. "ret_1y",
  452. "ret_6m",
  453. "ret_3m",
  454. ]
  455. ]
  456. )
  457. if len(top_qualified)
  458. else "No portfolio cleared all target filters.",
  459. "",
  460. "## Taker/taker stress survivors",
  461. "",
  462. markdown_table(
  463. stress_qualified.head(10)[
  464. [
  465. "cost_model",
  466. "portfolio",
  467. "mode",
  468. "leg_count",
  469. "total_return",
  470. "annualized_return",
  471. "max_drawdown",
  472. "calmar",
  473. "trades_per_month",
  474. "positive_month_rate",
  475. "ret_3y",
  476. "ret_1y",
  477. "ret_6m",
  478. "ret_3m",
  479. ]
  480. ]
  481. )
  482. if len(stress_qualified)
  483. else "No taker/taker portfolio cleared all target filters.",
  484. "",
  485. "## Best portfolio horizons",
  486. "",
  487. markdown_table(
  488. best_horizon[
  489. [
  490. "horizon",
  491. "total_return",
  492. "annualized_return",
  493. "max_drawdown",
  494. "calmar",
  495. "win_rate",
  496. "payoff_ratio",
  497. "profit_factor",
  498. "risk_reward_ratio",
  499. "trades",
  500. "trades_per_month",
  501. "worst_month",
  502. "worst_month_return",
  503. "positive_month_rate",
  504. "negative_months",
  505. ]
  506. ]
  507. ),
  508. "",
  509. "## Best portfolio leg contribution",
  510. "",
  511. markdown_table(best_contrib[best_contrib["horizon"].isin(["full", "1y", "6m", "3m"])]),
  512. ]
  513. return "\n".join(lines) + "\n"
  514. def main() -> int:
  515. parser = argparse.ArgumentParser()
  516. parser.add_argument("--years", type=float, default=YEARS)
  517. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  518. parser.add_argument("--max-legs-per-symbol", type=int, default=12)
  519. parser.add_argument("--max-leg-count", type=int, default=4)
  520. args = parser.parse_args()
  521. legs = build_legs()
  522. bars = sorted({leg.bar for leg in legs})
  523. data = {(symbol, bar): load_candles(symbol, bar, args.years) for symbol in PRIMARY_SYMBOLS for bar in bars}
  524. leg_returns: dict[str, list[LegReturn]] = {}
  525. leg_rows: list[dict[str, object]] = []
  526. daily: dict[str, pd.Series] = {}
  527. for index, leg in enumerate(legs, start=1):
  528. result = leg.run(data)
  529. start = pd.to_datetime(result.equity_curve[0]["ts"], unit="ms", utc=True)
  530. end = pd.to_datetime(result.equity_curve[-1]["ts"], unit="ms", utc=True)
  531. for cost_model, roundtrip_cost in COST_MODELS.items():
  532. cost_key = f"{cost_model}:{leg.key}"
  533. returns = cost_trade_returns(leg, result, roundtrip_cost)
  534. leg_returns[cost_key] = returns
  535. series = returns_to_daily_equity(cost_key, returns, start, end)
  536. monthly = monthly_rows(cost_key, series)
  537. worst = float(monthly["return"].min()) if len(monthly) else 0.0
  538. row = {
  539. "leg": leg.key,
  540. "symbol": leg.symbol,
  541. "family": leg.family,
  542. "bar": leg.bar,
  543. "pair_signal": leg.pair,
  544. "start": start.strftime("%Y-%m-%d %H:%M"),
  545. "end": end.strftime("%Y-%m-%d %H:%M"),
  546. "trades": len(returns),
  547. "worst_month_return": worst,
  548. **metrics_from_daily(series),
  549. **trade_stats(returns, series.index[0], series.index[-1]),
  550. **monthly_stability(monthly),
  551. }
  552. add_cost_columns(row, cost_model, roundtrip_cost)
  553. leg_rows.append(row)
  554. daily[cost_key] = series
  555. print(f"done leg {index}/{len(legs)} {leg.key}")
  556. leg_total_all = pd.DataFrame(leg_rows).sort_values(
  557. ["calmar", "annualized_return", "trades_per_month"],
  558. ascending=[False, False, False],
  559. )
  560. primary_leg_total = leg_total_all[leg_total_all["cost_model"] == PRIMARY_COST_MODEL]
  561. selected: list[str] = []
  562. for symbol in PRIMARY_SYMBOLS:
  563. symbol_rows = primary_leg_total[primary_leg_total["symbol"] == symbol].head(args.max_legs_per_symbol)
  564. selected.extend(str(leg) for leg in symbol_rows["leg"])
  565. if len(set(selected)) < 2:
  566. raise RuntimeError("not enough selected BTC/ETH legs to build portfolios")
  567. selected = list(dict.fromkeys(selected))
  568. key_to_leg = {leg.key: leg for leg in legs}
  569. primary_daily_key = lambda key: f"{PRIMARY_COST_MODEL}:{key}"
  570. common_start = max(daily[primary_daily_key(key)].index[0] for key in selected)
  571. common_end = min(daily[primary_daily_key(key)].index[-1] for key in selected)
  572. selected_daily = {
  573. cost_model: {
  574. key: daily[f"{cost_model}:{key}"][(daily[f"{cost_model}:{key}"].index >= common_start) & (daily[f"{cost_model}:{key}"].index <= common_end)]
  575. for key in selected
  576. }
  577. for cost_model in COST_MODELS
  578. }
  579. leg_metrics = {row["leg"]: row for row in leg_rows if row["cost_model"] == PRIMARY_COST_MODEL}
  580. portfolio_rows: list[dict[str, object]] = []
  581. horizon_rows_output: list[dict[str, object]] = []
  582. monthly_frames: list[pd.DataFrame] = []
  583. equity_frames: list[pd.DataFrame] = []
  584. contribution_rows: list[dict[str, object]] = []
  585. combo_index = 0
  586. for leg_count in range(2, min(args.max_leg_count, len(selected)) + 1):
  587. for legs_tuple in combinations(selected, leg_count):
  588. symbols = {key_to_leg[key].symbol for key in legs_tuple}
  589. if not set(PRIMARY_SYMBOLS).issubset(symbols):
  590. continue
  591. if len({key_to_leg[key].family for key in legs_tuple}) < min(2, leg_count):
  592. continue
  593. for mode in ("equal", "risk"):
  594. combo_index += 1
  595. if mode == "equal":
  596. weights = pd.Series(1.0 / leg_count, index=legs_tuple)
  597. else:
  598. raw = pd.Series({key: 1.0 / max(float(leg_metrics[key]["max_drawdown"]), 0.02) for key in legs_tuple})
  599. weights = raw / raw.sum()
  600. name = f"{mode}-{leg_count}-hf{combo_index:05d}"
  601. for cost_model, roundtrip_cost in COST_MODELS.items():
  602. returns = pd.DataFrame({key: selected_daily[cost_model][key].pct_change().fillna(0.0) for key in legs_tuple}).dropna()
  603. portfolio_returns = returns.mul(weights, axis=1).sum(axis=1)
  604. series = explore.INITIAL_EQUITY * (1.0 + portfolio_returns).cumprod()
  605. series.iloc[0] = explore.INITIAL_EQUITY
  606. series.name = name
  607. cost_leg_returns = {key: leg_returns[f"{cost_model}:{key}"] for key in legs_tuple}
  608. weighted_trade_returns = split_weighted_returns(legs_tuple, weights, cost_leg_returns)
  609. monthly = monthly_rows(name, series)
  610. monthly["cost_model"] = cost_model
  611. horizons = horizon_metrics(name, cost_model, series, weighted_trade_returns, monthly)
  612. recent = {row["horizon"]: float(row["total_return"]) for row in horizons}
  613. recent_positive = recent["1y"] > 0.0 and recent["6m"] > 0.0 and recent["3m"] > 0.0
  614. stats = trade_stats(weighted_trade_returns, series.index[0], series.index[-1])
  615. metrics = metrics_from_daily(series)
  616. worst = monthly.sort_values("return").iloc[0]
  617. qualified = (
  618. stats["trades_per_month"] >= MIN_TRADES_PER_MONTH
  619. and metrics["max_drawdown"] < MAX_DRAWDOWN
  620. and recent_positive
  621. )
  622. row = {
  623. "portfolio": name,
  624. "mode": mode,
  625. "leg_count": leg_count,
  626. "legs": ";".join(legs_tuple),
  627. "weights_json": json.dumps({key: float(weights[key]) for key in legs_tuple}, separators=(",", ":")),
  628. "symbols": ",".join(sorted(symbols)),
  629. "families": ",".join(sorted({key_to_leg[key].family for key in legs_tuple})),
  630. "start": series.index[0].strftime("%Y-%m-%d"),
  631. "end": series.index[-1].strftime("%Y-%m-%d"),
  632. "worst_month": str(worst["month"]),
  633. "worst_month_return": float(worst["return"]),
  634. "recent_positive": recent_positive,
  635. "qualified": qualified,
  636. **metrics,
  637. **stats,
  638. **monthly_stability(monthly),
  639. }
  640. add_cost_columns(row, cost_model, roundtrip_cost)
  641. portfolio_rows.append(row)
  642. horizon_rows_output.extend(horizons)
  643. monthly_frames.append(monthly)
  644. equity_frames.append(
  645. pd.DataFrame(
  646. {
  647. "portfolio": name,
  648. "cost_model": cost_model,
  649. "date": series.index.strftime("%Y-%m-%d"),
  650. "equity": series.to_numpy(),
  651. }
  652. )
  653. )
  654. contribution_rows.extend(
  655. leg_contribution_rows(name, cost_model, legs_tuple, weights, cost_leg_returns, series.index[-1])
  656. )
  657. portfolio_total = pd.DataFrame(portfolio_rows).sort_values(
  658. ["cost_model", "qualified", "calmar", "annualized_return", "trades_per_month", "worst_month_return"],
  659. ascending=[True, False, False, False, False, False],
  660. )
  661. primary_top = portfolio_total[portfolio_total["cost_model"] == PRIMARY_COST_MODEL].head(50)
  662. top_pairs = set(zip(primary_top["cost_model"], primary_top["portfolio"]))
  663. top_pairs.update(zip(portfolio_total[portfolio_total["cost_model"] == "taker_taker"].head(50)["cost_model"], portfolio_total[portfolio_total["cost_model"] == "taker_taker"].head(50)["portfolio"]))
  664. horizon = pd.DataFrame(horizon_rows_output)
  665. horizon["horizon"] = pd.Categorical(horizon["horizon"], categories=[label for label, _ in HORIZONS], ordered=True)
  666. horizon = horizon[
  667. horizon.apply(lambda row: (row["cost_model"], row["portfolio"]) in top_pairs, axis=1)
  668. ].sort_values(["cost_model", "portfolio", "horizon"])
  669. portfolio_metadata = portfolio_total[
  670. ["cost_model", "portfolio", "mode", "leg_count", "legs", "weights_json", "symbols", "families", "recent_positive"]
  671. ]
  672. qualified = risk_qualified(horizon).merge(portfolio_metadata, on=["cost_model", "portfolio"], how="left")
  673. robust = robust_survivors(horizon).merge(portfolio_metadata, on=["cost_model", "portfolio"], how="left")
  674. monthly_all = pd.concat(monthly_frames, ignore_index=True)
  675. monthly_all = monthly_all[monthly_all.apply(lambda row: (row["cost_model"], row["portfolio"]) in top_pairs, axis=1)]
  676. worst_months = monthly_all.sort_values("return").head(100)
  677. equity = pd.concat(equity_frames, ignore_index=True)
  678. equity = equity[equity.apply(lambda row: (row["cost_model"], row["portfolio"]) in top_pairs, axis=1)]
  679. leg_contrib = pd.DataFrame(contribution_rows)
  680. leg_contrib = leg_contrib[
  681. leg_contrib.apply(lambda row: (row["cost_model"], row["portfolio"]) in top_pairs, axis=1)
  682. ].sort_values(["cost_model", "portfolio", "horizon", "contribution_return_sum"], ascending=[True, True, True, False])
  683. args.output_dir.mkdir(parents=True, exist_ok=True)
  684. leg_path = args.output_dir / f"{PREFIX}-legs.csv"
  685. total_path = args.output_dir / f"{PREFIX}-total.csv"
  686. qualified_path = args.output_dir / f"{PREFIX}-qualified.csv"
  687. robust_path = args.output_dir / f"{PREFIX}-robust-survivors.csv"
  688. horizon_path = args.output_dir / f"{PREFIX}-horizon.csv"
  689. monthly_path = args.output_dir / f"{PREFIX}-monthly.csv"
  690. worst_path = args.output_dir / f"{PREFIX}-worst-months.csv"
  691. contrib_path = args.output_dir / f"{PREFIX}-leg-contribution.csv"
  692. equity_path = args.output_dir / f"{PREFIX}-equity.csv"
  693. summary_path = args.output_dir / f"{PREFIX}-summary.json"
  694. report_path = args.output_dir / f"{PREFIX}-report.md"
  695. leg_total_all.to_csv(leg_path, index=False)
  696. portfolio_total.to_csv(total_path, index=False)
  697. qualified.to_csv(qualified_path, index=False)
  698. robust.to_csv(robust_path, index=False)
  699. horizon.to_csv(horizon_path, index=False)
  700. monthly_all.to_csv(monthly_path, index=False)
  701. worst_months.to_csv(worst_path, index=False)
  702. leg_contrib.to_csv(contrib_path, index=False)
  703. equity.to_csv(equity_path, index=False)
  704. summary_path.write_text(
  705. json.dumps(
  706. {
  707. "portfolio_count": int(len(portfolio_total)),
  708. "qualified_count": int(len(qualified)),
  709. "robust_survivor_count": int(len(robust)),
  710. "selected_leg_count": int(len(selected)),
  711. "best_portfolio": portfolio_total.iloc[0].to_dict() if len(portfolio_total) else {},
  712. },
  713. indent=2,
  714. default=str,
  715. )
  716. + "\n",
  717. encoding="utf-8",
  718. )
  719. command = f"rtk .venv/bin/python {Path(__file__).as_posix()} --years {args.years} --max-legs-per-symbol {args.max_legs_per_symbol} --max-leg-count {args.max_leg_count}"
  720. output_files = [leg_path, total_path, qualified_path, robust_path, horizon_path, monthly_path, worst_path, contrib_path, equity_path, summary_path, report_path]
  721. report_path.write_text(write_report(command, output_files, portfolio_total, qualified, robust, horizon, leg_contrib), encoding="utf-8")
  722. print(portfolio_total.head(20).to_string(index=False))
  723. return 0
  724. if __name__ == "__main__":
  725. raise SystemExit(main())