# search_eth_btc_wick_rejection.py (13 KB)
  1. from __future__ import annotations
  2. import argparse
  3. from dataclasses import dataclass
  4. from pathlib import Path
  5. import pandas as pd
  6. DATA_DIR = Path("data/okx-candles")
  7. OUTPUT_DIR = Path("reports/eth-exploration")
  8. PREFIX = "eth-btc-wick-rejection"
  9. INITIAL_EQUITY = 10_000.0
  10. FEE = 0.0004
  11. ROUNDTRIP_FEE = FEE * 2
  12. HORIZONS = (
  13. ("full", None),
  14. ("3y", pd.DateOffset(years=3)),
  15. ("1y", pd.DateOffset(years=1)),
  16. ("6m", pd.DateOffset(months=6)),
  17. ("3m", pd.DateOffset(months=3)),
  18. )
  19. @dataclass(frozen=True)
  20. class Spec:
  21. symbol: str
  22. bar: str
  23. vol_window: int
  24. min_vol_rank: float
  25. min_upper_wick: float
  26. max_close_pos: float
  27. stop: float
  28. take: float
  29. hold: int
  30. @property
  31. def name(self) -> str:
  32. base = self.symbol.split("-")[0].lower()
  33. return (
  34. f"{base}-upper-wick-{self.bar}-vw{self.vol_window}-vr{self.min_vol_rank:g}"
  35. f"-uw{self.min_upper_wick:g}-cp{self.max_close_pos:g}"
  36. f"-sl{self.stop:g}-tp{self.take:g}-h{self.hold}"
  37. )
  38. def load_frame(symbol: str) -> pd.DataFrame:
  39. frame = pd.read_csv(DATA_DIR / symbol / "15m.csv")
  40. frame["ts"] = pd.to_datetime(frame["ts"], unit="ms", utc=True)
  41. return frame.sort_values("ts").drop_duplicates("ts", keep="last").set_index("ts")
  42. def resample(frame: pd.DataFrame, bar: str) -> pd.DataFrame:
  43. rule = {"15m": "15min", "1h": "1h", "4h": "4h"}[bar]
  44. return (
  45. frame.resample(rule, label="left", closed="left")
  46. .agg(open=("open", "first"), high=("high", "max"), low=("low", "min"), close=("close", "last"), volume=("volume", "sum"))
  47. .dropna()
  48. )
  49. def signal(frame: pd.DataFrame, spec: Spec) -> pd.Series:
  50. candle_range = (frame["high"] - frame["low"]).replace(0, pd.NA)
  51. upper_wick = (frame["high"] - frame[["open", "close"]].max(axis=1)) / candle_range
  52. close_pos = (frame["close"] - frame["low"]) / candle_range
  53. vol_rank = frame["volume"].rolling(spec.vol_window).rank(pct=True)
  54. weak_body = frame["close"] <= frame["open"]
  55. entry = (
  56. weak_body
  57. & (upper_wick >= spec.min_upper_wick)
  58. & (close_pos <= spec.max_close_pos)
  59. & (vol_rank >= spec.min_vol_rank)
  60. )
  61. return entry.fillna(False)
  62. def trade_return(entry: float, exit_: float) -> float:
  63. return entry / exit_ - 1.0 - ROUNDTRIP_FEE
  64. def run_spec(frame: pd.DataFrame, spec: Spec) -> tuple[pd.Series, list[dict[str, object]]]:
  65. entry = signal(frame, spec)
  66. warmup = spec.vol_window + 2
  67. equity = INITIAL_EQUITY
  68. position: dict[str, object] | None = None
  69. pending_entry = False
  70. trades: list[dict[str, object]] = []
  71. curve: list[tuple[pd.Timestamp, float]] = []
  72. rows = list(frame.itertuples())
  73. for index in range(warmup, len(rows)):
  74. candle = rows[index]
  75. ts = frame.index[index]
  76. if pending_entry and position is None and equity > 0:
  77. position = {
  78. "entry_time": ts,
  79. "entry_index": index,
  80. "entry_price": float(candle.open),
  81. "stop": float(candle.open) * (1.0 + spec.stop),
  82. "take": float(candle.open) * (1.0 - spec.take),
  83. }
  84. pending_entry = False
  85. mark = equity
  86. if position is not None:
  87. stop_hit = candle.high >= float(position["stop"])
  88. take_hit = candle.low <= float(position["take"])
  89. held = index - int(position["entry_index"])
  90. if stop_hit or take_hit or held >= spec.hold:
  91. exit_price = float(position["stop"] if stop_hit else position["take"] if take_hit else candle.close)
  92. net = trade_return(float(position["entry_price"]), exit_price)
  93. equity *= 1.0 + net
  94. trades.append({"entry_time": position["entry_time"], "exit_time": ts, "return": net})
  95. position = None
  96. mark = equity
  97. else:
  98. gross = float(position["entry_price"]) / candle.close - 1.0
  99. mark = equity * (1.0 + gross - FEE)
  100. curve.append((ts, mark))
  101. if index == len(rows) - 1 or position is not None:
  102. continue
  103. if bool(entry.iloc[index]):
  104. pending_entry = True
  105. series = pd.Series({ts: value for ts, value in curve}).sort_index()
  106. daily = series.resample("1D").last().ffill()
  107. daily = pd.concat([pd.Series([INITIAL_EQUITY], index=[daily.index[0].normalize()]), daily]).sort_index()
  108. return daily.groupby(level=0).last(), trades
  109. def period_metrics(equity: pd.Series, trades: list[dict[str, object]], offset: pd.DateOffset | None) -> dict[str, object]:
  110. start = equity.index[0] if offset is None else equity.index[-1] - offset
  111. scoped = equity[equity.index >= start]
  112. scoped_trades = [trade for trade in trades if pd.Timestamp(trade["entry_time"]) >= scoped.index[0]]
  113. total = float(scoped.iloc[-1] / scoped.iloc[0] - 1.0)
  114. years = (scoped.index[-1] - scoped.index[0]).total_seconds() / 86_400 / 365
  115. annual = (1.0 + total) ** (1.0 / years) - 1.0 if total > -1.0 and years > 0 else 0.0
  116. drawdown = float(((scoped.cummax() - scoped) / scoped.cummax()).max())
  117. returns = [float(trade["return"]) for trade in scoped_trades]
  118. wins = [value for value in returns if value > 0]
  119. losses = [value for value in returns if value < 0]
  120. profit_factor = sum(wins) / abs(sum(losses)) if losses else (999.0 if wins else 0.0)
  121. return {
  122. "total_return": total,
  123. "annualized_return": annual,
  124. "max_drawdown": drawdown,
  125. "win_rate": len(wins) / len(returns) if returns else 0.0,
  126. "profit_factor": profit_factor,
  127. "trades": len(returns),
  128. }
  129. def monthly_rows(name: str, equity: pd.Series) -> pd.DataFrame:
  130. monthly = equity.resample("ME").last()
  131. frame = pd.DataFrame(
  132. {
  133. "name": name,
  134. "month": monthly.index.strftime("%Y-%m"),
  135. "start_equity": monthly.shift(1).fillna(equity.iloc[0]).to_numpy(),
  136. "end_equity": monthly.to_numpy(),
  137. }
  138. )
  139. frame["return"] = frame["end_equity"] / frame["start_equity"] - 1.0
  140. return frame
  141. def correlation_to_existing(candidate_monthly: pd.DataFrame) -> float | None:
  142. path = Path("reports/eth-exploration/eth-nextgen-micro-portfolio-monthly.csv")
  143. if not path.exists():
  144. return None
  145. base = pd.read_csv(path)
  146. base = base[base["name"] == "equal-2-c0003"][["month", "return"]].rename(columns={"return": "base_return"})
  147. joined = candidate_monthly[["month", "return"]].merge(base, on="month", how="inner")
  148. return None if len(joined) < 6 else float(joined["return"].corr(joined["base_return"]))
  149. def row_for_spec(spec: Spec, equity: pd.Series, trades: list[dict[str, object]]) -> dict[str, object]:
  150. row: dict[str, object] = {"name": spec.name, "symbol": spec.symbol, "bar": spec.bar}
  151. for label, offset in HORIZONS:
  152. metrics = period_metrics(equity, trades, offset)
  153. for key, value in metrics.items():
  154. row[f"{label}_{key}"] = value
  155. return row
  156. def build_specs() -> list[Spec]:
  157. specs: list[Spec] = []
  158. for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP"):
  159. for bar in ("15m", "1h", "4h"):
  160. holds = (12, 24) if bar == "15m" else (8, 16)
  161. for vol_window in (96, 192):
  162. for min_vol_rank in (0.90, 0.95):
  163. for min_upper_wick in (0.45, 0.60):
  164. for stop, take in ((0.012, 0.018), (0.02, 0.03)):
  165. for hold in holds:
  166. specs.append(Spec(symbol, bar, vol_window, min_vol_rank, min_upper_wick, 0.35, stop, take, hold))
  167. return specs
  168. def markdown_table(frame: pd.DataFrame) -> str:
  169. def cell(value: object) -> str:
  170. return f"{value:.4f}" if isinstance(value, float) else str(value).replace("|", "\\|")
  171. rows = [list(frame.columns), ["---" for _ in frame.columns]]
  172. rows.extend(frame.astype(object).where(pd.notna(frame), "").values.tolist())
  173. return "\n".join("| " + " | ".join(cell(value) for value in row) + " |" for row in rows)
  174. def report_text(
  175. totals: pd.DataFrame,
  176. selected: pd.Series | None,
  177. corr: float | None,
  178. paths: list[Path],
  179. min_full_trades: int,
  180. min_3m_trades: int,
  181. verdict: str,
  182. ) -> str:
  183. keep = [
  184. "name",
  185. "symbol",
  186. "bar",
  187. "full_total_return",
  188. "full_annualized_return",
  189. "full_max_drawdown",
  190. "full_win_rate",
  191. "full_profit_factor",
  192. "full_trades",
  193. "3y_total_return",
  194. "1y_total_return",
  195. "6m_total_return",
  196. "3m_total_return",
  197. "3m_trades",
  198. ]
  199. if selected is None:
  200. period_table = "No reference candidate."
  201. else:
  202. period_table = markdown_table(
  203. pd.DataFrame(
  204. [
  205. {
  206. "period": label,
  207. "total_return": selected[f"{label}_total_return"],
  208. "annualized_return": selected[f"{label}_annualized_return"],
  209. "max_drawdown": selected[f"{label}_max_drawdown"],
  210. "win_rate": selected[f"{label}_win_rate"],
  211. "profit_factor": selected[f"{label}_profit_factor"],
  212. "trades": selected[f"{label}_trades"],
  213. }
  214. for label, _ in HORIZONS
  215. ]
  216. )
  217. )
  218. corr_text = "n/a" if corr is None else f"{corr:.4f}"
  219. return "\n".join(
  220. [
  221. "# ETH/BTC Wick Rejection Light Screen",
  222. "",
  223. "Scope: read-only local OKX candles; short-only single-candle upper-wick rejection; no staged entry, relative momentum, crash-follow, calendar/time bucket, trend-exhaustion, or false-breakout reversal.",
  224. "",
  225. f"Output files: {', '.join(f'`{path}`' for path in paths)}",
  226. f"Trade-count floor: full>={min_full_trades}, 3m>={min_3m_trades}.",
  227. f"Decision: {verdict}",
  228. f"Monthly return correlation vs `equal-2-c0003` nextgen micro portfolio: {corr_text}.",
  229. "",
  230. "## Reference Metrics",
  231. "",
  232. period_table,
  233. "",
  234. "## Top Candidates",
  235. "",
  236. markdown_table(totals[keep].head(12)),
  237. "",
  238. ]
  239. )
  240. def main() -> int:
  241. parser = argparse.ArgumentParser()
  242. parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
  243. parser.add_argument("--min-full-trades", type=int, default=100)
  244. parser.add_argument("--min-3m-trades", type=int, default=5)
  245. args = parser.parse_args()
  246. raw = {symbol: load_frame(symbol) for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP")}
  247. frames = {(symbol, bar): resample(raw[symbol], bar) for symbol in raw for bar in ("15m", "1h", "4h")}
  248. rows: list[dict[str, object]] = []
  249. equity_by_name: dict[str, pd.Series] = {}
  250. for spec in build_specs():
  251. equity, trades = run_spec(frames[(spec.symbol, spec.bar)], spec)
  252. rows.append(row_for_spec(spec, equity, trades))
  253. equity_by_name[spec.name] = equity
  254. totals = pd.DataFrame(rows).sort_values(
  255. ["full_total_return", "1y_total_return", "full_profit_factor"],
  256. ascending=[False, False, False],
  257. )
  258. trade_eligible = totals[(totals["full_trades"] >= args.min_full_trades) & (totals["3m_trades"] >= args.min_3m_trades)]
  259. viable = trade_eligible[
  260. (trade_eligible["full_total_return"] > 0)
  261. & (trade_eligible["1y_total_return"] > 0)
  262. & (trade_eligible["6m_total_return"] > 0)
  263. & (trade_eligible["3m_total_return"] > 0)
  264. ]
  265. if not viable.empty:
  266. selected = viable.iloc[0]
  267. verdict = "Light-screen pass only: trade count and all required return windows are positive; edge still needs independent validation."
  268. elif not trade_eligible.empty:
  269. selected = trade_eligible.iloc[0]
  270. verdict = "Rejected: trade count is sufficient, but no trade-sufficient candidate has positive full/1y/6m/3m returns."
  271. else:
  272. selected = None
  273. verdict = "Rejected: no candidate met the trade-count floor."
  274. monthly = pd.DataFrame()
  275. corr = None
  276. if selected is not None:
  277. monthly = monthly_rows(str(selected["name"]), equity_by_name[str(selected["name"])])
  278. corr = correlation_to_existing(monthly)
  279. args.output_dir.mkdir(parents=True, exist_ok=True)
  280. totals_path = args.output_dir / f"{PREFIX}-totals.csv"
  281. monthly_path = args.output_dir / f"{PREFIX}-monthly.csv"
  282. report_path = args.output_dir / f"{PREFIX}-report.md"
  283. totals.to_csv(totals_path, index=False)
  284. monthly.to_csv(monthly_path, index=False)
  285. report_path.write_text(
  286. report_text(totals, selected, corr, [totals_path, monthly_path, report_path], args.min_full_trades, args.min_3m_trades, verdict),
  287. encoding="utf-8",
  288. )
  289. print(totals.head(10).to_string(index=False))
  290. print(f"wrote {totals_path}, {monthly_path}, {report_path}")
  291. return 0
  292. if __name__ == "__main__":
  293. raise SystemExit(main())