# Source repository: ignatz/okx-codex-trader
from __future__ import annotations

import argparse
import sys
from dataclasses import dataclass
from itertools import combinations
from pathlib import Path

import pandas as pd

# Directory one level above this script's own directory (presumably the repository root).
ROOT = Path(__file__).resolve().parents[1]
# Put that directory first on sys.path so the ``scripts`` package imported below resolves.
sys.path.insert(0, str(ROOT))

from scripts import explore_ultrashort as explore
from scripts import search_eth_robust_twap_fill_slippage as robust_twap


# Default value of the --output-dir command-line option.
OUTPUT_DIR = Path("reports/eth-exploration")
# File-name prefix shared by every CSV/Markdown artifact written by main().
PREFIX = "eth-focused-portfolio-conservative"
# Default value of the --years command-line option (requested history length).
YEARS = 10.0
# Cost model name -> cost value passed to explore.cost_adjusted_trade_equity_frame and
# recorded in the leg table as "roundtrip_cost_on_margin".
COSTS = {
    "maker_taker": 0.0021,
    "taker_taker": 0.0030,
}
# (label, look-back offset) pairs; each horizon is the tail of an equity curve starting
# at "last timestamp minus offset".
HORIZONS = (
    ("3y", pd.DateOffset(years=3)),
    ("1y", pd.DateOffset(years=1)),
    ("6m", pd.DateOffset(months=6)),
    ("3m", pd.DateOffset(months=3)),
)
# A portfolio qualifies only if its full-period net max drawdown is strictly below this.
TARGET_DD = 0.20
# A portfolio qualifies only if its worst monthly return is strictly above this.
TARGET_WORST_MONTH = -0.08


@dataclass(frozen=True)
class Leg:
    """Immutable description of one strategy leg that can be combined into a portfolio."""

    # Strategy family label; main() only combines legs that come from distinct families.
    family: str
    # Candle bar size (e.g. "15m", "5m") used to pick the candle data for this leg.
    bar: str
    # Either a robust-TWAP spec dict, or a candidate object exposing ``name``,
    # ``warmup_bars`` and ``run(...)`` (see leg_name() and run_leg()).
    candidate: object
    # True for the ETH/BTC pair legs created in build_legs(); not read elsewhere in this file.
    pair: bool
    # Legs with this flag set are left out of the "no_maker_dependent" portfolio scope.
    maker_dependent: bool


def leg_name(leg: Leg) -> str:
    """Return the display name of a leg's candidate.

    Dict candidates are named through ``robust_twap.strategy_name``; every other
    candidate is expected to expose a ``name`` attribute, which is stringified.
    """
    candidate = leg.candidate
    if not isinstance(candidate, dict):
        return str(candidate.name)
    return robust_twap.strategy_name(candidate)


def leg_key(leg: Leg) -> str:
    """Return the ``family:bar:name`` identifier used to key a leg's results."""
    return "{}:{}:{}".format(leg.family, leg.bar, leg_name(leg))


def robust_twap_specs() -> list[dict[str, object]]:
    """Return the robust-TWAP stress variants derived from ``robust_twap.BASE_SPEC``.

    Each variant copies the base spec and overrides ``fill_buffer`` and
    ``price_slippage`` from the grid below; ``maker_miss_ratio`` is 0.25 throughout.
    """
    # (fill_buffer, price_slippage) for each variant, in output order.
    stress_grid = (
        (0.0, 0.0),
        (0.0002, 0.0),
        (0.0002, 0.0002),
        (0.0005, 0.0),
    )
    specs: list[dict[str, object]] = []
    for fill_buffer, price_slippage in stress_grid:
        spec = dict(robust_twap.BASE_SPEC)
        spec.update(
            fill_buffer=fill_buffer,
            price_slippage=price_slippage,
            maker_miss_ratio=0.25,
        )
        specs.append(spec)
    return specs


def build_legs() -> list[Leg]:
    """Build the fixed list of candidate legs searched by this script.

    The six ETH/BTC pair legs (``pair=True``, ``maker_dependent=False``) come first,
    followed by one ``eth_robust_twap`` leg per spec from ``robust_twap_specs()``
    (``pair=False``, ``maker_dependent=True``).
    """
    rsi_filter = explore.build_eth_btc_rsi_filter_candidate
    lead_lag = explore.build_btc_lead_eth_lag_candidate
    # (family, bar, candidate factory, positional factory arguments)
    pair_definitions = (
        ("eth_btc_rsi_filter", "15m", rsi_filter, (50, 3.0, 55.0, 120, 240, 0.0)),
        ("eth_btc_rsi_filter", "15m", rsi_filter, (50, 3.0, 55.0, 480, 240, 0.0)),
        ("btc_lead_eth_lag_15m", "15m", lead_lag, (8, 0.018, 0.006, 8, 0.006, 0.018)),
        ("btc_lead_eth_lag_15m", "15m", lead_lag, (16, 0.024, 0.006, 32, 0.006, 0.018)),
        ("btc_lead_eth_lag_5m", "5m", lead_lag, (16, 0.012, 0.006, 32, 0.006, 0.018)),
        ("btc_lead_eth_lag_5m", "5m", lead_lag, (16, 0.012, 0.006, 8, 0.006, 0.018)),
    )
    legs: list[Leg] = [
        Leg(family, bar, factory(*params), True, False)
        for family, bar, factory, params in pair_definitions
    ]
    for spec in robust_twap_specs():
        legs.append(Leg("eth_robust_twap", "15m", spec, False, True))
    return legs


def load_candles(symbol: str, bar: str, years: float) -> list[explore.Candle]:
    """Load cached candles for ``symbol``/``bar``, trimmed to the most recent history.

    The number of bars to keep comes from ``explore.history_bars_for_years(bar, years)``;
    when the cache holds no more than that, it is returned whole.

    Raises:
        FileNotFoundError: if the cache yields no candles for the symbol and bar.
    """
    cached, _ = explore.load_cached_candles(explore.CANDLE_CACHE_DIR, symbol, bar)
    if not cached:
        raise FileNotFoundError(f"missing cached candles for {symbol} {bar}")
    wanted = explore.history_bars_for_years(bar, years)
    if len(cached) > wanted:
        # Keep only the newest ``wanted`` bars.
        return cached[-wanted:]
    return cached


def run_leg(leg: Leg, data: dict[tuple[str, str], list[explore.Candle]]) -> explore.SegmentResult:
    """Backtest a single leg on the preloaded candles and return its segment result.

    ``data`` is keyed by ``(symbol, bar)``. Dict candidates run through the robust-TWAP
    runner on ETH candles only; all other candidates run on aligned ETH/BTC candles.
    """
    bar = leg.bar
    eth = data[("ETH-USDT-SWAP", bar)]
    if isinstance(leg.candidate, dict):
        segment = robust_twap.run_robust_twap_segment(eth, leg.candidate)
        return segment.result

    btc = data[("BTC-USDT-SWAP", bar)]
    aligned_eth, aligned_btc = explore.align_pair_candles(eth, btc)
    return leg.candidate.run(
        eth_candles=aligned_eth,
        btc_candles=aligned_btc,
        leverage=explore.LEVERAGE,
        warmup_bars=leg.candidate.warmup_bars,
    )


def daily_equity(frame: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
    """Resample an equity frame onto a daily UTC-midnight grid.

    ``frame`` must have ``ts`` and ``equity`` columns. The grid runs from the midnight of
    ``start`` to the midnight of ``end``; each day takes the latest equity observed at or
    before that midnight (forward fill). Days before the first observation stay NaN.
    """
    observed = frame.set_index("ts")["equity"].sort_index()
    days = pd.date_range(start.normalize(), end.normalize(), freq="1D", tz="UTC")
    # Merge the grid into the observation timeline so forward-fill can carry the last
    # observed equity onto each midnight.
    timeline = days.union(observed.index)
    carried = observed.reindex(timeline).sort_index().ffill()
    on_grid = carried.reindex(days)
    return on_grid.ffill()


def metrics_from_daily_equity(series: pd.Series) -> dict[str, float]:
    """Compute return and risk metrics from a datetime-indexed equity series.

    Returns a dict with ``net_total_return``, ``net_annualized_return`` (365-day year;
    0.0 when the span is empty or the total return is -100% or worse),
    ``net_max_drawdown``, ``net_calmar`` (0.0 when drawdown is zero) and
    ``net_sharpe_daily`` (mean/std of daily returns scaled by sqrt(365); 0.0 when the
    standard deviation is zero or fewer than two returns exist).
    """
    span_years = (series.index[-1] - series.index[0]).total_seconds() / 86_400 / 365
    total_return = float(series.iloc[-1] / series.iloc[0] - 1.0)

    if total_return > -1.0 and span_years > 0.0:
        annualized_return = (1.0 + total_return) ** (1.0 / span_years) - 1.0
    else:
        annualized_return = 0.0

    max_drawdown = explore.max_drawdown_from_equity([float(value) for value in series])

    daily_returns = series.pct_change().dropna()
    daily_std = float(daily_returns.std(ddof=1)) if len(daily_returns) > 1 else 0.0
    if daily_std:
        sharpe = float(daily_returns.mean()) / daily_std * (365**0.5)
    else:
        sharpe = 0.0

    calmar = annualized_return / max_drawdown if max_drawdown else 0.0
    return {
        "net_total_return": total_return,
        "net_annualized_return": annualized_return,
        "net_max_drawdown": max_drawdown,
        "net_calmar": calmar,
        "net_sharpe_daily": sharpe,
    }


def horizon_rows(portfolio: str, series: pd.Series) -> list[dict[str, object]]:
    """Compute metrics for each trailing horizon in ``HORIZONS``.

    Every horizon is the tail of ``series`` starting at "last timestamp minus offset".
    If a tail has fewer than two points, the full series is used for that row instead.
    Returns one dict per horizon holding the portfolio name, horizon label, start/end
    dates and the metrics from ``metrics_from_daily_equity``.
    """
    last_timestamp = series.index[-1]
    rows: list[dict[str, object]] = []
    for label, offset in HORIZONS:
        window = series[series.index >= last_timestamp - offset]
        if len(window) < 2:
            # Too short to measure anything; fall back to the whole curve.
            window = series
        row: dict[str, object] = {
            "portfolio": portfolio,
            "horizon": label,
            "horizon_start": window.index[0].strftime("%Y-%m-%d"),
            "horizon_end": window.index[-1].strftime("%Y-%m-%d"),
        }
        row.update(metrics_from_daily_equity(window))
        rows.append(row)
    return rows


def monthly_rows(portfolio: str, series: pd.Series) -> pd.DataFrame:
    monthly = series.resample("ME").last()
    frame = pd.DataFrame(
        {
            "portfolio": portfolio,
            "month": monthly.index.strftime("%Y-%m"),
            "start_equity": monthly.shift(1).fillna(series.iloc[0]).to_numpy(),
            "end_equity": monthly.to_numpy(),
        }
    )
    frame["return"] = frame["end_equity"] / frame["start_equity"] - 1.0
    return frame


def portfolio_equity(
    *,
    name: str,
    legs: tuple[str, ...],
    mode: str,
    daily: dict[str, pd.Series],
    leg_metrics: dict[str, dict[str, float]],
) -> tuple[pd.Series, pd.Series]:
    """Combine leg equity curves into one portfolio equity curve.

    Args:
        name: Name assigned to the returned equity series.
        legs: Keys of the legs to combine; must exist in ``daily`` (and in
            ``leg_metrics`` for non-equal weighting).
        mode: ``"equal"`` for equal weights; any other value weights each leg by the
            inverse of its ``net_max_drawdown`` (floored at 0.01), normalised to sum to 1.
        daily: Daily equity series per leg key.
        leg_metrics: Metric dicts per leg key.

    Returns:
        ``(equity, weights)``: the curve compounds the weighted sum of the legs' daily
        returns from ``explore.INITIAL_EQUITY``; ``weights`` is indexed by leg key.
    """
    leg_returns = pd.DataFrame({leg: daily[leg].pct_change().fillna(0.0) for leg in legs}).dropna()

    if mode != "equal":
        inverse_drawdown = pd.Series(
            {leg: 1.0 / max(leg_metrics[leg]["net_max_drawdown"], 0.01) for leg in legs}
        )
        weights = inverse_drawdown / inverse_drawdown.sum()
    else:
        weights = pd.Series(1.0 / len(legs), index=legs)

    blended_returns = leg_returns.mul(weights, axis=1).sum(axis=1)
    equity = explore.INITIAL_EQUITY * (1.0 + blended_returns).cumprod()
    equity.name = name
    return equity, weights


def markdown_table(frame: pd.DataFrame) -> str:
    """Render ``frame`` as a pipe-delimited Markdown table.

    The output has a header row, a ``---`` divider row, and one row per record; missing
    values become empty cells and every cell is formatted with ``format_cell``.
    """
    header = list(frame.columns)
    divider = ["---" for _ in header]
    records = frame.astype(object).where(pd.notna(frame), "").values.tolist()

    rendered: list[str] = []
    for cells in [header, divider, *records]:
        rendered.append("| " + " | ".join(format_cell(cell) for cell in cells) + " |")
    return "\n".join(rendered)


def format_cell(value: object) -> str:
    """Format one table cell: floats with 6 significant digits, everything else via
    ``str`` with ``|`` escaped so it cannot break the Markdown column layout."""
    if not isinstance(value, float):
        return str(value).replace("|", "\\|")
    return format(value, ".6g")


def markdown_report(
    *,
    command: str,
    output_files: list[Path],
    portfolio_total: pd.DataFrame,
    horizon: pd.DataFrame,
    worst_months: pd.DataFrame,
    leg_total: pd.DataFrame,
) -> str:
    """Render the Markdown report for one portfolio search run.

    Args:
        command: Command line shown under "Run command".
        output_files: Paths listed under "Output files".
        portfolio_total: One row per portfolio, already in ranking order; the first
            qualified row is reported as the lowest-DD qualified portfolio.
        horizon: Per-horizon metric rows; only rows for the displayed portfolios are shown.
        worst_months: Monthly rows; the first 20 are shown.
        leg_total: Per-leg totals table.

    Returns:
        The report text, terminated by a newline.
    """
    passing = portfolio_total[portfolio_total["qualified"]].copy()
    any_passing = bool(len(passing))
    # Show the ten best qualified portfolios, or the ten best overall when none qualify.
    shown = passing.head(10) if any_passing else portfolio_total.head(10)

    if any_passing:
        lowest_dd = passing.iloc[0]
        highest_return = passing.sort_values("net_annualized_return", ascending=False).iloc[0]
        decision = (
            f"Found {len(passing)} qualified portfolios. Lowest-DD qualified `{lowest_dd['portfolio']}` "
            f"under {lowest_dd['cost_model']}: annualized={lowest_dd['net_annualized_return']:.4f}, "
            f"DD={lowest_dd['net_max_drawdown']:.4f}, worst_month={lowest_dd['worst_month_return']:.4f}. "
            f"Highest-return qualified `{highest_return['portfolio']}` under {highest_return['cost_model']}: "
            f"annualized={highest_return['net_annualized_return']:.4f}, DD={highest_return['net_max_drawdown']:.4f}, "
            f"worst_month={highest_return['worst_month_return']:.4f}."
        )
    else:
        decision = "No portfolio met 3y/1y/6m/3m all positive, DD < 20%, and worst month > -8%."

    grouped = portfolio_total.groupby(["cost_model", "scope"], as_index=False)
    counts = grouped.agg(portfolios=("portfolio", "count"), qualified=("qualified", "sum"))
    counts = counts.sort_values(["cost_model", "scope"])

    shown_names = set(shown["portfolio"])
    shown_horizon = horizon[horizon["portfolio"].isin(shown_names)].copy()

    portfolio_columns = [
        "qualified",
        "portfolio",
        "cost_model",
        "scope",
        "mode",
        "leg_count",
        "net_total_return",
        "net_annualized_return",
        "net_max_drawdown",
        "worst_month_return",
        "min_horizon_total_return",
        "max_horizon_drawdown",
    ]
    horizon_columns = [
        "portfolio",
        "cost_model",
        "horizon",
        "net_total_return",
        "net_annualized_return",
        "net_max_drawdown",
    ]
    leg_columns = [
        "leg_key",
        "cost_model",
        "family",
        "bar",
        "maker_dependent",
        "trades",
        "net_total_return",
        "net_annualized_return",
        "net_max_drawdown",
    ]

    # Header and run metadata.
    lines: list[str] = [
        "# ETH-focused conservative portfolio search",
        "",
        f"Run command: `{command}`",
        "",
        "Output files:",
    ]
    lines.extend(f"- `{path}`" for path in output_files)
    lines.extend(
        [
            "",
            "Scope: one continuous cached ETH/BTC intersection, requested 10 years; 3y/1y/6m/3m are sliced from the same portfolio equity curves.",
            "Costs: maker_taker and taker_taker only.",
            "Candidates: ETH/BTC RSI filter, BTC lead ETH lag 5m/15m, ETH robust TWAP stress variants.",
            "Portfolio scopes: all_legs and no_maker_dependent.",
            "",
            f"Decision: {decision}",
            "",
        ]
    )

    # Tabular sections, in display order.
    lines.extend(["## Qualification counts", "", markdown_table(counts), ""])
    lines.extend(
        [
            "## Top portfolios by conservative sort",
            "",
            "Sort: qualified first, then cost/scope, then lowest DD, worst month, minimum horizon return, and annualized return.",
            "",
            markdown_table(shown[portfolio_columns]),
            "",
        ]
    )
    lines.extend(
        ["## Horizon checks for top portfolios", "", markdown_table(shown_horizon[horizon_columns]), ""]
    )
    lines.extend(["## Worst months", "", markdown_table(worst_months.head(20)), ""])
    lines.extend(["## Leg totals", "", markdown_table(leg_total[leg_columns])])

    return "\n".join(lines) + "\n"


def main() -> int:
    """Run the ETH-focused conservative portfolio search and write its reports.

    Steps:
      1. Load cached ETH and BTC swap candles for every bar size used by the legs.
      2. Run each leg once and derive one cost-adjusted daily equity curve per cost
         model over the time window common to all legs.
      3. Enumerate combinations of 2..``--max-leg-count`` legs drawn from distinct
         families, under "equal" and "risk" weighting, for the ``all_legs`` and
         ``no_maker_dependent`` scopes.
      4. Rank the portfolios and write the leg, total, qualified, horizon, worst-month
         and equity CSVs plus a Markdown report into ``--output-dir``.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: when ``--max-leg-count`` is below 2 (via ``parser.error``) or when
            no portfolio combination could be formed.
        FileNotFoundError: propagated from ``load_candles`` when a candle cache is empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--years", type=float, default=YEARS)
    parser.add_argument("--output-dir", type=Path, default=OUTPUT_DIR)
    parser.add_argument("--max-leg-count", type=int, default=4)
    args = parser.parse_args()
    if args.max_leg_count < 2:
        # A portfolio needs at least two legs. Without this check the search yields no
        # rows and the ranking below fails with an opaque KeyError on an empty frame.
        parser.error("--max-leg-count must be at least 2")

    legs = build_legs()
    bars = sorted({leg.bar for leg in legs})
    data = {
        (symbol, bar): load_candles(symbol, bar, args.years)
        for bar in bars
        for symbol in ("ETH-USDT-SWAP", "BTC-USDT-SWAP")
    }

    results: dict[str, tuple[Leg, explore.SegmentResult]] = {}
    for index, leg in enumerate(legs, start=1):
        key = leg_key(leg)
        results[key] = (leg, run_leg(leg, data))
        print(f"done {index}/{len(legs)} {key}", flush=True)

    # Common window: latest first timestamp and earliest last timestamp across all legs.
    start = max(pd.to_datetime(result.equity_curve[0]["ts"], unit="ms", utc=True) for _, result in results.values())
    end = min(pd.to_datetime(result.equity_curve[-1]["ts"], unit="ms", utc=True) for _, result in results.values())
    # These depend only on the common window, so compute them once for every row.
    first_candle = start.strftime("%Y-%m-%d %H:%M")
    last_candle = end.strftime("%Y-%m-%d %H:%M")
    span_years = (end - start).total_seconds() / 86_400 / 365

    daily_by_cost: dict[str, dict[str, pd.Series]] = {cost: {} for cost in COSTS}
    metrics_by_cost: dict[str, dict[str, dict[str, float]]] = {cost: {} for cost in COSTS}
    leg_rows: list[dict[str, object]] = []
    for key, (leg, result) in results.items():
        for cost_model, cost_value in COSTS.items():
            frame = explore.cost_adjusted_trade_equity_frame(result, cost_value)
            daily = daily_equity(frame, start, end)
            metrics = metrics_from_daily_equity(daily)
            daily_by_cost[cost_model][key] = daily
            metrics_by_cost[cost_model][key] = metrics
            leg_rows.append(
                {
                    "leg_key": key,
                    "cost_model": cost_model,
                    "roundtrip_cost_on_margin": cost_value,
                    "family": leg.family,
                    "bar": leg.bar,
                    "maker_dependent": leg.maker_dependent,
                    "name": leg_name(leg),
                    "first_candle": first_candle,
                    "last_candle": last_candle,
                    "years": span_years,
                    "trades": result.trade_count,
                    "gross_total_return": result.total_return,
                    "gross_max_drawdown_mark_to_market": result.max_drawdown,
                    **metrics,
                }
            )

    portfolio_rows: list[dict[str, object]] = []
    horizon_output: list[dict[str, object]] = []
    monthly_frames: list[pd.DataFrame] = []
    equity_frames: list[pd.DataFrame] = []
    all_keys = list(results.keys())
    # The two scopes are the same for every cost model, so build them once.
    scopes = (
        ("all_legs", all_keys),
        ("no_maker_dependent", [key for key in all_keys if not results[key][0].maker_dependent]),
    )
    # Running counter across cost models and scopes; keeps every portfolio name unique.
    combo_index = 0
    for cost_model, daily in daily_by_cost.items():
        for scope, keys in scopes:
            max_leg_count = min(args.max_leg_count, len(keys))
            for leg_count in range(2, max_leg_count + 1):
                for selected in combinations(keys, leg_count):
                    # Skip combinations that take more than one leg from the same family.
                    if len({results[key][0].family for key in selected}) != leg_count:
                        continue
                    for mode in ("equal", "risk"):
                        combo_index += 1
                        families = "+".join(results[key][0].family for key in selected)
                        portfolio = f"{scope}-{mode}-{leg_count}-c{combo_index:04d}-{families}"
                        series, weights = portfolio_equity(
                            name=portfolio,
                            legs=selected,
                            mode=mode,
                            daily=daily,
                            leg_metrics=metrics_by_cost[cost_model],
                        )
                        metrics = metrics_from_daily_equity(series)
                        monthly = monthly_rows(portfolio, series)
                        worst_month = float(monthly["return"].min())
                        current_horizons = horizon_rows(portfolio, series)
                        horizon_min_return = min(float(row["net_total_return"]) for row in current_horizons)
                        horizon_max_dd = max(float(row["net_max_drawdown"]) for row in current_horizons)
                        all_horizons_positive = horizon_min_return > 0.0
                        qualified = (
                            all_horizons_positive
                            and metrics["net_max_drawdown"] < TARGET_DD
                            and worst_month > TARGET_WORST_MONTH
                        )
                        portfolio_rows.append(
                            {
                                "portfolio": portfolio,
                                "cost_model": cost_model,
                                "scope": scope,
                                "mode": mode,
                                "leg_count": leg_count,
                                "legs": ";".join(selected),
                                "weights": ";".join(f"{key}={weights[key]:.8f}" for key in selected),
                                "first_candle": first_candle,
                                "last_candle": last_candle,
                                "years": span_years,
                                "worst_month_return": worst_month,
                                "all_horizons_positive": all_horizons_positive,
                                "min_horizon_total_return": horizon_min_return,
                                "max_horizon_drawdown": horizon_max_dd,
                                "qualified": qualified,
                                **metrics,
                            }
                        )
                        for row in current_horizons:
                            horizon_output.append({"cost_model": cost_model, "scope": scope, **row})
                        monthly_frames.append(monthly.assign(cost_model=cost_model, scope=scope))
                        equity_frames.append(
                            pd.DataFrame(
                                {
                                    "portfolio": portfolio,
                                    "cost_model": cost_model,
                                    "scope": scope,
                                    "date": series.index.strftime("%Y-%m-%d"),
                                    "equity": series.to_numpy(),
                                }
                            )
                        )

    if not portfolio_rows:
        # Defensive: the ranking and concatenation below cannot handle an empty search.
        raise SystemExit("no portfolio could be formed: need at least two legs from distinct families")

    leg_total = pd.DataFrame(leg_rows).sort_values(["cost_model", "net_calmar", "net_annualized_return"], ascending=[True, False, False])
    portfolio_total = pd.DataFrame(portfolio_rows).sort_values(
        [
            "qualified",
            "cost_model",
            "scope",
            "net_max_drawdown",
            "worst_month_return",
            "min_horizon_total_return",
            "net_annualized_return",
        ],
        ascending=[False, True, True, True, False, False, False],
    )
    # Detail outputs (horizon, worst months, equity) are limited to the 25 top-ranked portfolios.
    top_names = set(portfolio_total.head(25)["portfolio"])
    horizon = pd.DataFrame(horizon_output)
    horizon["horizon"] = pd.Categorical(horizon["horizon"], categories=["3y", "1y", "6m", "3m"], ordered=True)
    horizon = horizon[horizon["portfolio"].isin(top_names)].sort_values(["cost_model", "portfolio", "horizon"])
    monthly = pd.concat(monthly_frames, ignore_index=True)
    worst_months = monthly[monthly["portfolio"].isin(top_names)].sort_values("return").head(100)
    equity = pd.concat(equity_frames, ignore_index=True)
    equity = equity[equity["portfolio"].isin(top_names)]

    args.output_dir.mkdir(parents=True, exist_ok=True)
    leg_path = args.output_dir / f"{PREFIX}-legs.csv"
    total_path = args.output_dir / f"{PREFIX}-total.csv"
    qualified_path = args.output_dir / f"{PREFIX}-qualified.csv"
    horizon_path = args.output_dir / f"{PREFIX}-horizon.csv"
    worst_path = args.output_dir / f"{PREFIX}-worst-months.csv"
    equity_path = args.output_dir / f"{PREFIX}-equity.csv"
    report_path = args.output_dir / f"{PREFIX}-report.md"

    leg_total.to_csv(leg_path, index=False)
    portfolio_total.to_csv(total_path, index=False)
    portfolio_total[portfolio_total["qualified"]].to_csv(qualified_path, index=False)
    horizon.to_csv(horizon_path, index=False)
    worst_months.to_csv(worst_path, index=False)
    equity.to_csv(equity_path, index=False)
    # Record every option that shaped this run, including the output directory, so the
    # command shown in the report reproduces it.
    command = (
        f"rtk .venv/bin/python {Path(__file__).as_posix()} --years {args.years} "
        f"--max-leg-count {args.max_leg_count} --output-dir {args.output_dir.as_posix()}"
    )
    report_path.write_text(
        markdown_report(
            command=command,
            output_files=[leg_path, total_path, qualified_path, horizon_path, worst_path, equity_path, report_path],
            portfolio_total=portfolio_total,
            horizon=horizon,
            worst_months=worst_months,
            leg_total=leg_total,
        ),
        encoding="utf-8",
    )

    print(portfolio_total.head(20).to_string(index=False))
    return 0


# Script entry point: run main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())