diff --git a/README.md b/README.md index c0b1f8e..de393a2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![CI](https://github.com/OnePunchMonk/AgentQuant/actions/workflows/ci.yml/badge.svg)](https://github.com/OnePunchMonk/AgentQuant/actions) ![Python](https://img.shields.io/badge/python-3.10%2B-blue) -![Tests](https://img.shields.io/badge/tests-42%20passed-brightgreen) +![Tests](https://img.shields.io/badge/tests-55%20passed-brightgreen) --- @@ -20,6 +20,30 @@ AgentQuant is a regime-adaptive research platform that runs a real **ReAct agent --- +## Platform Preview + +### Live Data Selection + +Choose a date range, select preset stocks/ETFs, or type any yfinance ticker. AgentQuant fetches data on demand and only uses the local cache when it covers the requested range. + +![Live data sidebar](screenshots/live_data_sidebar_desktop.jpg) + +### Research Workspace + +The dashboard tracks experiment runs, baselines, robustness scores, validation checks, and report-ready research notes in one place. + +![Research workspace](screenshots/research_workspace_desktop.jpg) + +### Alpha + NLA Memory + +Agent Lab stores backtested alpha candidates and explicit NLA-style research narratives so future runs can retrieve prior evidence. NLA memory is based on explicit activation narratives or imported `nla-gemma4` JSONL outputs, not hidden chain-of-thought. 
+ +![NLA memory](screenshots/nla_memory_desktop.jpg) + +![Agent Lab NLA memory](screenshots/agent_lab_nla_memory_desktop.jpg) + +--- + ## Architecture ``` @@ -40,6 +64,9 @@ analyze ──► hypothesize ──► backtest ──► reflect | `src/agent/context_builder.py` | `RegimeContext` dataclass with VIX percentile, multi-horizon momentum | | `src/agent/parameter_grid.py` | Canonical grids per strategy; regime-aware prior selection | | `src/agent/strategy_memory.py` | SQLite cross-session memory | +| `src/research/alpha_store.py` | SQLite memory for accepted, watchlisted, and rejected alpha candidates | +| `src/research/nla_memory.py` | Explicit NLA-style narrative memory and `nla-gemma4` JSONL ingestion | +| `src/research/workspace.py` | Experiment registry, robustness summaries, and research memo generation | | `src/features/regime.py` | Percentile-based regime detection + optional HMM | | `src/features/engine.py` | RSI, MACD, Bollinger, ATR, multi-horizon vol, stationarity checks | | `src/features/lookback_guard.py` | `WarmupEnforcer` prevents look-ahead bias | @@ -108,14 +135,18 @@ pip install -e ".[dev]" pytest tests/ -v ``` -**42 tests passing** across: +**55 tests passing** across: - `test_config.py` — Pydantic validation +- `test_data_ingest.py` — live ticker fetch and cache range coverage - `test_metrics.py` — Sharpe, drawdown, Calmar, Sortino - `test_regime.py` — VIX percentile regime classification - `test_features.py` — RSI bounds, momentum accuracy, new indicator columns - `test_strategies.py` — All 6 strategies produce valid `{-1,0,1}` signals - `test_backtest.py` — Runner, zero-signal flat equity, metrics keys - `test_proposal_generator.py` — Fallback chain without API key +- `test_alpha_store.py` — alpha memory persistence and retrieval +- `test_nla_memory.py` — explicit NLA memory and JSONL ingestion +- `test_research_workspace.py` — experiment registry summaries and memos --- @@ -136,6 +167,10 @@ AgentQuant/ │ ├── data/ │ │ ├── ingest.py # yfinance + FRED with 
TTL cache │ │ └── schemas.py # Data schemas +│ ├── research/ +│ │ ├── alpha_store.py # SQLite alpha candidate memory +│ │ ├── nla_memory.py # Explicit NLA narrative memory +│ │ └── workspace.py # Experiment registry + research memos │ ├── features/ │ │ ├── engine.py # RSI, MACD, Bollinger, ATR, multi-horizon vol │ │ ├── regime.py # VIX-percentile + optional HMM detection @@ -158,7 +193,7 @@ AgentQuant/ ├── experiments/ │ ├── results_store.py # SQLite experiment tracking │ └── walk_forward.py # Walk-forward validation -├── tests/ # 42 tests +├── tests/ # 55 tests ├── docs/ # Documentation ├── config.yaml # Project configuration ├── .env.example # Environment template diff --git a/pyproject.toml b/pyproject.toml index 913777e..38cfc76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "scipy>=1.12", "streamlit>=1.39", "plotly>=5.17", + "matplotlib>=3.8", "pyarrow>=16.0", "tabulate>=0.9", "statsmodels>=0.14", @@ -67,4 +68,4 @@ ignore = ["E501"] [tool.setuptools.packages.find] where = ["."] -include = ["src*"] \ No newline at end of file +include = ["src*"] diff --git a/screenshots/agent_lab_nla_memory_desktop.jpg b/screenshots/agent_lab_nla_memory_desktop.jpg new file mode 100644 index 0000000..5e0ca1e Binary files /dev/null and b/screenshots/agent_lab_nla_memory_desktop.jpg differ diff --git a/screenshots/live_data_sidebar_desktop.jpg b/screenshots/live_data_sidebar_desktop.jpg new file mode 100644 index 0000000..b4b0ba4 Binary files /dev/null and b/screenshots/live_data_sidebar_desktop.jpg differ diff --git a/screenshots/nla_memory_desktop.jpg b/screenshots/nla_memory_desktop.jpg new file mode 100644 index 0000000..f57418f Binary files /dev/null and b/screenshots/nla_memory_desktop.jpg differ diff --git a/screenshots/research_workspace_desktop.jpg b/screenshots/research_workspace_desktop.jpg new file mode 100644 index 0000000..9374c8c Binary files /dev/null and b/screenshots/research_workspace_desktop.jpg differ diff --git 
a/src/agent/agent_graph.py b/src/agent/agent_graph.py index d2098ee..0ee13b8 100644 --- a/src/agent/agent_graph.py +++ b/src/agent/agent_graph.py @@ -11,16 +11,15 @@ import json import logging -from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, TypedDict -import numpy as np import pandas as pd from src.agent.context_builder import RegimeContext, build_context from src.agent.proposal_generator import Proposal, ProposalGenerator from src.agent.strategy_memory import PastResult, StrategyMemory -from src.backtest.metrics import PerformanceMetrics +from src.research.alpha_store import AlphaStore +from src.research.nla_memory import NLAMemoryStore from src.utils.config import config logger = logging.getLogger(__name__) @@ -60,10 +59,16 @@ def analyze_node(state: AgentState) -> AgentState: # Get memory context memory = StrategyMemory() memory_ctx = memory.to_prompt_context(regime_label, state.get("strategy_type", "momentum")) + alpha_memory = AlphaStore() + alpha_ctx = alpha_memory.to_prompt_context(regime_label, state.get("strategy_type", "momentum")) + nla_memory = NLAMemoryStore() + nla_ctx = nla_memory.to_prompt_context(regime_label, state.get("strategy_type", "momentum")) + context.alpha_memory_context = alpha_ctx + context.nla_memory_context = nla_ctx state["features_df"] = features_df state["context"] = context - state["memory_context"] = memory_ctx + state["memory_context"] = f"{memory_ctx}\n\n{alpha_ctx}\n\n{nla_ctx}" state["run_log"] = state.get("run_log", []) state["run_log"].append(f"Regime: {regime_label} (confidence: {context.regime_confidence:.0%})") @@ -212,8 +217,40 @@ def store_node(state: AgentState) -> AgentState: reasoning=best.get("reasoning", ""), ) run_id = memory.store(result) - state["run_log"].append(f"Store: Persisted result {run_id} to memory.") - logger.info("Persisted result %s to strategy memory.", run_id) + alpha = AlphaStore().store_backtest_result( + regime=regime, + 
strategy_type=state.get("strategy_type", "momentum"), + params=best["params"], + metrics={ + "sharpe_ratio": best.get("sharpe", 0.0), + "total_return": best.get("total_return", 0.0), + "max_drawdown": best.get("max_drawdown", 0.0), + "num_trades": best.get("num_trades", 0), + }, + assets=[state.get("asset", config.reference_asset)], + generation_method=best.get("generation_method", ""), + confidence=best.get("confidence", 0.0), + reasoning=best.get("reasoning", ""), + source="agent_graph", + ) + nla = NLAMemoryStore().store_agent_summary( + regime=regime, + strategy_type=state.get("strategy_type", "momentum"), + params=best["params"], + metrics={ + "sharpe_ratio": best.get("sharpe", 0.0), + "total_return": best.get("total_return", 0.0), + "max_drawdown": best.get("max_drawdown", 0.0), + "num_trades": best.get("num_trades", 0), + }, + narrative=best.get("reasoning", "") or "Stored best proposal from explicit agent run.", + alpha_id=alpha.alpha_id, + tags=("agent_graph", best.get("generation_method", "")), + ) + state["run_log"].append( + f"Store: Persisted result {run_id}, alpha {alpha.alpha_id}, NLA note {nla.record_id}." 
+ ) + logger.info("Persisted result %s, alpha %s, NLA note %s.", run_id, alpha.alpha_id, nla.record_id) return state diff --git a/src/agent/context_builder.py b/src/agent/context_builder.py index 6c300e2..acc3308 100644 --- a/src/agent/context_builder.py +++ b/src/agent/context_builder.py @@ -7,10 +7,8 @@ """ import logging -from dataclasses import dataclass, field -from typing import Optional +from dataclasses import dataclass -import numpy as np import pandas as pd from scipy import stats as scipy_stats @@ -36,10 +34,12 @@ class RegimeContext: rsi_14: float = 50.0 price_vs_sma200: float = 0.0 regime_confidence: float = 0.5 + alpha_memory_context: str = "" + nla_memory_context: str = "" def to_prompt_string(self) -> str: """Format context as structured text for LLM prompt injection.""" - return ( + context = ( f"MARKET CONTEXT:\n" f" Regime: {self.regime_label} (confidence: {self.regime_confidence:.0%})\n" f" VIX: {self.vix_level:.1f} (at {self.vix_percentile:.0f}th percentile, trailing 1Y)\n" @@ -55,6 +55,11 @@ def to_prompt_string(self) -> str: f" RSI (14): {self.rsi_14:.1f}\n" f" Drawdown from peak: {self.drawdown_from_peak * 100:.1f}%\n" ) + if self.alpha_memory_context: + context += f"\n{self.alpha_memory_context}\n" + if self.nla_memory_context: + context += f"\n{self.nla_memory_context}\n" + return context def build_context(features_df: pd.DataFrame) -> RegimeContext: diff --git a/src/agent/proposal_generator.py b/src/agent/proposal_generator.py index ad6b6d4..2abb624 100644 --- a/src/agent/proposal_generator.py +++ b/src/agent/proposal_generator.py @@ -12,6 +12,7 @@ from src.agent.base_planner import BasePlanner, create_planner from src.agent.context_builder import RegimeContext from src.agent.parameter_grid import ParameterGrid +from src.research.alpha_store import AlphaStore logger = logging.getLogger(__name__) @@ -94,10 +95,17 @@ class ProposalGenerator: Fallback chain: LLM → GridSearch → Random. 
""" - def __init__(self, planner: Optional[BasePlanner] = None): + def __init__( + self, + planner: Optional[BasePlanner] = None, + alpha_store: Optional[AlphaStore] = None, + use_alpha_memory: bool = True, + ): self.planner = planner or create_planner() self.grid = ParameterGrid() self.validator = ProposalValidator() + self.alpha_store = alpha_store or AlphaStore() + self.use_alpha_memory = use_alpha_memory def generate( self, @@ -120,6 +128,19 @@ def generate( if len(proposals) < n_proposals: needed = n_proposals - len(proposals) existing_params = {tuple(sorted(p.params.items())) for p in proposals} + rejected_params = self._rejected_param_keys(context, strategy_type) + + if self.use_alpha_memory: + memory_proposals = self._memory_generate(context, strategy_type, needed) + for mp in memory_proposals: + if len(proposals) >= n_proposals: + break + key = tuple(sorted(mp.params.items())) + if key not in existing_params: + proposals.append(mp) + existing_params.add(key) + + needed = n_proposals - len(proposals) grid_proposals = self.grid.top_k_by_prior( strategy_type, needed + 3, context.regime_label ) @@ -127,7 +148,7 @@ def generate( if len(proposals) >= n_proposals: break key = tuple(sorted(gp.items())) - if key not in existing_params: + if key not in existing_params and key not in rejected_params: proposals.append(Proposal( params=gp, confidence=0.3, @@ -139,12 +160,13 @@ def generate( # Last resort: random from grid if len(proposals) < n_proposals: needed = n_proposals - len(proposals) + rejected_params = self._rejected_param_keys(context, strategy_type) for rp in self.grid.random_k(strategy_type, needed + 5): if len(proposals) >= n_proposals: break existing_params_set = {tuple(sorted(p.params.items())) for p in proposals} key = tuple(sorted(rp.items())) - if key not in existing_params_set: + if key not in existing_params_set and key not in rejected_params: proposals.append(Proposal( params=rp, confidence=0.1, @@ -154,6 +176,53 @@ def generate( return 
proposals[:n_proposals] + def _memory_generate( + self, + context: RegimeContext, + strategy_type: str, + n: int, + ) -> List[Proposal]: + if n <= 0: + return [] + + grid_keys = { + tuple(sorted(params.items())) + for params in self.grid.get_grid(strategy_type) + } + proposals: List[Proposal] = [] + candidates = self.alpha_store.recall( + regime=context.regime_label, + strategy_type=strategy_type, + statuses=("accepted", "watch"), + n=n, + ) + + for candidate in candidates: + key = tuple(sorted(candidate.params.items())) + if grid_keys and key not in grid_keys: + continue + proposals.append( + Proposal( + params=candidate.params, + confidence=max(candidate.confidence, 0.55), + regime_characteristic_used="alpha_memory", + reasoning=f"Retrieved from alpha DB: {candidate.thesis}", + generation_method="alpha_memory", + ) + ) + return proposals + + def _rejected_param_keys(self, context: RegimeContext, strategy_type: str) -> set: + if not self.use_alpha_memory: + return set() + rejected = self.alpha_store.recall( + regime=context.regime_label, + strategy_type=strategy_type, + statuses=("rejected",), + n=50, + ) + return {tuple(sorted(candidate.params.items())) for candidate in rejected} + def _llm_generate( self, context: RegimeContext, strategy_type: str, n: int ) -> List[Proposal]: diff --git a/src/app/streamlit_app.py b/src/app/streamlit_app.py index 69fc0c3..b861daf 100644 --- a/src/app/streamlit_app.py +++ b/src/app/streamlit_app.py @@ -15,23 +15,30 @@ """ import logging -import os from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional +from pathlib import Path +from typing import Any, Dict, List import matplotlib.pyplot as plt -import numpy as np import pandas as pd +import plotly.express as px import streamlit as st from src.agent.context_builder import build_context from src.agent.parameter_grid import ParameterGrid from src.agent.proposal_generator import ProposalGenerator -from src.backtest.metrics import PerformanceMetrics from 
src.backtest.runner import run_backtest from src.data.ingest import fetch_ohlcv_data from src.features.engine import compute_features -from src.features.regime import detect_regime, detect_regime_full +from src.features.regime import detect_regime_full +from src.research.alpha_store import AlphaCandidate, AlphaStore +from src.research.nla_memory import NLAMemoryStore, NLARecord +from src.research.workspace import ( + build_research_memo, + load_research_workspace, + runs_to_dataframe, + summarize_workspace, +) from src.strategies.strategy_registry import STRATEGY_REGISTRY from src.utils.config import config from src.utils.logging import setup_logging @@ -61,6 +68,15 @@ .regime-crisis { background: #f8d7da; color: #721c24; border: 2px solid #721c24; } .regime-neutral{ background: #fff3cd; color: #856404; } .metric-card { background: #f8f9fa; border-radius: 8px; padding: 0.8rem; margin: 0.3rem; } + .workspace-note { + border-left: 4px solid #1f77b4; + padding: 0.8rem 1rem; + background: #f6f8fa; + border-radius: 6px; + } + .status-pass { color: #116329; font-weight: 700; } + .status-warn { color: #9a6700; font-weight: 700; } + .status-fail { color: #cf222e; font-weight: 700; } """, unsafe_allow_html=True) @@ -68,11 +84,16 @@ # ─── Cached helpers ──────────────────────────────────────────────────────────── @st.cache_data(ttl=3600, show_spinner="Fetching market data…") -def _fetch_data_cached(assets: tuple, start: str, end: str) -> Dict[str, pd.DataFrame]: +def _fetch_data_cached( + assets: tuple, + start: str, + end: str, + force_download: bool = False, +) -> Dict[str, pd.DataFrame]: """Cache market data for 1 hour to avoid re-downloading on every rerun.""" all_data: Dict[str, pd.DataFrame] = {} for ticker in assets: - result = fetch_ohlcv_data(ticker, start, end) + result = fetch_ohlcv_data(ticker, start, end, force_download=force_download) if ticker in result: all_data[ticker] = result[ticker] return all_data @@ -105,16 +126,173 @@ def _regime_badge(regime_label: 
str) -> str: return f'
📊 Market Regime: {regime_label}
' +def _format_pct(value: float) -> str: + return f"{value * 100:.1f}%" + + +def _status_html(status: str) -> str: + status_l = status.lower() + label = {"pass": "Pass", "warn": "Review", "fail": "Fail"}.get(status_l, status.title()) + return f'{label}' + + +def _normalize_tickers(tickers: List[str]) -> List[str]: + normalized = [] + seen = set() + for ticker in tickers: + clean = ticker.strip().upper() + if not clean or clean in seen: + continue + seen.add(clean) + normalized.append(clean) + return normalized + + +def _alpha_candidates_to_dataframe(candidates: List[AlphaCandidate]) -> pd.DataFrame: + rows = [candidate.as_row() for candidate in candidates] + if not rows: + return pd.DataFrame() + df = pd.DataFrame(rows) + for col in ("Return", "Max Drawdown"): + if col in df: + df[col] = df[col].map(_format_pct) + return df + + +def _nla_records_to_dataframe(records: List[NLARecord]) -> pd.DataFrame: + rows = [record.as_row() for record in records] + if not rows: + return pd.DataFrame() + return pd.DataFrame(rows) + + +def render_research_workspace() -> None: + """Render the platform-style experiment registry.""" + runs = load_research_workspace( + experiments_dir=Path("experiments"), + results_db_path=Path(config.results_db_path), + ) + summary = summarize_workspace(runs) + + st.header("Research Workspace") + st.caption( + "A local-first registry for experiments, baselines, validation checks, and report-ready research memos." + ) + + if not runs: + st.info("No experiment artifacts found yet. Run a walk-forward study or backtest to populate the workspace.") + return + + best_run = summary["best_run"] + k1, k2, k3, k4 = st.columns(4) + k1.metric("Tracked Runs", summary["run_count"]) + k2.metric("Best Sharpe", f"{summary['best_sharpe']:.3f}") + k3.metric("Best Robustness", f"{summary['best_robustness']:.3f}") + k4.metric("Validation Pass Rate", _format_pct(summary["validation_pass_rate"])) + + if best_run: + st.markdown( + f""" +
+ Current leader: {best_run.name} with robustness + {best_run.robustness_score:.3f}. Use this as the anchor run when comparing new agent or swarm experiments. +
+ """, + unsafe_allow_html=True, + ) + + df_runs = runs_to_dataframe(runs) + display_df = df_runs.copy() + for col in ("Return", "Max Drawdown"): + if col in display_df: + display_df[col] = display_df[col].map(_format_pct) + + st.subheader("Experiment Registry") + st.dataframe(display_df, use_container_width=True, hide_index=True) + + chart_df = df_runs.copy() + if not chart_df.empty: + min_robustness = chart_df["Robustness"].min() + chart_df["Marker Size"] = (chart_df["Robustness"] - min_robustness + 0.1).clip(lower=0.1) + st.subheader("Robustness Map") + fig = px.scatter( + chart_df, + x="Max Drawdown", + y="Sharpe", + size="Marker Size", + color="Mode", + hover_name="Name", + hover_data=["Strategy", "Source", "Validation", "Robustness"], + title="Sharpe vs. Drawdown by Research Run", + ) + fig.update_layout(height=420, margin=dict(l=10, r=10, t=50, b=10)) + st.plotly_chart(fig, use_container_width=True) + + st.subheader("Run Inspector") + run_lookup = {f"{run.name} ({run.run_id})": run for run in runs} + selected_label = st.selectbox("Select a research run", list(run_lookup.keys())) + selected = run_lookup[selected_label] + + left, right = st.columns([1, 1]) + with left: + st.markdown(build_research_memo(selected)) + + with right: + st.markdown("### Validation") + for check in selected.validation_checks: + st.markdown( + f"- {_status_html(check.status)} **{check.name}:** {check.detail}", + unsafe_allow_html=True, + ) + + st.markdown("### Artifacts") + for artifact in selected.artifacts: + st.code(artifact) + + st.subheader("Alpha Memory") + alpha_store = AlphaStore() + alpha_candidates = alpha_store.list_recent(25) + if alpha_candidates: + accepted = sum(1 for alpha in alpha_candidates if alpha.status == "accepted") + watch = sum(1 for alpha in alpha_candidates if alpha.status == "watch") + rejected = sum(1 for alpha in alpha_candidates if alpha.status == "rejected") + a1, a2, a3, a4 = st.columns(4) + a1.metric("Stored Alphas", len(alpha_candidates)) + 
a2.metric("Accepted", accepted) + a3.metric("Watchlist", watch) + a4.metric("Rejected", rejected) + st.dataframe(_alpha_candidates_to_dataframe(alpha_candidates), use_container_width=True, hide_index=True) + else: + st.info("Alpha memory is empty. Run Agent Lab to generate and persist candidates.") + + st.subheader("NLA Memory") + nla_store = NLAMemoryStore() + nla_records = nla_store.list_recent(25) + if nla_records: + n1, n2, n3 = st.columns(3) + n1.metric("Stored NLA Notes", len(nla_records)) + n2.metric("Avg Quality", f"{sum(r.quality_score for r in nla_records) / len(nla_records):.3f}") + n3.metric("Gemma/NLA Imports", sum(1 for r in nla_records if "nla" in r.source_model.lower())) + st.dataframe(_nla_records_to_dataframe(nla_records), use_container_width=True, hide_index=True) + else: + st.info("NLA memory is empty. Agent Lab will write explicit summaries; Gemma4 NLA JSONL can be imported later.") + + +def render_agent_memory_context(regime_label: str, strategy_type: str) -> None: + alpha_context = AlphaStore().to_prompt_context(regime_label, strategy_type, n=5) + nla_context = NLAMemoryStore().to_prompt_context(regime_label, strategy_type, n=5) + with st.expander("Alpha memory used for this run", expanded=False): + st.code(alpha_context) + with st.expander("NLA memory used for this run", expanded=False): + st.code(nla_context) + + # ─── Sidebar ─────────────────────────────────────────────────────────────────── def render_sidebar() -> Dict[str, Any]: st.sidebar.title("⚙️ AgentQuant") st.sidebar.markdown("---") - available_assets = [f.stem for f in ( - __import__("pathlib").Path(config.data_path).glob("*.parquet") - ) if not f.stem.startswith("FRED_")] or config.universe - st.sidebar.header("Date Range") today = datetime.now() end_default = today - timedelta(days=1) @@ -124,11 +302,35 @@ def render_sidebar() -> Dict[str, Any]: end_date = st.sidebar.date_input("End Date", value=end_default, max_value=today) st.sidebar.header("Assets") - selected_assets = 
st.sidebar.multiselect( - "Select Assets", - options=available_assets, - default=available_assets[:4] if len(available_assets) >= 4 else available_assets, + starter_assets = _normalize_tickers( + config.universe + + [ + "AAPL", + "MSFT", + "NVDA", + "AMZN", + "META", + "GOOGL", + "TSLA", + "JPM", + "XOM", + "BTC-USD", + "ETH-USD", + ] + ) + selected_presets = st.sidebar.multiselect( + "Choose stocks or ETFs", + options=starter_assets, + default=config.universe[:4] if len(config.universe) >= 4 else config.universe, + ) + custom_tickers = st.sidebar.text_input( + "Add tickers", + value="", + placeholder="e.g. AAPL, MSFT, NVDA", ) + custom_assets = custom_tickers.replace("\n", ",").split(",") + selected_assets = _normalize_tickers(selected_presets + custom_assets) + force_download = st.sidebar.checkbox("Refresh market data now", value=False) st.sidebar.header("Strategy") strategy_type = st.sidebar.selectbox( @@ -145,6 +347,7 @@ def render_sidebar() -> Dict[str, Any]: "start_date": start_date, "end_date": end_date, "selected_assets": selected_assets, + "force_download": force_download, "strategy_type": strategy_type, "n_proposals": n_proposals, "run_agent": run_btn, @@ -154,7 +357,7 @@ def render_sidebar() -> Dict[str, Any]: # ─── Main ────────────────────────────────────────────────────────────────────── def main(): - st.title("🤖 AgentQuant: AI Trading Research Platform") + st.title("🤖 AgentQuant Research Platform") # Session state init for key, default in [ @@ -163,12 +366,27 @@ def main(): ("regime_label", ""), ("regime_signals", None), ("_data_cache", {}), + ("stored_alphas", []), + ("stored_nla_records", []), + ("alpha_memory_context", ""), + ("nla_memory_context", ""), ]: if key not in st.session_state: st.session_state[key] = default opts = render_sidebar() + render_research_workspace() + st.divider() + st.header("Agent Lab") + st.caption("Generate new strategy proposals, backtest them, and promote successful runs into the research workspace.") + if 
st.session_state.alpha_memory_context: + with st.expander("Latest alpha memory context", expanded=False): + st.code(st.session_state.alpha_memory_context) + if st.session_state.nla_memory_context: + with st.expander("Latest NLA memory context", expanded=False): + st.code(st.session_state.nla_memory_context) + # ── Regime banner (always show if we have a regime) ────────────────────── if st.session_state.regime_label: st.markdown(_regime_badge(st.session_state.regime_label), unsafe_allow_html=True) @@ -195,7 +413,12 @@ def main(): try: # Step 1: Fetch data progress.progress(10, text="📥 Fetching market data…") - data = _fetch_data_cached(assets_tuple, start_str, end_str) + data = _fetch_data_cached( + assets_tuple, + start_str, + end_str, + force_download=opts["force_download"], + ) st.session_state._data_cache = data if config.reference_asset not in data: @@ -211,15 +434,30 @@ def main(): signals = detect_regime_full(features_df) context = build_context(features_df) context.regime_label = signals.regime_label + alpha_store = AlphaStore() + nla_store = NLAMemoryStore() + context.alpha_memory_context = alpha_store.to_prompt_context( + signals.regime_label, + opts["strategy_type"], + n=5, + ) + context.nla_memory_context = nla_store.to_prompt_context( + signals.regime_label, + opts["strategy_type"], + n=5, + ) st.session_state.regime_label = signals.regime_label st.session_state.regime_signals = signals + st.session_state.alpha_memory_context = context.alpha_memory_context + st.session_state.nla_memory_context = context.nla_memory_context # Refresh regime banner immediately st.markdown(_regime_badge(signals.regime_label), unsafe_allow_html=True) + render_agent_memory_context(signals.regime_label, opts["strategy_type"]) # Step 4: Generate proposals progress.progress(50, text="🧠 Generating strategy proposals…") - generator = ProposalGenerator() + generator = ProposalGenerator(alpha_store=alpha_store) proposals = generator.generate( context=context, 
n_proposals=opts["n_proposals"], @@ -228,6 +466,8 @@ def main(): # Step 5: Backtest each proposal backtest_results = {} + stored_alphas = [] + stored_nla_records = [] for i, proposal in enumerate(proposals): pct = 55 + int(40 * (i + 1) / len(proposals)) progress.progress( @@ -247,15 +487,44 @@ def main(): "proposal": proposal, "result": result, } + stored_alphas.append( + alpha_store.store_backtest_result( + regime=signals.regime_label, + strategy_type=opts["strategy_type"], + params=proposal.params, + metrics=result["metrics"], + assets=list(opts["selected_assets"]), + generation_method=proposal.generation_method, + confidence=proposal.confidence, + reasoning=proposal.reasoning, + source="streamlit_agent_lab", + ) + ) + stored_nla_records.append( + nla_store.store_agent_summary( + regime=signals.regime_label, + strategy_type=opts["strategy_type"], + params=proposal.params, + metrics=result["metrics"], + narrative=proposal.reasoning + or "Explicit Agent Lab summary for this tested proposal.", + alpha_id=stored_alphas[-1].alpha_id, + tags=("streamlit_agent_lab", proposal.generation_method), + ) + ) except Exception as e: logger.warning("Backtest failed for proposal %d: %s", i + 1, e) st.session_state.strategies = proposals st.session_state.backtest_results = backtest_results + st.session_state.stored_alphas = stored_alphas + st.session_state.stored_nla_records = stored_nla_records progress.progress(100, text="✅ Done!") st.success( - f"Generated {len(proposals)} proposals, {len(backtest_results)} backtested successfully." + f"Generated {len(proposals)} proposals, {len(backtest_results)} backtested successfully, " + f"stored {len(stored_alphas)} alpha candidates and " + f"{len(stored_nla_records)} NLA memory records." 
) except Exception as e: @@ -268,6 +537,21 @@ def main(): if st.session_state.backtest_results: results = st.session_state.backtest_results + if st.session_state.stored_alphas: + st.subheader("Stored Alpha Candidates") + st.dataframe( + _alpha_candidates_to_dataframe(st.session_state.stored_alphas), + use_container_width=True, + hide_index=True, + ) + if st.session_state.stored_nla_records: + st.subheader("Stored NLA Memory") + st.dataframe( + _nla_records_to_dataframe(st.session_state.stored_nla_records), + use_container_width=True, + hide_index=True, + ) + # #25: Comparative table — most important quant view st.subheader("📊 Strategy Comparison") rows = [] @@ -288,8 +572,6 @@ def main(): }) if rows: df_cmp = pd.DataFrame(rows).set_index("Strategy") - # Highlight best Sharpe - best_sharpe_idx = df_cmp["Sharpe"].astype(float).idxmax() st.dataframe( df_cmp.style.highlight_max(subset=["Sharpe"], color="#d4edda") .highlight_min(subset=["Max DD"], color="#d4edda"), diff --git a/src/data/ingest.py b/src/data/ingest.py index 4513434..a2d8891 100644 --- a/src/data/ingest.py +++ b/src/data/ingest.py @@ -66,6 +66,29 @@ def _is_cache_valid(file_path: Path) -> bool: return True +def _cache_covers_range(df: pd.DataFrame, start_date=None, end_date=None) -> bool: + """Return True when a cached frame covers the requested date range.""" + if df.empty: + return False + if not isinstance(df.index, pd.DatetimeIndex): + return False + + index = df.index.tz_localize(None) if df.index.tz is not None else df.index + min_date = index.min().normalize() + max_date = index.max().normalize() + + if start_date: + requested_start = pd.to_datetime(start_date).normalize() + if min_date > requested_start: + return False + if end_date: + requested_end = pd.to_datetime(end_date).normalize() + # yfinance treats end as exclusive. Permit a small weekend/holiday gap. 
+ if max_date < requested_end - pd.Timedelta(days=3): + return False + return True + + def fetch_ohlcv_data( ticker: Optional[str] = None, start_date=None, @@ -103,6 +126,9 @@ def fetch_ohlcv_data( if not force_download and _is_cache_valid(file_path): try: df = pd.read_parquet(file_path) + if not _cache_covers_range(df, start_date, end_date): + logger.info("Cache for %s does not cover requested range. Re-fetching.", t) + raise ValueError("cache does not cover requested date range") if start_date: df = df[df.index >= pd.to_datetime(start_date)] if end_date: @@ -176,4 +202,4 @@ def fetch_fred_data(force_download: bool = False) -> Optional[Dict[str, pd.DataF except Exception as e: logger.error("Could not fetch FRED series %s: %s", series_id, e) - return fred_data \ No newline at end of file + return fred_data diff --git a/src/research/__init__.py b/src/research/__init__.py new file mode 100644 index 0000000..9968043 --- /dev/null +++ b/src/research/__init__.py @@ -0,0 +1,25 @@ +"""Research workspace primitives for the AgentQuant platform.""" + +from src.research.alpha_store import AlphaCandidate, AlphaStore +from src.research.nla_memory import NLAMemoryStore, NLARecord +from src.research.workspace import ( + ResearchRun, + ValidationCheck, + build_research_memo, + load_research_workspace, + runs_to_dataframe, + summarize_workspace, +) + +__all__ = [ + "AlphaCandidate", + "AlphaStore", + "NLAMemoryStore", + "NLARecord", + "ResearchRun", + "ValidationCheck", + "build_research_memo", + "load_research_workspace", + "runs_to_dataframe", + "summarize_workspace", +] diff --git a/src/research/alpha_store.py b/src/research/alpha_store.py new file mode 100644 index 0000000..940a43c --- /dev/null +++ b/src/research/alpha_store.py @@ -0,0 +1,284 @@ +""" +Alpha Store +=========== + +SQLite-backed memory for alpha candidates discovered by Agent Lab runs. 
+Each candidate keeps the thesis, parameters, regime, validation metrics, and +status so future agents can retrieve the strongest prior evidence. +""" + +from __future__ import annotations + +import json +import sqlite3 +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, Iterable, List + +from src.utils.config import config + + +@dataclass +class AlphaCandidate: + """A discovered alpha candidate and its validation evidence.""" + + alpha_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8]) + timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + regime: str = "Unknown" + strategy_type: str = "" + params: Dict[str, Any] = field(default_factory=dict) + thesis: str = "" + status: str = "watch" + sharpe: float = 0.0 + total_return: float = 0.0 + max_drawdown: float = 0.0 + num_trades: int = 0 + confidence: float = 0.0 + alpha_score: float = 0.0 + generation_method: str = "" + assets: List[str] = field(default_factory=list) + source: str = "" + + def as_row(self) -> Dict[str, Any]: + return { + "Alpha ID": self.alpha_id, + "Status": self.status, + "Regime": self.regime, + "Strategy": self.strategy_type, + "Params": self.params, + "Sharpe": round(self.sharpe, 3), + "Return": self.total_return, + "Max Drawdown": self.max_drawdown, + "Trades": self.num_trades, + "Score": round(self.alpha_score, 3), + "Method": self.generation_method, + "Assets": ", ".join(self.assets), + "Thesis": self.thesis, + } + + +class AlphaStore: + """Persistence and retrieval layer for alpha candidates.""" + + def __init__(self, db_path: str | Path | None = None): + self.db_path = Path(db_path or config.results_db_path) + self.db_path.parent.mkdir(parents=True, exist_ok=True) + self._init_db() + + def _init_db(self) -> None: + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + CREATE TABLE IF NOT EXISTS alpha_candidates ( + alpha_id TEXT 
PRIMARY KEY, + timestamp TEXT NOT NULL, + regime TEXT NOT NULL, + strategy_type TEXT NOT NULL, + params_json TEXT NOT NULL, + thesis TEXT DEFAULT '', + status TEXT DEFAULT 'watch', + sharpe REAL DEFAULT 0.0, + total_return REAL DEFAULT 0.0, + max_drawdown REAL DEFAULT 0.0, + num_trades INTEGER DEFAULT 0, + confidence REAL DEFAULT 0.0, + alpha_score REAL DEFAULT 0.0, + generation_method TEXT DEFAULT '', + assets_json TEXT DEFAULT '[]', + source TEXT DEFAULT '' + ) + """) + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_alpha_lookup + ON alpha_candidates (regime, strategy_type, status, alpha_score) + """) + + def store(self, candidate: AlphaCandidate) -> str: + """Insert or replace an alpha candidate.""" + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """INSERT OR REPLACE INTO alpha_candidates + (alpha_id, timestamp, regime, strategy_type, params_json, + thesis, status, sharpe, total_return, max_drawdown, + num_trades, confidence, alpha_score, generation_method, + assets_json, source) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + candidate.alpha_id, + candidate.timestamp, + candidate.regime, + candidate.strategy_type, + json.dumps(candidate.params, sort_keys=True), + candidate.thesis, + candidate.status, + candidate.sharpe, + candidate.total_return, + candidate.max_drawdown, + candidate.num_trades, + candidate.confidence, + candidate.alpha_score, + candidate.generation_method, + json.dumps(candidate.assets), + candidate.source, + ), + ) + return candidate.alpha_id + + def store_backtest_result( + self, + *, + regime: str, + strategy_type: str, + params: Dict[str, Any], + metrics: Dict[str, Any], + assets: Iterable[str], + generation_method: str = "", + confidence: float = 0.0, + reasoning: str = "", + source: str = "", + ) -> AlphaCandidate: + """Create and persist an alpha candidate from a backtest result.""" + sharpe = _metric(metrics, "sharpe_ratio", "sharpe") + total_return = _metric(metrics, "total_return") + max_drawdown 
= abs(_metric(metrics, "max_drawdown")) + num_trades = int(_metric(metrics, "num_trades")) + alpha_score = _alpha_score(sharpe, max_drawdown, num_trades) + status = _status_from_metrics(sharpe, max_drawdown) + thesis = reasoning or _default_thesis(strategy_type, params, regime) + + candidate = AlphaCandidate( + regime=regime, + strategy_type=strategy_type, + params=dict(params), + thesis=thesis, + status=status, + sharpe=sharpe, + total_return=total_return, + max_drawdown=max_drawdown, + num_trades=num_trades, + confidence=float(confidence or 0.0), + alpha_score=alpha_score, + generation_method=generation_method, + assets=sorted(set(assets)), + source=source, + ) + self.store(candidate) + return candidate + + def recall( + self, + *, + regime: str = "", + strategy_type: str = "", + statuses: Iterable[str] = ("accepted", "watch"), + n: int = 5, + ) -> List[AlphaCandidate]: + """Recall top alpha candidates for similar future agent runs.""" + query = "SELECT * FROM alpha_candidates WHERE 1=1" + params: List[Any] = [] + + if regime: + query += " AND regime = ?" + params.append(regime) + if strategy_type: + query += " AND strategy_type = ?" + params.append(strategy_type) + + statuses = tuple(statuses) + if statuses: + placeholders = ",".join("?" for _ in statuses) + query += f" AND status IN ({placeholders})" + params.extend(statuses) + + query += " ORDER BY alpha_score DESC, timestamp DESC LIMIT ?" 
+ params.append(n) + + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute(query, params).fetchall() + + return [_row_to_candidate(row) for row in rows] + + def list_recent(self, n: int = 25) -> List[AlphaCandidate]: + """Return the most recent alpha candidates regardless of status.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT * FROM alpha_candidates ORDER BY timestamp DESC LIMIT ?", + (n,), + ).fetchall() + return [_row_to_candidate(row) for row in rows] + + def to_prompt_context(self, regime: str, strategy_type: str = "", n: int = 5) -> str: + """Format recalled alpha candidates as retrieval context for an agent.""" + candidates = self.recall(regime=regime, strategy_type=strategy_type, n=n) + rejected = self.recall( + regime=regime, + strategy_type=strategy_type, + statuses=("rejected",), + n=n, + ) + if not candidates and not rejected: + return "No stored alpha candidates for this regime and strategy yet." + + lines = ["ALPHA MEMORY FROM PRIOR RUNS:"] + for alpha in candidates: + lines.append( + f" - {alpha.status.upper()} {alpha.strategy_type} {json.dumps(alpha.params, sort_keys=True)} " + f"| Sharpe={alpha.sharpe:.2f}, Drawdown={alpha.max_drawdown:.1%}, " + f"Score={alpha.alpha_score:.2f}, Thesis={alpha.thesis}" + ) + for alpha in rejected: + lines.append( + f" - REJECTED {alpha.strategy_type} {json.dumps(alpha.params, sort_keys=True)} " + f"| Sharpe={alpha.sharpe:.2f}, Drawdown={alpha.max_drawdown:.1%}. " + "Avoid repeating this exact configuration unless new evidence changes." 
+ ) + return "\n".join(lines) + + +def _metric(metrics: Dict[str, Any], *names: str) -> float: + for name in names: + value = metrics.get(name) + if value is not None: + return float(value) + return 0.0 + + +def _alpha_score(sharpe: float, max_drawdown: float, num_trades: int) -> float: + trade_penalty = min(num_trades / 1_000.0, 0.25) + return float(sharpe - max_drawdown - trade_penalty) + + +def _status_from_metrics(sharpe: float, max_drawdown: float) -> str: + if sharpe >= config.agent.min_acceptable_sharpe and max_drawdown <= config.agent.risk.max_drawdown: + return "accepted" + if sharpe > 0: + return "watch" + return "rejected" + + +def _default_thesis(strategy_type: str, params: Dict[str, Any], regime: str) -> str: + return f"{strategy_type} parameters {json.dumps(params, sort_keys=True)} tested in {regime}." + + +def _row_to_candidate(row: sqlite3.Row) -> AlphaCandidate: + return AlphaCandidate( + alpha_id=row["alpha_id"], + timestamp=row["timestamp"], + regime=row["regime"], + strategy_type=row["strategy_type"], + params=json.loads(row["params_json"] or "{}"), + thesis=row["thesis"] or "", + status=row["status"] or "watch", + sharpe=float(row["sharpe"] or 0.0), + total_return=float(row["total_return"] or 0.0), + max_drawdown=float(row["max_drawdown"] or 0.0), + num_trades=int(row["num_trades"] or 0), + confidence=float(row["confidence"] or 0.0), + alpha_score=float(row["alpha_score"] or 0.0), + generation_method=row["generation_method"] or "", + assets=json.loads(row["assets_json"] or "[]"), + source=row["source"] or "", + ) diff --git a/src/research/nla_memory.py b/src/research/nla_memory.py new file mode 100644 index 0000000..0e71d17 --- /dev/null +++ b/src/research/nla_memory.py @@ -0,0 +1,258 @@ +""" +NLA Memory +========== + +SQLite-backed storage for explicit natural-language activation narratives. 
+This module consumes NLA-style outputs such as the JSONL files emitted by +OnePunchMonk/nla-gemma4 and exposes them as retrieval context for future +strategy agents. +""" + +from __future__ import annotations + +import json +import sqlite3 +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, Iterable, List + +from src.utils.config import config + + +@dataclass +class NLARecord: + """A stored explicit activation narrative for future research retrieval.""" + + record_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8]) + timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + regime: str = "Unknown" + strategy_type: str = "" + params: Dict[str, Any] = field(default_factory=dict) + narrative: str = "" + source_text: str = "" + source_model: str = "" + cosine: float = 0.0 + direction_mse: float = 0.0 + quality_score: float = 0.0 + tags: List[str] = field(default_factory=list) + alpha_id: str = "" + + def as_row(self) -> Dict[str, Any]: + return { + "Record ID": self.record_id, + "Regime": self.regime, + "Strategy": self.strategy_type, + "Params": self.params, + "Quality": round(self.quality_score, 3), + "Cosine": round(self.cosine, 3), + "Direction MSE": round(self.direction_mse, 3), + "Source": self.source_model, + "Tags": ", ".join(self.tags), + "Narrative": self.narrative, + } + + +class NLAMemoryStore: + """Persistence and retrieval layer for explicit NLA narratives.""" + + def __init__(self, db_path: str | Path | None = None): + self.db_path = Path(db_path or config.results_db_path) + self.db_path.parent.mkdir(parents=True, exist_ok=True) + self._init_db() + + def _init_db(self) -> None: + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + CREATE TABLE IF NOT EXISTS nla_records ( + record_id TEXT PRIMARY KEY, + timestamp TEXT NOT NULL, + regime TEXT NOT NULL, + strategy_type TEXT NOT NULL, + params_json TEXT 
NOT NULL, + narrative TEXT DEFAULT '', + source_text TEXT DEFAULT '', + source_model TEXT DEFAULT '', + cosine REAL DEFAULT 0.0, + direction_mse REAL DEFAULT 0.0, + quality_score REAL DEFAULT 0.0, + tags_json TEXT DEFAULT '[]', + alpha_id TEXT DEFAULT '' + ) + """) + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_nla_lookup + ON nla_records (regime, strategy_type, quality_score) + """) + + def store(self, record: NLARecord) -> str: + """Insert or replace an NLA memory record.""" + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """INSERT OR REPLACE INTO nla_records + (record_id, timestamp, regime, strategy_type, params_json, + narrative, source_text, source_model, cosine, direction_mse, + quality_score, tags_json, alpha_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + record.record_id, + record.timestamp, + record.regime, + record.strategy_type, + json.dumps(record.params, sort_keys=True), + record.narrative, + record.source_text, + record.source_model, + float(record.cosine or 0.0), + float(record.direction_mse or 0.0), + float(record.quality_score or 0.0), + json.dumps(record.tags), + record.alpha_id, + ), + ) + return record.record_id + + def store_agent_summary( + self, + *, + regime: str, + strategy_type: str, + params: Dict[str, Any], + narrative: str, + metrics: Dict[str, Any], + alpha_id: str = "", + source_model: str = "agentquant-explicit-summary", + tags: Iterable[str] = (), + ) -> NLARecord: + """Store an explicit agent summary as NLA-compatible memory.""" + sharpe = float(metrics.get("sharpe_ratio", metrics.get("sharpe", 0.0)) or 0.0) + max_drawdown = abs(float(metrics.get("max_drawdown", 0.0) or 0.0)) + quality_score = sharpe - max_drawdown + record = NLARecord( + regime=regime, + strategy_type=strategy_type, + params=dict(params), + narrative=narrative, + source_text=_agent_source_text(strategy_type, params, metrics), + source_model=source_model, + quality_score=quality_score, + tags=sorted(set(tags)), + 
alpha_id=alpha_id, + ) + self.store(record) + return record + + def ingest_nla_jsonl( + self, + path: str | Path, + *, + regime: str, + strategy_type: str, + params: Dict[str, Any] | None = None, + source_model: str = "gemma4-nla", + tags: Iterable[str] = (), + ) -> List[NLARecord]: + """Import NLA JSONL evaluation output from nla-gemma4.""" + records: List[NLARecord] = [] + with Path(path).open("r", encoding="utf-8") as handle: + for line in handle: + if not line.strip(): + continue + payload = json.loads(line) + cosine = float(payload.get("cosine", 0.0) or 0.0) + direction_mse = float(payload.get("direction_mse", 0.0) or 0.0) + record = NLARecord( + regime=regime, + strategy_type=strategy_type, + params=dict(params or {}), + narrative=str(payload.get("explanation", "")), + source_text=str(payload.get("text", "")), + source_model=str(payload.get("model_id", source_model)), + cosine=cosine, + direction_mse=direction_mse, + quality_score=cosine - direction_mse, + tags=sorted(set(tags)), + ) + self.store(record) + records.append(record) + return records + + def recall( + self, + *, + regime: str = "", + strategy_type: str = "", + n: int = 5, + ) -> List[NLARecord]: + """Recall top explicit NLA records for similar future agent runs.""" + query = "SELECT * FROM nla_records WHERE 1=1" + params: List[Any] = [] + + if regime: + query += " AND regime = ?" + params.append(regime) + if strategy_type: + query += " AND strategy_type = ?" + params.append(strategy_type) + + query += " ORDER BY quality_score DESC, timestamp DESC LIMIT ?" 
+ params.append(n) + + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute(query, params).fetchall() + return [_row_to_record(row) for row in rows] + + def list_recent(self, n: int = 25) -> List[NLARecord]: + """Return recent NLA records regardless of regime.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT * FROM nla_records ORDER BY timestamp DESC LIMIT ?", + (n,), + ).fetchall() + return [_row_to_record(row) for row in rows] + + def to_prompt_context(self, regime: str, strategy_type: str = "", n: int = 5) -> str: + """Format explicit NLA memories as retrieval context for an agent.""" + records = self.recall(regime=regime, strategy_type=strategy_type, n=n) + if not records: + return "No NLA memory records for this regime and strategy yet." + + lines = [ + "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES:", + " Use these as research notes, not as hidden chain-of-thought.", + ] + for record in records: + lines.append( + f" - {record.strategy_type} {json.dumps(record.params, sort_keys=True)} " + f"| Quality={record.quality_score:.2f}, Source={record.source_model}, " + f"Narrative={record.narrative}" + ) + return "\n".join(lines) + + +def _agent_source_text(strategy_type: str, params: Dict[str, Any], metrics: Dict[str, Any]) -> str: + return ( + f"{strategy_type} proposal {json.dumps(params, sort_keys=True)} " + f"produced metrics {json.dumps(metrics, sort_keys=True, default=str)}" + ) + + +def _row_to_record(row: sqlite3.Row) -> NLARecord: + return NLARecord( + record_id=row["record_id"], + timestamp=row["timestamp"], + regime=row["regime"], + strategy_type=row["strategy_type"], + params=json.loads(row["params_json"] or "{}"), + narrative=row["narrative"] or "", + source_text=row["source_text"] or "", + source_model=row["source_model"] or "", + cosine=float(row["cosine"] or 0.0), + direction_mse=float(row["direction_mse"] or 0.0), + 
quality_score=float(row["quality_score"] or 0.0), + tags=json.loads(row["tags_json"] or "[]"), + alpha_id=row["alpha_id"] or "", + ) diff --git a/src/research/workspace.py b/src/research/workspace.py new file mode 100644 index 0000000..eda8196 --- /dev/null +++ b/src/research/workspace.py @@ -0,0 +1,432 @@ +""" +Research Workspace +================== + +Typed platform layer for turning experiment outputs into inspectable research +runs. The dashboard can render these objects without knowing whether they came +from SQLite, CSV backtests, or future agent-generated reports. +""" + +from __future__ import annotations + +import json +import math +import re +from dataclasses import dataclass, field +from pathlib import Path +from statistics import mean, median +from typing import Any, Dict, Iterable, List + +import pandas as pd + +PASS = "pass" +WARN = "warn" +FAIL = "fail" + + +@dataclass(frozen=True) +class ValidationCheck: + """A research hygiene check shown in the platform workspace.""" + + name: str + status: str + detail: str + + +@dataclass(frozen=True) +class ResearchRun: + """A normalized, UI-ready experiment or benchmark run.""" + + run_id: str + name: str + source: str + strategy: str + mode: str + metrics: Dict[str, float] + validation_checks: List[ValidationCheck] = field(default_factory=list) + artifacts: List[str] = field(default_factory=list) + notes: str = "" + timestamp: str = "" + git_hash: str = "" + + @property + def sharpe(self) -> float: + return float(self.metrics.get("sharpe", 0.0)) + + @property + def total_return(self) -> float: + return float(self.metrics.get("total_return", 0.0)) + + @property + def max_drawdown(self) -> float: + return abs(float(self.metrics.get("max_drawdown", 0.0))) + + @property + def robustness_score(self) -> float: + return float(self.metrics.get("robustness_score", self.sharpe - self.max_drawdown)) + + @property + def validation_status(self) -> str: + statuses = {check.status for check in self.validation_checks} + if 
FAIL in statuses: + return FAIL + if WARN in statuses: + return WARN + return PASS + + def as_row(self) -> Dict[str, Any]: + return { + "Run ID": self.run_id, + "Name": self.name, + "Mode": self.mode, + "Strategy": self.strategy, + "Source": self.source, + "Sharpe": round(self.sharpe, 3), + "Return": self.total_return, + "Max Drawdown": self.max_drawdown, + "Robustness": round(self.robustness_score, 3), + "Validation": self.validation_status, + "Git": self.git_hash, + } + + +def _coerce_float(value: Any, default: float = 0.0) -> float: + """Extract a numeric scalar from plain values or pandas string dumps.""" + if value is None: + return default + if isinstance(value, (int, float)) and not isinstance(value, bool): + if math.isnan(value): + return default + return float(value) + + text = str(value).strip() + matches = re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", text) + if not matches: + return default + return float(matches[0]) + + +def _metric_alias(row: Dict[str, Any], *names: str) -> float: + for name in names: + if name in row and pd.notna(row[name]): + return _coerce_float(row[name]) + return 0.0 + + +def _basic_checks( + *, + sharpe: float, + max_drawdown: float, + n_windows: int = 1, + source: str, +) -> List[ValidationCheck]: + checks = [ + ValidationCheck( + name="Metric completeness", + status=PASS if isinstance(sharpe, float) and isinstance(max_drawdown, float) else FAIL, + detail="Sharpe and drawdown are available for comparison.", + ), + ValidationCheck( + name="Drawdown sanity", + status=PASS if abs(max_drawdown) <= 0.35 else WARN, + detail=f"Observed max drawdown is {abs(max_drawdown) * 100:.1f}%.", + ), + ] + + if source == "walk_forward": + checks.append( + ValidationCheck( + name="Temporal validation", + status=PASS if n_windows >= 3 else WARN, + detail=f"Evaluated across {n_windows} chronological windows.", + ) + ) + checks.append( + ValidationCheck( + name="Robustness floor", + status=PASS if sharpe > 0 and n_windows >= 3 else WARN, + 
detail="Mean Sharpe remains positive after chronological splitting.", + ) + ) + else: + checks.append( + ValidationCheck( + name="Baseline context", + status=WARN, + detail="Useful benchmark, but not a leakage-safe validation protocol.", + ) + ) + + return checks + + +def _aggregate_walk_forward(path: Path) -> ResearchRun | None: + df = pd.read_csv(path) + if df.empty: + return None + + sharpes = [_coerce_float(v) for v in df["sharpe"].tolist()] + returns = [_coerce_float(v) for v in df["return"].tolist()] + drawdowns = [_coerce_float(v) for v in df["drawdown"].tolist()] + mean_sharpe = mean(sharpes) + sharpe_std = pd.Series(sharpes).std(ddof=0) if len(sharpes) > 1 else 0.0 + max_drawdown = max(abs(v) for v in drawdowns) if drawdowns else 0.0 + metrics = { + "sharpe": mean_sharpe, + "median_sharpe": median(sharpes), + "min_sharpe": min(sharpes), + "sharpe_std": float(sharpe_std), + "total_return": sum(returns), + "max_drawdown": max_drawdown, + "robustness_score": mean_sharpe - float(sharpe_std) - max_drawdown, + "n_windows": float(len(df)), + } + return ResearchRun( + run_id="wf-momentum", + name="Walk-forward momentum study", + source="walk_forward", + strategy="momentum", + mode="Agent research", + metrics=metrics, + validation_checks=_basic_checks( + sharpe=mean_sharpe, + max_drawdown=max_drawdown, + n_windows=len(df), + source="walk_forward", + ), + artifacts=[str(path)], + notes="Chronological windows with agent-selected momentum parameters.", + ) + + +def _runs_from_static_baselines(path: Path) -> List[ResearchRun]: + df = pd.read_csv(path) + runs = [] + for idx, row in df.iterrows(): + record = row.to_dict() + strategy = str(record.get("strategy", f"baseline-{idx}")) + sharpe = _metric_alias(record, "sharpe", "sharpe_ratio") + total_return = _metric_alias(record, "return", "total_return") + max_drawdown = abs(_metric_alias(record, "drawdown", "max_drawdown")) + runs.append( + ResearchRun( + run_id=f"base-{idx + 1}", + name=f"{strategy} benchmark", + 
source="baseline", + strategy=strategy, + mode="Benchmark", + metrics={ + "sharpe": sharpe, + "total_return": total_return, + "max_drawdown": max_drawdown, + "robustness_score": sharpe - max_drawdown, + }, + validation_checks=_basic_checks( + sharpe=sharpe, + max_drawdown=max_drawdown, + source="baseline", + ), + artifacts=[str(path)], + notes="Static benchmark used to anchor agent results.", + ) + ) + return runs + + +def _aggregate_random_baseline(path: Path) -> ResearchRun | None: + df = pd.read_csv(path) + if df.empty: + return None + + sharpes = [_coerce_float(v) for v in df["sharpe"].tolist()] + returns = [_coerce_float(v) for v in df["return"].tolist()] + drawdowns = [_coerce_float(v) for v in df["drawdown"].tolist()] + p95 = float(pd.Series(sharpes).quantile(0.95)) + max_drawdown = max(abs(v) for v in drawdowns) if drawdowns else 0.0 + metrics = { + "sharpe": mean(sharpes), + "p95_sharpe": p95, + "total_return": mean(returns), + "max_drawdown": max_drawdown, + "robustness_score": mean(sharpes) - max_drawdown, + "n_trials": float(len(df)), + } + return ResearchRun( + run_id="rnd-momentum", + name="Random momentum baseline", + source="baseline", + strategy="momentum", + mode="Benchmark", + metrics=metrics, + validation_checks=_basic_checks( + sharpe=metrics["sharpe"], + max_drawdown=max_drawdown, + source="baseline", + ), + artifacts=[str(path)], + notes="Distributional baseline for checking whether agent runs beat random parameter search.", + ) + + +def _runs_from_ablation(path: Path) -> List[ResearchRun]: + df = pd.read_csv(path) + if df.empty or "type" not in df or "sharpe" not in df: + return [] + + runs = [] + for group_name, group in df.groupby("type"): + sharpes = [_coerce_float(v) for v in group["sharpe"].tolist()] + avg_sharpe = mean(sharpes) + runs.append( + ResearchRun( + run_id=f"abl-{str(group_name).lower().replace(' ', '-')}", + name=f"{group_name} ablation", + source="ablation", + strategy="agent_context", + mode="Ablation", + metrics={ + 
"sharpe": avg_sharpe, + "total_return": 0.0, + "max_drawdown": 0.0, + "robustness_score": avg_sharpe, + "n_trials": float(len(group)), + }, + validation_checks=[ + ValidationCheck( + name="Ablation coverage", + status=PASS if len(group) >= 3 else WARN, + detail=f"{len(group)} trials available for this ablation arm.", + ), + ValidationCheck( + name="Metric completeness", + status=PASS, + detail="Sharpe is available for context-vs-no-context comparison.", + ), + ], + artifacts=[str(path)], + notes="Compares agent proposal quality with and without regime context.", + ) + ) + return runs + + +def _runs_from_results_store(db_path: Path) -> List[ResearchRun]: + if not db_path.exists(): + return [] + + from experiments.results_store import ResultsStore + + runs = [] + store = ResultsStore(str(db_path)) + for row in store.list_runs(): + aggregate = json.loads(row.get("aggregate_metrics") or "{}") + sharpe = _metric_alias(aggregate, "mean_sharpe", "sharpe", "sharpe_ratio") + max_drawdown = abs(_metric_alias(aggregate, "max_drawdown")) + run = ResearchRun( + run_id=str(row["run_id"]), + name=f"{row['experiment_type']} run", + source="sqlite", + strategy=str(aggregate.get("strategy", "mixed")), + mode=str(row["experiment_type"]), + timestamp=str(row.get("timestamp", "")), + git_hash=str(row.get("git_hash", "")), + metrics={ + "sharpe": sharpe, + "total_return": _metric_alias(aggregate, "total_return"), + "max_drawdown": max_drawdown, + "robustness_score": sharpe - max_drawdown, + "n_windows": _metric_alias(aggregate, "n_windows"), + }, + validation_checks=_basic_checks( + sharpe=sharpe, + max_drawdown=max_drawdown, + n_windows=int(_metric_alias(aggregate, "n_windows")), + source="walk_forward" if "walk" in str(row["experiment_type"]) else "sqlite", + ), + artifacts=[str(db_path)], + ) + runs.append(run) + return runs + + +def load_research_workspace( + experiments_dir: str | Path = "experiments", + results_db_path: str | Path = "experiments/results.db", +) -> 
List[ResearchRun]: + """Load platform-ready research runs from known local experiment artifacts.""" + exp_dir = Path(experiments_dir) + runs: List[ResearchRun] = [] + + runs.extend(_runs_from_results_store(Path(results_db_path))) + + walk_forward_path = exp_dir / "walk_forward_results.csv" + if walk_forward_path.exists(): + run = _aggregate_walk_forward(walk_forward_path) + if run: + runs.append(run) + + static_path = exp_dir / "static_baseline_results.csv" + if static_path.exists(): + runs.extend(_runs_from_static_baselines(static_path)) + + random_path = exp_dir / "random_baseline_results.csv" + if random_path.exists(): + run = _aggregate_random_baseline(random_path) + if run: + runs.append(run) + + ablation_path = exp_dir / "ablation_results.csv" + if ablation_path.exists(): + runs.extend(_runs_from_ablation(ablation_path)) + + return sorted(runs, key=lambda run: run.robustness_score, reverse=True) + + +def runs_to_dataframe(runs: Iterable[ResearchRun]) -> pd.DataFrame: + """Convert research runs to a dashboard-friendly table.""" + rows = [run.as_row() for run in runs] + return pd.DataFrame(rows) + + +def summarize_workspace(runs: Iterable[ResearchRun]) -> Dict[str, Any]: + """Aggregate high-level platform KPIs for the research workspace.""" + run_list = list(runs) + if not run_list: + return { + "run_count": 0, + "best_run": None, + "best_sharpe": 0.0, + "best_robustness": 0.0, + "validation_pass_rate": 0.0, + } + + best_run = max(run_list, key=lambda run: run.robustness_score) + pass_count = sum(1 for run in run_list if run.validation_status == PASS) + return { + "run_count": len(run_list), + "best_run": best_run, + "best_sharpe": max(run.sharpe for run in run_list), + "best_robustness": best_run.robustness_score, + "validation_pass_rate": pass_count / len(run_list), + } + + +def build_research_memo(run: ResearchRun) -> str: + """Generate a concise Markdown memo for a selected research run.""" + checks = "\n".join( + f"- **{check.name}** ({check.status}): 
{check.detail}" + for check in run.validation_checks + ) + return f"""### {run.name} + +**Mode:** {run.mode} +**Strategy:** {run.strategy} +**Source:** `{run.source}` + +**Result:** Sharpe {run.sharpe:.3f}, total return {run.total_return * 100:.1f}%, max drawdown {run.max_drawdown * 100:.1f}%, robustness {run.robustness_score:.3f}. + +**Research Notes:** {run.notes or "No notes recorded."} + +**Validation Checks** +{checks} +"""
diff --git a/tests/test_alpha_store.py b/tests/test_alpha_store.py
new file mode 100644
index 0000000..5e19ba1
--- /dev/null
+++ b/tests/test_alpha_store.py
@@ -0,0 +1,119 @@
+"""Tests for alpha memory persistence and retrieval."""
+
+from src.agent.context_builder import RegimeContext
+from src.agent.proposal_generator import ProposalGenerator
+from src.research.alpha_store import AlphaStore
+
+
+class NoopPlanner:
+    """Planner stub that reports itself unavailable and proposes nothing."""
+    def is_available(self):
+        return False
+
+    def generate_proposals(self, prompt, n=5):
+        return []
+
+
+def test_alpha_store_persists_and_recalls_top_candidates(tmp_path):
+    """Recall lists the stronger stored candidate first, accepted, with sorted assets."""
+    store = AlphaStore(tmp_path / "alphas.db")
+
+    weak = store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 5, "slow_window": 20},
+        metrics={"sharpe_ratio": 0.1, "total_return": 0.02, "max_drawdown": 0.04},
+        assets=["SPY"],
+        reasoning="Short momentum test.",
+    )
+    strong = store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 50, "slow_window": 200},
+        metrics={"sharpe_ratio": 1.2, "total_return": 0.22, "max_drawdown": 0.05},
+        assets=["SPY", "QQQ"],
+        reasoning="Long-horizon momentum held up in bullish mid-vol regime.",
+    )
+
+    recalled = store.recall(regime="MidVol-Bull", strategy_type="momentum", n=5)
+
+    assert [alpha.alpha_id for alpha in recalled] == [strong.alpha_id, weak.alpha_id]
+    assert recalled[0].status == "accepted"
+    assert recalled[0].assets == ["QQQ", "SPY"]
+
+
+def test_alpha_prompt_context_is_agent_readable(tmp_path):
+    """Prompt context carries the memory header, stored reasoning, and parameter names."""
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="LowVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 63, "slow_window": 252},
+        metrics={"sharpe_ratio": 0.8, "total_return": 0.18, "max_drawdown": 0.03},
+        assets=["SPY"],
+        reasoning="Slow momentum worked in calm uptrends.",
+    )
+
+    context = store.to_prompt_context("LowVol-Bull", "momentum")
+
+    assert "ALPHA MEMORY FROM PRIOR RUNS" in context
+    assert "Slow momentum worked" in context
+    assert "fast_window" in context
+
+
+def test_alpha_prompt_context_includes_rejected_configs(tmp_path):
+    """A negative-Sharpe result appears in the prompt context as a rejected config."""
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 5, "slow_window": 20},
+        metrics={"sharpe_ratio": -0.4, "total_return": -0.1, "max_drawdown": 0.25},
+        assets=["SPY"],
+        reasoning="Too reactive in this regime.",
+    )
+
+    context = store.to_prompt_context("MidVol-Bull", "momentum")
+
+    assert "REJECTED" in context
+    assert "Avoid repeating" in context
+
+
+def test_proposal_generator_uses_alpha_memory_before_grid(tmp_path):
+    """A stored alpha leads the proposals when the planner is unavailable."""
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 50, "slow_window": 200},
+        metrics={"sharpe_ratio": 1.1, "total_return": 0.2, "max_drawdown": 0.08},
+        assets=["SPY"],
+        reasoning="Retrieved candidate should lead future generation.",
+    )
+
+    generator = ProposalGenerator(planner=NoopPlanner(), alpha_store=store)
+    context = RegimeContext(regime_label="MidVol-Bull")
+    proposals = generator.generate(context, n_proposals=3, strategy_type="momentum")
+
+    assert proposals[0].generation_method == "alpha_memory"
+    assert proposals[0].params == {"fast_window": 50, "slow_window": 200}
+    assert len(proposals) == 3
+
+
+def test_proposal_generator_avoids_rejected_alpha_params(tmp_path):
+    """Parameters stored with a losing backtest are not proposed again."""
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 5, "slow_window": 20},
+        metrics={"sharpe_ratio": -0.5, "total_return": -0.08, "max_drawdown": 0.22},
+        assets=["SPY"],
+        reasoning="Rejected fast crossover.",
+    )
+
+    generator = ProposalGenerator(planner=NoopPlanner(), alpha_store=store)
+    context = RegimeContext(regime_label="MidVol-Bull")
+    proposals = generator.generate(context, n_proposals=5, strategy_type="momentum")
+
+    assert {"fast_window": 5, "slow_window": 20} not in [proposal.params for proposal in proposals]
diff --git a/tests/test_data_ingest.py b/tests/test_data_ingest.py
new file mode 100644
index 0000000..a363177
--- /dev/null
+++ b/tests/test_data_ingest.py
@@ -0,0 +1,60 @@
+"""Tests for live market-data ingestion and cache behavior."""
+
+import pandas as pd
+
+from src.data.ingest import _cache_covers_range, fetch_ohlcv_data
+from src.utils.config import config
+
+
+def test_cache_range_coverage_detects_missing_requested_dates():
+    """Coverage fails when the request starts earlier or ends much later than the cache."""
+    cached = pd.DataFrame(
+        {"Close": [100.0, 101.0]},
+        index=pd.to_datetime(["2024-01-02", "2024-01-03"]),
+    )
+
+    assert _cache_covers_range(cached, "2024-01-02", "2024-01-04")
+    assert not _cache_covers_range(cached, "2023-12-01", "2024-01-04")
+    assert not _cache_covers_range(cached, "2024-01-02", "2024-02-01")
+
+
+def test_fetch_refetches_when_cache_does_not_cover_range(tmp_path, monkeypatch):
+    """A cached file outside the requested range triggers exactly one fresh download."""
+    monkeypatch.setattr(config, "data_path", str(tmp_path))
+    monkeypatch.setattr(config.cache, "enabled", True)
+    monkeypatch.setattr(config.cache, "ttl_hours", 24)
+
+    cached = pd.DataFrame(
+        {
+            "Open": [100.0],
+            "High": [101.0],
+            "Low": [99.0],
+            "Close": [100.5],
+            "Volume": [1_000],
+        },
+        index=pd.to_datetime(["2024-01-02"]),
+    )
+    cached.to_parquet(tmp_path / "AAPL.parquet")
+
+    downloaded = pd.DataFrame(
+        {
+            "Open": [100.0, 102.0],
+            "High": [101.0, 103.0],
+            "Low": [99.0, 101.0],
+            "Close": [100.5, 102.5],
+            "Volume": [1_000, 1_500],
+        },
+        index=pd.to_datetime(["2024-02-01", "2024-02-02"]),
+    )
+    calls = []
+
+    def fake_download(ticker, start=None, end=None, auto_adjust=True, progress=False):
+        calls.append((ticker, start, end, auto_adjust, progress))
+        return downloaded
+
+    monkeypatch.setattr("src.data.ingest.yf.download", fake_download)
+
+    result = fetch_ohlcv_data("AAPL", "2024-02-01", "2024-02-03")
+
+    assert calls == [("AAPL", "2024-02-01", "2024-02-03", True, False)]
+    assert len(result["AAPL"]) == 2
diff --git a/tests/test_nla_memory.py b/tests/test_nla_memory.py
new file mode 100644
index 0000000..b6d2e6b
--- /dev/null
+++ b/tests/test_nla_memory.py
@@ -0,0 +1,69 @@
+"""Tests for NLA-style research memory."""
+
+import json
+
+from src.agent.context_builder import RegimeContext
+from src.research.nla_memory import NLAMemoryStore
+
+
+def test_nla_memory_stores_agent_summary_and_context(tmp_path):
+    """A stored agent summary is recallable and rendered into the prompt context."""
+    store = NLAMemoryStore(tmp_path / "research.db")
+    record = store.store_agent_summary(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 50, "slow_window": 200},
+        narrative="Slow crossover narrative from explicit proposal reasoning.",
+        metrics={"sharpe_ratio": 1.1, "max_drawdown": 0.08},
+        alpha_id="alpha123",
+        tags=("test",),
+    )
+
+    recalled = store.recall(regime="MidVol-Bull", strategy_type="momentum")
+    context = store.to_prompt_context("MidVol-Bull", "momentum")
+
+    assert recalled[0].record_id == record.record_id
+    assert recalled[0].alpha_id == "alpha123"
+    assert "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES" in context
+    assert "Slow crossover narrative" in context
+    assert "not as hidden chain-of-thought" in context
+
+
+def test_nla_memory_ingests_gemma4_jsonl(tmp_path):
+    """One JSONL row becomes one record scored as cosine minus direction_mse."""
+    jsonl_path = tmp_path / "nla_eval.jsonl"
+    payload = {
+        "text": "momentum proposal with fast_window=20",
+        "explanation": "Activation narrative favors slower confirmation.",
+        "direction_mse": 0.15,
+        "cosine": 0.82,
+    }
+    jsonl_path.write_text(json.dumps(payload) + "\n", encoding="utf-8")
+
+    store = NLAMemoryStore(tmp_path / "research.db")
+    records = store.ingest_nla_jsonl(
+        jsonl_path,
+        regime="LowVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 20},
+        tags=("gemma4",),
+    )
+
+    assert len(records) == 1
+    assert records[0].source_model == "gemma4-nla"
+    assert abs(records[0].quality_score - (0.82 - 0.15)) < 1e-9
+    assert store.recall(regime="LowVol-Bull", strategy_type="momentum")[0].narrative.startswith(
+        "Activation narrative"
+    )
+
+
+def test_regime_context_includes_nla_memory():
+    """RegimeContext.to_prompt_string() includes the supplied NLA memory text."""
+    context = RegimeContext(
+        regime_label="MidVol-Bull",
+        nla_memory_context="NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES:\n - note",
+    )
+
+    prompt = context.to_prompt_string()
+
+    assert "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES" in prompt
diff --git a/tests/test_research_workspace.py b/tests/test_research_workspace.py
new file mode 100644
index 0000000..2355179
--- /dev/null
+++ b/tests/test_research_workspace.py
@@ -0,0 +1,84 @@
+"""Tests for the platform research workspace layer."""
+
+import pandas as pd
+
+from src.research.workspace import (
+    build_research_memo,
+    load_research_workspace,
+    runs_to_dataframe,
+    summarize_workspace,
+)
+
+
+def test_load_research_workspace_aggregates_csv_artifacts(tmp_path):
+    """Walk-forward and baseline CSVs load as runs; Series-repr metric cells are parsed."""
+    exp_dir = tmp_path / "experiments"
+    exp_dir.mkdir()
+
+    pd.DataFrame({
+        "test_start": ["2024-01-01", "2024-07-01", "2025-01-01"],
+        "test_end": ["2024-06-30", "2024-12-31", "2025-06-30"],
+        "sharpe": [1.0, 0.5, -0.25],
+        "return": [0.10, 0.04, -0.02],
+        "drawdown": [0.04, 0.07, 0.10],
+        "params": ["{}", "{}", "{}"],
+    }).to_csv(exp_dir / "walk_forward_results.csv", index=False)
+
+    pd.DataFrame({
+        "strategy": ["Buy and Hold", "Golden Cross"],
+        "sharpe": ["Ticker\nSPY 0.80\ndtype: float64", "0.55"],
+        "return": ["Ticker\nSPY 0.25\ndtype: float64", "0.08"],
+        "drawdown": ["Ticker\nSPY -0.20\ndtype: float64", "0.05"],
+    }).to_csv(exp_dir / "static_baseline_results.csv", index=False)
+
+    runs = load_research_workspace(exp_dir, tmp_path / "missing.db")
+
+    assert len(runs) == 3
+    assert any(run.run_id == "wf-momentum" for run in runs)
+    assert any(run.name == "Buy and Hold benchmark" for run in runs)
+
+    walk_forward = next(run for run in runs if run.run_id == "wf-momentum")
+    buy_hold = next(run for run in runs if run.name == "Buy and Hold benchmark")
+    assert walk_forward.metrics["n_windows"] == 3
+    assert walk_forward.validation_status == "pass"
+    assert buy_hold.sharpe == 0.8
+    assert buy_hold.total_return == 0.25
+
+
+def test_workspace_summary_selects_best_robustness(tmp_path):
+    """The summary's best run is the low-drawdown one, not the run with the top Sharpe."""
+    exp_dir = tmp_path / "experiments"
+    exp_dir.mkdir()
+
+    pd.DataFrame({
+        "strategy": ["Low Drawdown", "High Sharpe"],
+        "sharpe": [0.7, 0.9],
+        "return": [0.08, 0.12],
+        "drawdown": [0.03, 0.35],
+    }).to_csv(exp_dir / "static_baseline_results.csv", index=False)
+
+    runs = load_research_workspace(exp_dir, tmp_path / "missing.db")
+    summary = summarize_workspace(runs)
+
+    assert summary["run_count"] == 2
+    assert summary["best_sharpe"] == 0.9
+    assert summary["best_run"].name == "Low Drawdown benchmark"
+
+
+def test_dataframe_and_memo_are_dashboard_ready(tmp_path):
+    """Runs export the dashboard columns and the memo names the run and its checks."""
+    exp_dir = tmp_path / "experiments"
+    exp_dir.mkdir()
+
+    pd.DataFrame({
+        "type": ["With Context", "With Context", "No Context"],
+        "sharpe": [0.4, 0.6, 0.2],
+    }).to_csv(exp_dir / "ablation_results.csv", index=False)
+
+    runs = load_research_workspace(exp_dir, tmp_path / "missing.db")
+    df = runs_to_dataframe(runs)
+    memo = build_research_memo(runs[0])
+
+    assert {"Run ID", "Name", "Sharpe", "Robustness", "Validation"}.issubset(df.columns)
+    assert "Validation Checks" in memo
+    assert runs[0].name in memo