diff --git a/README.md b/README.md
index c0b1f8e..de393a2 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
[](https://github.com/OnePunchMonk/AgentQuant/actions)

-
+
---
@@ -20,6 +20,30 @@ AgentQuant is a regime-adaptive research platform that runs a real **ReAct agent
---
+## Platform Preview
+
+### Live Data Selection
+
+Choose a date range, select preset stocks/ETFs, or type any yfinance ticker. AgentQuant fetches data on demand and only uses the local cache when it covers the requested range.
+
+![Live data selection sidebar](screenshots/live_data_sidebar_desktop.jpg)
+
+### Research Workspace
+
+The dashboard tracks experiment runs, baselines, robustness scores, validation checks, and report-ready research notes in one place.
+
+![Research workspace dashboard](screenshots/research_workspace_desktop.jpg)
+
+### Alpha + NLA Memory
+
+Agent Lab stores backtested alpha candidates and explicit NLA-style research narratives so future runs can retrieve prior evidence. NLA memory is based on explicit activation narratives or imported `nla-gemma4` JSONL outputs, not hidden chain-of-thought.
+
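+![Agent Lab alpha and NLA memory](screenshots/agent_lab_nla_memory_desktop.jpg)
+
+![NLA memory table](screenshots/nla_memory_desktop.jpg)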
+
+---
+
## Architecture
```
@@ -40,6 +64,9 @@ analyze ──► hypothesize ──► backtest ──► reflect
| `src/agent/context_builder.py` | `RegimeContext` dataclass with VIX percentile, multi-horizon momentum |
| `src/agent/parameter_grid.py` | Canonical grids per strategy; regime-aware prior selection |
| `src/agent/strategy_memory.py` | SQLite cross-session memory |
+| `src/research/alpha_store.py` | SQLite memory for accepted, watchlisted, and rejected alpha candidates |
+| `src/research/nla_memory.py` | Explicit NLA-style narrative memory and `nla-gemma4` JSONL ingestion |
+| `src/research/workspace.py` | Experiment registry, robustness summaries, and research memo generation |
| `src/features/regime.py` | Percentile-based regime detection + optional HMM |
| `src/features/engine.py` | RSI, MACD, Bollinger, ATR, multi-horizon vol, stationarity checks |
| `src/features/lookback_guard.py` | `WarmupEnforcer` prevents look-ahead bias |
@@ -108,14 +135,18 @@ pip install -e ".[dev]"
pytest tests/ -v
```
-**42 tests passing** across:
+**55 tests passing** across:
- `test_config.py` — Pydantic validation
+- `test_data_ingest.py` — live ticker fetch and cache range coverage
- `test_metrics.py` — Sharpe, drawdown, Calmar, Sortino
- `test_regime.py` — VIX percentile regime classification
- `test_features.py` — RSI bounds, momentum accuracy, new indicator columns
- `test_strategies.py` — All 6 strategies produce valid `{-1,0,1}` signals
- `test_backtest.py` — Runner, zero-signal flat equity, metrics keys
- `test_proposal_generator.py` — Fallback chain without API key
+- `test_alpha_store.py` — alpha memory persistence and retrieval
+- `test_nla_memory.py` — explicit NLA memory and JSONL ingestion
+- `test_research_workspace.py` — experiment registry summaries and memos
---
@@ -136,6 +167,10 @@ AgentQuant/
│ ├── data/
│ │ ├── ingest.py # yfinance + FRED with TTL cache
│ │ └── schemas.py # Data schemas
+│ ├── research/
+│ │ ├── alpha_store.py # SQLite alpha candidate memory
+│ │ ├── nla_memory.py # Explicit NLA narrative memory
+│ │ └── workspace.py # Experiment registry + research memos
│ ├── features/
│ │ ├── engine.py # RSI, MACD, Bollinger, ATR, multi-horizon vol
│ │ ├── regime.py # VIX-percentile + optional HMM detection
@@ -158,7 +193,7 @@ AgentQuant/
├── experiments/
│ ├── results_store.py # SQLite experiment tracking
│ └── walk_forward.py # Walk-forward validation
-├── tests/ # 42 tests
+├── tests/ # 55 tests
├── docs/ # Documentation
├── config.yaml # Project configuration
├── .env.example # Environment template
diff --git a/pyproject.toml b/pyproject.toml
index 913777e..38cfc76 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
"scipy>=1.12",
"streamlit>=1.39",
"plotly>=5.17",
+ "matplotlib>=3.8",
"pyarrow>=16.0",
"tabulate>=0.9",
"statsmodels>=0.14",
@@ -67,4 +68,4 @@ ignore = ["E501"]
[tool.setuptools.packages.find]
where = ["."]
-include = ["src*"]
\ No newline at end of file
+include = ["src*"]
diff --git a/screenshots/agent_lab_nla_memory_desktop.jpg b/screenshots/agent_lab_nla_memory_desktop.jpg
new file mode 100644
index 0000000..5e0ca1e
Binary files /dev/null and b/screenshots/agent_lab_nla_memory_desktop.jpg differ
diff --git a/screenshots/live_data_sidebar_desktop.jpg b/screenshots/live_data_sidebar_desktop.jpg
new file mode 100644
index 0000000..b4b0ba4
Binary files /dev/null and b/screenshots/live_data_sidebar_desktop.jpg differ
diff --git a/screenshots/nla_memory_desktop.jpg b/screenshots/nla_memory_desktop.jpg
new file mode 100644
index 0000000..f57418f
Binary files /dev/null and b/screenshots/nla_memory_desktop.jpg differ
diff --git a/screenshots/research_workspace_desktop.jpg b/screenshots/research_workspace_desktop.jpg
new file mode 100644
index 0000000..9374c8c
Binary files /dev/null and b/screenshots/research_workspace_desktop.jpg differ
diff --git a/src/agent/agent_graph.py b/src/agent/agent_graph.py
index d2098ee..0ee13b8 100644
--- a/src/agent/agent_graph.py
+++ b/src/agent/agent_graph.py
@@ -11,16 +11,15 @@
import json
import logging
-from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, TypedDict
-import numpy as np
import pandas as pd
from src.agent.context_builder import RegimeContext, build_context
from src.agent.proposal_generator import Proposal, ProposalGenerator
from src.agent.strategy_memory import PastResult, StrategyMemory
-from src.backtest.metrics import PerformanceMetrics
+from src.research.alpha_store import AlphaStore
+from src.research.nla_memory import NLAMemoryStore
from src.utils.config import config
logger = logging.getLogger(__name__)
@@ -60,10 +59,16 @@ def analyze_node(state: AgentState) -> AgentState:
# Get memory context
memory = StrategyMemory()
memory_ctx = memory.to_prompt_context(regime_label, state.get("strategy_type", "momentum"))
+ alpha_memory = AlphaStore()
+ alpha_ctx = alpha_memory.to_prompt_context(regime_label, state.get("strategy_type", "momentum"))
+ nla_memory = NLAMemoryStore()
+ nla_ctx = nla_memory.to_prompt_context(regime_label, state.get("strategy_type", "momentum"))
+ context.alpha_memory_context = alpha_ctx
+ context.nla_memory_context = nla_ctx
state["features_df"] = features_df
state["context"] = context
- state["memory_context"] = memory_ctx
+ state["memory_context"] = f"{memory_ctx}\n\n{alpha_ctx}\n\n{nla_ctx}"
state["run_log"] = state.get("run_log", [])
state["run_log"].append(f"Regime: {regime_label} (confidence: {context.regime_confidence:.0%})")
@@ -212,8 +217,40 @@ def store_node(state: AgentState) -> AgentState:
reasoning=best.get("reasoning", ""),
)
run_id = memory.store(result)
- state["run_log"].append(f"Store: Persisted result {run_id} to memory.")
- logger.info("Persisted result %s to strategy memory.", run_id)
+ alpha = AlphaStore().store_backtest_result(
+ regime=regime,
+ strategy_type=state.get("strategy_type", "momentum"),
+ params=best["params"],
+ metrics={
+ "sharpe_ratio": best.get("sharpe", 0.0),
+ "total_return": best.get("total_return", 0.0),
+ "max_drawdown": best.get("max_drawdown", 0.0),
+ "num_trades": best.get("num_trades", 0),
+ },
+ assets=[state.get("asset", config.reference_asset)],
+ generation_method=best.get("generation_method", ""),
+ confidence=best.get("confidence", 0.0),
+ reasoning=best.get("reasoning", ""),
+ source="agent_graph",
+ )
+ nla = NLAMemoryStore().store_agent_summary(
+ regime=regime,
+ strategy_type=state.get("strategy_type", "momentum"),
+ params=best["params"],
+ metrics={
+ "sharpe_ratio": best.get("sharpe", 0.0),
+ "total_return": best.get("total_return", 0.0),
+ "max_drawdown": best.get("max_drawdown", 0.0),
+ "num_trades": best.get("num_trades", 0),
+ },
+ narrative=best.get("reasoning", "") or "Stored best proposal from explicit agent run.",
+ alpha_id=alpha.alpha_id,
+ tags=("agent_graph", best.get("generation_method", "")),
+ )
+ state["run_log"].append(
+ f"Store: Persisted result {run_id}, alpha {alpha.alpha_id}, NLA note {nla.record_id}."
+ )
+ logger.info("Persisted result %s, alpha %s, NLA note %s.", run_id, alpha.alpha_id, nla.record_id)
return state
diff --git a/src/agent/context_builder.py b/src/agent/context_builder.py
index 6c300e2..acc3308 100644
--- a/src/agent/context_builder.py
+++ b/src/agent/context_builder.py
@@ -7,10 +7,8 @@
"""
import logging
-from dataclasses import dataclass, field
-from typing import Optional
+from dataclasses import dataclass
-import numpy as np
import pandas as pd
from scipy import stats as scipy_stats
@@ -36,10 +34,12 @@ class RegimeContext:
rsi_14: float = 50.0
price_vs_sma200: float = 0.0
regime_confidence: float = 0.5
+ alpha_memory_context: str = ""
+ nla_memory_context: str = ""
def to_prompt_string(self) -> str:
"""Format context as structured text for LLM prompt injection."""
- return (
+ context = (
f"MARKET CONTEXT:\n"
f" Regime: {self.regime_label} (confidence: {self.regime_confidence:.0%})\n"
f" VIX: {self.vix_level:.1f} (at {self.vix_percentile:.0f}th percentile, trailing 1Y)\n"
@@ -55,6 +55,11 @@ def to_prompt_string(self) -> str:
f" RSI (14): {self.rsi_14:.1f}\n"
f" Drawdown from peak: {self.drawdown_from_peak * 100:.1f}%\n"
)
+ if self.alpha_memory_context:
+ context += f"\n{self.alpha_memory_context}\n"
+ if self.nla_memory_context:
+ context += f"\n{self.nla_memory_context}\n"
+ return context
def build_context(features_df: pd.DataFrame) -> RegimeContext:
diff --git a/src/agent/proposal_generator.py b/src/agent/proposal_generator.py
index ad6b6d4..2abb624 100644
--- a/src/agent/proposal_generator.py
+++ b/src/agent/proposal_generator.py
@@ -12,6 +12,7 @@
from src.agent.base_planner import BasePlanner, create_planner
from src.agent.context_builder import RegimeContext
from src.agent.parameter_grid import ParameterGrid
+from src.research.alpha_store import AlphaStore
logger = logging.getLogger(__name__)
@@ -94,10 +95,17 @@ class ProposalGenerator:
Fallback chain: LLM → GridSearch → Random.
"""
- def __init__(self, planner: Optional[BasePlanner] = None):
+ def __init__(
+ self,
+ planner: Optional[BasePlanner] = None,
+ alpha_store: Optional[AlphaStore] = None,
+ use_alpha_memory: bool = True,
+ ):
self.planner = planner or create_planner()
self.grid = ParameterGrid()
self.validator = ProposalValidator()
+ self.alpha_store = alpha_store or AlphaStore()
+ self.use_alpha_memory = use_alpha_memory
def generate(
self,
@@ -120,6 +128,19 @@ def generate(
if len(proposals) < n_proposals:
needed = n_proposals - len(proposals)
existing_params = {tuple(sorted(p.params.items())) for p in proposals}
+ rejected_params = self._rejected_param_keys(context, strategy_type)
+
+ if self.use_alpha_memory:
+ memory_proposals = self._memory_generate(context, strategy_type, needed)
+ for mp in memory_proposals:
+ if len(proposals) >= n_proposals:
+ break
+ key = tuple(sorted(mp.params.items()))
+ if key not in existing_params:
+ proposals.append(mp)
+ existing_params.add(key)
+
+ needed = n_proposals - len(proposals)
grid_proposals = self.grid.top_k_by_prior(
strategy_type, needed + 3, context.regime_label
)
@@ -127,7 +148,7 @@ def generate(
if len(proposals) >= n_proposals:
break
key = tuple(sorted(gp.items()))
- if key not in existing_params:
+ if key not in existing_params and key not in rejected_params:
proposals.append(Proposal(
params=gp,
confidence=0.3,
@@ -139,12 +160,13 @@ def generate(
# Last resort: random from grid
if len(proposals) < n_proposals:
needed = n_proposals - len(proposals)
+ rejected_params = self._rejected_param_keys(context, strategy_type)
for rp in self.grid.random_k(strategy_type, needed + 5):
if len(proposals) >= n_proposals:
break
existing_params_set = {tuple(sorted(p.params.items())) for p in proposals}
key = tuple(sorted(rp.items()))
- if key not in existing_params_set:
+ if key not in existing_params_set and key not in rejected_params:
proposals.append(Proposal(
params=rp,
confidence=0.1,
@@ -154,6 +176,53 @@ def generate(
return proposals[:n_proposals]
+    def _memory_generate(
+        self,
+        context: RegimeContext,
+        strategy_type: str,
+        n: int,
+    ) -> List[Proposal]:
+        """Turn stored alpha candidates into proposals.
+
+        Recalls up to ``n`` candidates with status "accepted" or "watch" for
+        the current regime and strategy. When the strategy has a non-empty
+        parameter grid, candidates whose parameters are not a grid point are
+        skipped. Returns an empty list when ``n`` is not positive.
+        """
+        if n <= 0:
+            return []
+
+        allowed_keys = {
+            tuple(sorted(grid_point.items()))
+            for grid_point in self.grid.get_grid(strategy_type)
+        }
+        recalled = self.alpha_store.recall(
+            regime=context.regime_label,
+            strategy_type=strategy_type,
+            statuses=("accepted", "watch"),
+            n=n,
+        )
+
+        def _on_grid(params) -> bool:
+            # An empty grid disables the filter entirely.
+            key = tuple(sorted(params.items()))
+            return not allowed_keys or key in allowed_keys
+
+        return [
+            Proposal(
+                params=item.params,
+                confidence=max(item.confidence, 0.55),
+                regime_characteristic_used="alpha_memory",
+                reasoning=f"Retrieved from alpha DB: {item.thesis}",
+                generation_method="alpha_memory",
+            )
+            for item in recalled
+            if _on_grid(item.params)
+        ]
+
+    def _rejected_param_keys(self, context: RegimeContext, strategy_type: str) -> set:
+        """Collect parameter keys of rejected alpha candidates.
+
+        Each key is the sorted ``(name, value)`` tuple of a candidate's
+        parameters, matching the keys ``generate`` uses for de-duplication.
+        Up to 50 rejected candidates for the current regime and strategy are
+        considered. Returns an empty set when alpha memory is disabled.
+        """
+        keys = set()
+        if self.use_alpha_memory:
+            recalled = self.alpha_store.recall(
+                regime=context.regime_label,
+                strategy_type=strategy_type,
+                statuses=("rejected",),
+                n=50,
+            )
+            for item in recalled:
+                keys.add(tuple(sorted(item.params.items())))
+        return keys
+
def _llm_generate(
self, context: RegimeContext, strategy_type: str, n: int
) -> List[Proposal]:
diff --git a/src/app/streamlit_app.py b/src/app/streamlit_app.py
index 69fc0c3..b861daf 100644
--- a/src/app/streamlit_app.py
+++ b/src/app/streamlit_app.py
@@ -15,23 +15,30 @@
"""
import logging
-import os
from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional
+from pathlib import Path
+from typing import Any, Dict, List
import matplotlib.pyplot as plt
-import numpy as np
import pandas as pd
+import plotly.express as px
import streamlit as st
from src.agent.context_builder import build_context
from src.agent.parameter_grid import ParameterGrid
from src.agent.proposal_generator import ProposalGenerator
-from src.backtest.metrics import PerformanceMetrics
from src.backtest.runner import run_backtest
from src.data.ingest import fetch_ohlcv_data
from src.features.engine import compute_features
-from src.features.regime import detect_regime, detect_regime_full
+from src.features.regime import detect_regime_full
+from src.research.alpha_store import AlphaCandidate, AlphaStore
+from src.research.nla_memory import NLAMemoryStore, NLARecord
+from src.research.workspace import (
+ build_research_memo,
+ load_research_workspace,
+ runs_to_dataframe,
+ summarize_workspace,
+)
from src.strategies.strategy_registry import STRATEGY_REGISTRY
from src.utils.config import config
from src.utils.logging import setup_logging
@@ -61,6 +68,15 @@
.regime-crisis { background: #f8d7da; color: #721c24; border: 2px solid #721c24; }
.regime-neutral{ background: #fff3cd; color: #856404; }
.metric-card { background: #f8f9fa; border-radius: 8px; padding: 0.8rem; margin: 0.3rem; }
+ .workspace-note {
+ border-left: 4px solid #1f77b4;
+ padding: 0.8rem 1rem;
+ background: #f6f8fa;
+ border-radius: 6px;
+ }
+ .status-pass { color: #116329; font-weight: 700; }
+ .status-warn { color: #9a6700; font-weight: 700; }
+ .status-fail { color: #cf222e; font-weight: 700; }
""", unsafe_allow_html=True)
@@ -68,11 +84,16 @@
# ─── Cached helpers ────────────────────────────────────────────────────────────
@st.cache_data(ttl=3600, show_spinner="Fetching market data…")
-def _fetch_data_cached(assets: tuple, start: str, end: str) -> Dict[str, pd.DataFrame]:
+def _fetch_data_cached(
+ assets: tuple,
+ start: str,
+ end: str,
+ force_download: bool = False,
+) -> Dict[str, pd.DataFrame]:
"""Cache market data for 1 hour to avoid re-downloading on every rerun."""
all_data: Dict[str, pd.DataFrame] = {}
for ticker in assets:
- result = fetch_ohlcv_data(ticker, start, end)
+ result = fetch_ohlcv_data(ticker, start, end, force_download=force_download)
if ticker in result:
all_data[ticker] = result[ticker]
return all_data
@@ -105,16 +126,173 @@ def _regime_badge(regime_label: str) -> str:
return f'
📊 Market Regime: {regime_label}
'
+def _format_pct(value: float) -> str:
+    """Format a fractional value as a percentage string with one decimal place."""
+    percent = value * 100
+    return f"{percent:.1f}%"
+
+
+def _status_html(status: str) -> str:
+    """Return an HTML snippet for a validation status.
+
+    Known statuses (``pass``, ``warn``, ``fail``; matched case-insensitively)
+    are wrapped in a ``<span>`` carrying the matching ``status-*`` CSS class
+    declared in the page stylesheet, so they render colour-coded when the
+    caller passes ``unsafe_allow_html=True``. Any other status is returned
+    as plain title-cased text.
+    """
+    labels = {"pass": "Pass", "warn": "Review", "fail": "Fail"}
+    status_l = status.lower()
+    if status_l not in labels:
+        # No stylesheet rule exists for unknown statuses; keep them unstyled.
+        return status.title()
+    return f'<span class="status-{status_l}">{labels[status_l]}</span>'
+
+
+def _normalize_tickers(tickers: List[str]) -> List[str]:
+    """Strip and upper-case ticker symbols, dropping blanks and duplicates.
+
+    The first occurrence of each symbol wins, so input order is preserved.
+    """
+    cleaned = (raw.strip().upper() for raw in tickers)
+    # dict.fromkeys de-duplicates while keeping insertion order.
+    return list(dict.fromkeys(symbol for symbol in cleaned if symbol))
+
+
+def _alpha_candidates_to_dataframe(candidates: List[AlphaCandidate]) -> pd.DataFrame:
+    """Build a display table from alpha candidates.
+
+    Each candidate contributes one row via ``as_row()``; the "Return" and
+    "Max Drawdown" columns are rendered as percentage strings. An empty
+    input yields an empty DataFrame.
+    """
+    records = [item.as_row() for item in candidates]
+    if len(records) == 0:
+        return pd.DataFrame()
+    table = pd.DataFrame(records)
+    percent_columns = [name for name in ("Return", "Max Drawdown") if name in table]
+    for name in percent_columns:
+        table[name] = table[name].map(_format_pct)
+    return table
+
+
+def _nla_records_to_dataframe(records: List[NLARecord]) -> pd.DataFrame:
+    """Build a display table with one row per NLA record (via ``as_row()``)."""
+    table_rows = [entry.as_row() for entry in records]
+    return pd.DataFrame(table_rows) if table_rows else pd.DataFrame()
+
+
+def render_research_workspace() -> None:
+    """Render the platform-style experiment registry.
+
+    Loads research runs from the local ``experiments`` directory and the
+    configured results database, then draws, in order: headline metrics, the
+    current-leader callout, the experiment table, the robustness scatter
+    plot, a per-run inspector, and the recent alpha / NLA memory tables.
+    When no runs are found an info message is shown and nothing else is
+    rendered (including the memory tables).
+    """
+    runs = load_research_workspace(
+        experiments_dir=Path("experiments"),
+        results_db_path=Path(config.results_db_path),
+    )
+    summary = summarize_workspace(runs)
+
+    st.header("Research Workspace")
+    st.caption(
+        "A local-first registry for experiments, baselines, validation checks, and report-ready research memos."
+    )
+
+    if not runs:
+        st.info("No experiment artifacts found yet. Run a walk-forward study or backtest to populate the workspace.")
+        return
+
+    best_run = summary["best_run"]
+    k1, k2, k3, k4 = st.columns(4)
+    k1.metric("Tracked Runs", summary["run_count"])
+    k2.metric("Best Sharpe", f"{summary['best_sharpe']:.3f}")
+    k3.metric("Best Robustness", f"{summary['best_robustness']:.3f}")
+    k4.metric("Validation Pass Rate", _format_pct(summary["validation_pass_rate"]))
+
+    if best_run:
+        # The .workspace-note wrapper picks up the callout styling from the
+        # page CSS; without it unsafe_allow_html has nothing to render.
+        st.markdown(
+            f"""
+            <div class="workspace-note">
+            Current leader: {best_run.name} with robustness
+            {best_run.robustness_score:.3f}. Use this as the anchor run when comparing new agent or swarm experiments.
+            </div>
+            """,
+            unsafe_allow_html=True,
+        )
+
+    df_runs = runs_to_dataframe(runs)
+    # Format percentages on a copy so the chart below keeps numeric values.
+    display_df = df_runs.copy()
+    for col in ("Return", "Max Drawdown"):
+        if col in display_df:
+            display_df[col] = display_df[col].map(_format_pct)
+
+    st.subheader("Experiment Registry")
+    st.dataframe(display_df, use_container_width=True, hide_index=True)
+
+    chart_df = df_runs.copy()
+    if not chart_df.empty:
+        # Shift robustness so every marker size is at least 0.1 (strictly positive).
+        min_robustness = chart_df["Robustness"].min()
+        chart_df["Marker Size"] = (chart_df["Robustness"] - min_robustness + 0.1).clip(lower=0.1)
+        st.subheader("Robustness Map")
+        fig = px.scatter(
+            chart_df,
+            x="Max Drawdown",
+            y="Sharpe",
+            size="Marker Size",
+            color="Mode",
+            hover_name="Name",
+            hover_data=["Strategy", "Source", "Validation", "Robustness"],
+            title="Sharpe vs. Drawdown by Research Run",
+        )
+        fig.update_layout(height=420, margin=dict(l=10, r=10, t=50, b=10))
+        st.plotly_chart(fig, use_container_width=True)
+
+    st.subheader("Run Inspector")
+    # The run id is part of the label so runs sharing a name stay distinguishable.
+    run_lookup = {f"{run.name} ({run.run_id})": run for run in runs}
+    selected_label = st.selectbox("Select a research run", list(run_lookup.keys()))
+    selected = run_lookup[selected_label]
+
+    left, right = st.columns([1, 1])
+    with left:
+        st.markdown(build_research_memo(selected))
+
+    with right:
+        st.markdown("### Validation")
+        for check in selected.validation_checks:
+            st.markdown(
+                f"- {_status_html(check.status)} **{check.name}:** {check.detail}",
+                unsafe_allow_html=True,
+            )
+
+        st.markdown("### Artifacts")
+        for artifact in selected.artifacts:
+            st.code(artifact)
+
+    st.subheader("Alpha Memory")
+    alpha_store = AlphaStore()
+    # Counts below cover only the 25 most recent candidates.
+    alpha_candidates = alpha_store.list_recent(25)
+    if alpha_candidates:
+        accepted = sum(1 for alpha in alpha_candidates if alpha.status == "accepted")
+        watch = sum(1 for alpha in alpha_candidates if alpha.status == "watch")
+        rejected = sum(1 for alpha in alpha_candidates if alpha.status == "rejected")
+        a1, a2, a3, a4 = st.columns(4)
+        a1.metric("Stored Alphas", len(alpha_candidates))
+        a2.metric("Accepted", accepted)
+        a3.metric("Watchlist", watch)
+        a4.metric("Rejected", rejected)
+        st.dataframe(_alpha_candidates_to_dataframe(alpha_candidates), use_container_width=True, hide_index=True)
+    else:
+        st.info("Alpha memory is empty. Run Agent Lab to generate and persist candidates.")
+
+    st.subheader("NLA Memory")
+    nla_store = NLAMemoryStore()
+    # Same 25-record window as the alpha table above.
+    nla_records = nla_store.list_recent(25)
+    if nla_records:
+        n1, n2, n3 = st.columns(3)
+        n1.metric("Stored NLA Notes", len(nla_records))
+        n2.metric("Avg Quality", f"{sum(r.quality_score for r in nla_records) / len(nla_records):.3f}")
+        n3.metric("Gemma/NLA Imports", sum(1 for r in nla_records if "nla" in r.source_model.lower()))
+        st.dataframe(_nla_records_to_dataframe(nla_records), use_container_width=True, hide_index=True)
+    else:
+        st.info("NLA memory is empty. Agent Lab will write explicit summaries; Gemma4 NLA JSONL can be imported later.")
+
+
+def render_agent_memory_context(regime_label: str, strategy_type: str) -> None:
+    """Show the alpha and NLA memory prompt contexts in collapsed expanders.
+
+    Both contexts are built for the given regime and strategy with ``n=5``.
+    """
+    alpha_text = AlphaStore().to_prompt_context(regime_label, strategy_type, n=5)
+    nla_text = NLAMemoryStore().to_prompt_context(regime_label, strategy_type, n=5)
+    sections = (
+        ("Alpha memory used for this run", alpha_text),
+        ("NLA memory used for this run", nla_text),
+    )
+    for title, body in sections:
+        with st.expander(title, expanded=False):
+            st.code(body)
+
+
# ─── Sidebar ───────────────────────────────────────────────────────────────────
def render_sidebar() -> Dict[str, Any]:
st.sidebar.title("⚙️ AgentQuant")
st.sidebar.markdown("---")
- available_assets = [f.stem for f in (
- __import__("pathlib").Path(config.data_path).glob("*.parquet")
- ) if not f.stem.startswith("FRED_")] or config.universe
-
st.sidebar.header("Date Range")
today = datetime.now()
end_default = today - timedelta(days=1)
@@ -124,11 +302,35 @@ def render_sidebar() -> Dict[str, Any]:
end_date = st.sidebar.date_input("End Date", value=end_default, max_value=today)
st.sidebar.header("Assets")
- selected_assets = st.sidebar.multiselect(
- "Select Assets",
- options=available_assets,
- default=available_assets[:4] if len(available_assets) >= 4 else available_assets,
+ starter_assets = _normalize_tickers(
+ config.universe
+ + [
+ "AAPL",
+ "MSFT",
+ "NVDA",
+ "AMZN",
+ "META",
+ "GOOGL",
+ "TSLA",
+ "JPM",
+ "XOM",
+ "BTC-USD",
+ "ETH-USD",
+ ]
+ )
+ selected_presets = st.sidebar.multiselect(
+ "Choose stocks or ETFs",
+ options=starter_assets,
+ default=config.universe[:4] if len(config.universe) >= 4 else config.universe,
+ )
+ custom_tickers = st.sidebar.text_input(
+ "Add tickers",
+ value="",
+ placeholder="e.g. AAPL, MSFT, NVDA",
)
+ custom_assets = custom_tickers.replace("\n", ",").split(",")
+ selected_assets = _normalize_tickers(selected_presets + custom_assets)
+ force_download = st.sidebar.checkbox("Refresh market data now", value=False)
st.sidebar.header("Strategy")
strategy_type = st.sidebar.selectbox(
@@ -145,6 +347,7 @@ def render_sidebar() -> Dict[str, Any]:
"start_date": start_date,
"end_date": end_date,
"selected_assets": selected_assets,
+ "force_download": force_download,
"strategy_type": strategy_type,
"n_proposals": n_proposals,
"run_agent": run_btn,
@@ -154,7 +357,7 @@ def render_sidebar() -> Dict[str, Any]:
# ─── Main ──────────────────────────────────────────────────────────────────────
def main():
- st.title("🤖 AgentQuant: AI Trading Research Platform")
+ st.title("🤖 AgentQuant Research Platform")
# Session state init
for key, default in [
@@ -163,12 +366,27 @@ def main():
("regime_label", ""),
("regime_signals", None),
("_data_cache", {}),
+ ("stored_alphas", []),
+ ("stored_nla_records", []),
+ ("alpha_memory_context", ""),
+ ("nla_memory_context", ""),
]:
if key not in st.session_state:
st.session_state[key] = default
opts = render_sidebar()
+ render_research_workspace()
+ st.divider()
+ st.header("Agent Lab")
+ st.caption("Generate new strategy proposals, backtest them, and promote successful runs into the research workspace.")
+ if st.session_state.alpha_memory_context:
+ with st.expander("Latest alpha memory context", expanded=False):
+ st.code(st.session_state.alpha_memory_context)
+ if st.session_state.nla_memory_context:
+ with st.expander("Latest NLA memory context", expanded=False):
+ st.code(st.session_state.nla_memory_context)
+
# ── Regime banner (always show if we have a regime) ──────────────────────
if st.session_state.regime_label:
st.markdown(_regime_badge(st.session_state.regime_label), unsafe_allow_html=True)
@@ -195,7 +413,12 @@ def main():
try:
# Step 1: Fetch data
progress.progress(10, text="📥 Fetching market data…")
- data = _fetch_data_cached(assets_tuple, start_str, end_str)
+ data = _fetch_data_cached(
+ assets_tuple,
+ start_str,
+ end_str,
+ force_download=opts["force_download"],
+ )
st.session_state._data_cache = data
if config.reference_asset not in data:
@@ -211,15 +434,30 @@ def main():
signals = detect_regime_full(features_df)
context = build_context(features_df)
context.regime_label = signals.regime_label
+ alpha_store = AlphaStore()
+ nla_store = NLAMemoryStore()
+ context.alpha_memory_context = alpha_store.to_prompt_context(
+ signals.regime_label,
+ opts["strategy_type"],
+ n=5,
+ )
+ context.nla_memory_context = nla_store.to_prompt_context(
+ signals.regime_label,
+ opts["strategy_type"],
+ n=5,
+ )
st.session_state.regime_label = signals.regime_label
st.session_state.regime_signals = signals
+ st.session_state.alpha_memory_context = context.alpha_memory_context
+ st.session_state.nla_memory_context = context.nla_memory_context
# Refresh regime banner immediately
st.markdown(_regime_badge(signals.regime_label), unsafe_allow_html=True)
+ render_agent_memory_context(signals.regime_label, opts["strategy_type"])
# Step 4: Generate proposals
progress.progress(50, text="🧠 Generating strategy proposals…")
- generator = ProposalGenerator()
+ generator = ProposalGenerator(alpha_store=alpha_store)
proposals = generator.generate(
context=context,
n_proposals=opts["n_proposals"],
@@ -228,6 +466,8 @@ def main():
# Step 5: Backtest each proposal
backtest_results = {}
+ stored_alphas = []
+ stored_nla_records = []
for i, proposal in enumerate(proposals):
pct = 55 + int(40 * (i + 1) / len(proposals))
progress.progress(
@@ -247,15 +487,44 @@ def main():
"proposal": proposal,
"result": result,
}
+ stored_alphas.append(
+ alpha_store.store_backtest_result(
+ regime=signals.regime_label,
+ strategy_type=opts["strategy_type"],
+ params=proposal.params,
+ metrics=result["metrics"],
+ assets=list(opts["selected_assets"]),
+ generation_method=proposal.generation_method,
+ confidence=proposal.confidence,
+ reasoning=proposal.reasoning,
+ source="streamlit_agent_lab",
+ )
+ )
+ stored_nla_records.append(
+ nla_store.store_agent_summary(
+ regime=signals.regime_label,
+ strategy_type=opts["strategy_type"],
+ params=proposal.params,
+ metrics=result["metrics"],
+ narrative=proposal.reasoning
+ or "Explicit Agent Lab summary for this tested proposal.",
+ alpha_id=stored_alphas[-1].alpha_id,
+ tags=("streamlit_agent_lab", proposal.generation_method),
+ )
+ )
except Exception as e:
logger.warning("Backtest failed for proposal %d: %s", i + 1, e)
st.session_state.strategies = proposals
st.session_state.backtest_results = backtest_results
+ st.session_state.stored_alphas = stored_alphas
+ st.session_state.stored_nla_records = stored_nla_records
progress.progress(100, text="✅ Done!")
st.success(
- f"Generated {len(proposals)} proposals, {len(backtest_results)} backtested successfully."
+ f"Generated {len(proposals)} proposals, {len(backtest_results)} backtested successfully, "
+ f"stored {len(stored_alphas)} alpha candidates and "
+ f"{len(stored_nla_records)} NLA memory records."
)
except Exception as e:
@@ -268,6 +537,21 @@ def main():
if st.session_state.backtest_results:
results = st.session_state.backtest_results
+ if st.session_state.stored_alphas:
+ st.subheader("Stored Alpha Candidates")
+ st.dataframe(
+ _alpha_candidates_to_dataframe(st.session_state.stored_alphas),
+ use_container_width=True,
+ hide_index=True,
+ )
+ if st.session_state.stored_nla_records:
+ st.subheader("Stored NLA Memory")
+ st.dataframe(
+ _nla_records_to_dataframe(st.session_state.stored_nla_records),
+ use_container_width=True,
+ hide_index=True,
+ )
+
# #25: Comparative table — most important quant view
st.subheader("📊 Strategy Comparison")
rows = []
@@ -288,8 +572,6 @@ def main():
})
if rows:
df_cmp = pd.DataFrame(rows).set_index("Strategy")
- # Highlight best Sharpe
- best_sharpe_idx = df_cmp["Sharpe"].astype(float).idxmax()
st.dataframe(
df_cmp.style.highlight_max(subset=["Sharpe"], color="#d4edda")
.highlight_min(subset=["Max DD"], color="#d4edda"),
diff --git a/src/data/ingest.py b/src/data/ingest.py
index 4513434..a2d8891 100644
--- a/src/data/ingest.py
+++ b/src/data/ingest.py
@@ -66,6 +66,29 @@ def _is_cache_valid(file_path: Path) -> bool:
return True
+def _cache_covers_range(
+    df: pd.DataFrame,
+    start_date=None,
+    end_date=None,
+    tolerance_days: int = 3,
+) -> bool:
+    """Return True when a cached frame covers the requested date range.
+
+    Args:
+        df: Cached frame. An empty frame, or one whose index is not a
+            ``DatetimeIndex``, never covers any range.
+        start_date: Requested start (anything ``pd.to_datetime`` accepts);
+            a falsy value means no lower bound.
+        end_date: Requested end; a falsy value means no upper bound.
+        tolerance_days: Calendar-day slack allowed at each edge so that a
+            request starting or ending on a weekend/holiday still counts as
+            covered by the nearest trading day in the cache.
+
+    Returns:
+        True when the cached dates span the requested range within the
+        tolerance, False otherwise.
+    """
+    if df.empty or not isinstance(df.index, pd.DatetimeIndex):
+        return False
+
+    def _naive_day(value) -> pd.Timestamp:
+        # Compare on timezone-naive calendar days, like the cached index below.
+        stamp = pd.to_datetime(value)
+        if stamp.tzinfo is not None:
+            stamp = stamp.tz_localize(None)
+        return stamp.normalize()
+
+    index = df.index.tz_localize(None) if df.index.tz is not None else df.index
+    first_cached = index.min().normalize()
+    last_cached = index.max().normalize()
+    slack = pd.Timedelta(days=tolerance_days)
+
+    if start_date:
+        # Without slack here, a start date on a non-trading day could never
+        # be satisfied and the cache would be re-downloaded on every call.
+        if first_cached > _naive_day(start_date) + slack:
+            return False
+    if end_date:
+        # yfinance treats end as exclusive. Permit a small weekend/holiday gap.
+        if last_cached < _naive_day(end_date) - slack:
+            return False
+    return True
+
+
def fetch_ohlcv_data(
ticker: Optional[str] = None,
start_date=None,
@@ -103,6 +126,9 @@ def fetch_ohlcv_data(
if not force_download and _is_cache_valid(file_path):
try:
df = pd.read_parquet(file_path)
+ if not _cache_covers_range(df, start_date, end_date):
+ logger.info("Cache for %s does not cover requested range. Re-fetching.", t)
+ raise ValueError("cache does not cover requested date range")
if start_date:
df = df[df.index >= pd.to_datetime(start_date)]
if end_date:
@@ -176,4 +202,4 @@ def fetch_fred_data(force_download: bool = False) -> Optional[Dict[str, pd.DataF
except Exception as e:
logger.error("Could not fetch FRED series %s: %s", series_id, e)
- return fred_data
\ No newline at end of file
+ return fred_data
diff --git a/src/research/__init__.py b/src/research/__init__.py
new file mode 100644
index 0000000..9968043
--- /dev/null
+++ b/src/research/__init__.py
@@ -0,0 +1,25 @@
+"""Research workspace primitives for the AgentQuant platform."""
+
+from src.research.alpha_store import AlphaCandidate, AlphaStore
+from src.research.nla_memory import NLAMemoryStore, NLARecord
+from src.research.workspace import (
+ ResearchRun,
+ ValidationCheck,
+ build_research_memo,
+ load_research_workspace,
+ runs_to_dataframe,
+ summarize_workspace,
+)
+
+__all__ = [
+ "AlphaCandidate",
+ "AlphaStore",
+ "NLAMemoryStore",
+ "NLARecord",
+ "ResearchRun",
+ "ValidationCheck",
+ "build_research_memo",
+ "load_research_workspace",
+ "runs_to_dataframe",
+ "summarize_workspace",
+]
diff --git a/src/research/alpha_store.py b/src/research/alpha_store.py
new file mode 100644
index 0000000..940a43c
--- /dev/null
+++ b/src/research/alpha_store.py
@@ -0,0 +1,284 @@
+"""
+Alpha Store
+===========
+
+SQLite-backed memory for alpha candidates discovered by Agent Lab runs.
+Each candidate keeps the thesis, parameters, regime, validation metrics, and
+status so future agents can retrieve the strongest prior evidence.
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, Iterable, List
+
+from src.utils.config import config
+
+
+@dataclass
+class AlphaCandidate:
+    """A discovered alpha candidate and its validation evidence (one ``alpha_candidates`` row)."""
+
+    alpha_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])  # first 8 chars of a random UUID
+    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())  # UTC, ISO-8601
+    regime: str = "Unknown"
+    strategy_type: str = ""
+    params: Dict[str, Any] = field(default_factory=dict)
+    thesis: str = ""
+    status: str = "watch"  # store_backtest_result() assigns "accepted", "watch" or "rejected"
+    sharpe: float = 0.0
+    total_return: float = 0.0
+    max_drawdown: float = 0.0  # store_backtest_result() saves this as a non-negative magnitude
+    num_trades: int = 0
+    confidence: float = 0.0
+    alpha_score: float = 0.0  # recall() orders candidates by this value, highest first
+    generation_method: str = ""
+    assets: List[str] = field(default_factory=list)
+    source: str = ""
+
+    def as_row(self) -> Dict[str, Any]:  # display-labelled dict; Sharpe and Score rounded to 3 dp
+        return {
+            "Alpha ID": self.alpha_id,
+            "Status": self.status,
+            "Regime": self.regime,
+            "Strategy": self.strategy_type,
+            "Params": self.params,
+            "Sharpe": round(self.sharpe, 3),
+            "Return": self.total_return,
+            "Max Drawdown": self.max_drawdown,
+            "Trades": self.num_trades,
+            "Score": round(self.alpha_score, 3),
+            "Method": self.generation_method,
+            "Assets": ", ".join(self.assets),  # list flattened to a comma-separated string
+            "Thesis": self.thesis,
+        }
+
+
+class AlphaStore:
+    """Persistence and retrieval layer for alpha candidates.
+
+    Every operation runs on a short-lived SQLite connection that is committed
+    and closed before the call returns, so no database handle is left open.
+    """
+
+    def __init__(self, db_path: str | Path | None = None):
+        """Use ``db_path`` (default ``config.results_db_path``) and create the schema."""
+        self.db_path = Path(db_path or config.results_db_path)
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self._init_db()
+
+    def _run(self, sql: str, args: Iterable[Any] = ()) -> List[sqlite3.Row]:
+        """Execute one statement in its own transaction and return any result rows."""
+        from contextlib import closing  # function-scope so file-level imports stay as-is
+
+        # sqlite3's own context manager only commits or rolls back; closing() is
+        # what actually releases the connection once the statement has run.
+        with closing(sqlite3.connect(self.db_path)) as conn, conn:
+            conn.row_factory = sqlite3.Row
+            return conn.execute(sql, tuple(args)).fetchall()
+
+    def _init_db(self) -> None:
+        """Create the candidates table and its lookup index if they do not exist."""
+        self._run("""
+            CREATE TABLE IF NOT EXISTS alpha_candidates (
+                alpha_id TEXT PRIMARY KEY,
+                timestamp TEXT NOT NULL,
+                regime TEXT NOT NULL,
+                strategy_type TEXT NOT NULL,
+                params_json TEXT NOT NULL,
+                thesis TEXT DEFAULT '',
+                status TEXT DEFAULT 'watch',
+                sharpe REAL DEFAULT 0.0,
+                total_return REAL DEFAULT 0.0,
+                max_drawdown REAL DEFAULT 0.0,
+                num_trades INTEGER DEFAULT 0,
+                confidence REAL DEFAULT 0.0,
+                alpha_score REAL DEFAULT 0.0,
+                generation_method TEXT DEFAULT '',
+                assets_json TEXT DEFAULT '[]',
+                source TEXT DEFAULT ''
+            )
+        """)
+        self._run("""
+            CREATE INDEX IF NOT EXISTS idx_alpha_lookup
+            ON alpha_candidates (regime, strategy_type, status, alpha_score)
+        """)
+
+    def store(self, candidate: AlphaCandidate) -> str:
+        """Insert or replace an alpha candidate and return its ``alpha_id``."""
+        self._run(
+            """INSERT OR REPLACE INTO alpha_candidates
+               (alpha_id, timestamp, regime, strategy_type, params_json,
+                thesis, status, sharpe, total_return, max_drawdown,
+                num_trades, confidence, alpha_score, generation_method,
+                assets_json, source)
+               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+            (
+                # Positional values, in the same order as the column list above.
+                candidate.alpha_id, candidate.timestamp, candidate.regime,
+                candidate.strategy_type, json.dumps(candidate.params, sort_keys=True),
+                candidate.thesis, candidate.status, candidate.sharpe,
+                candidate.total_return, candidate.max_drawdown, candidate.num_trades,
+                candidate.confidence, candidate.alpha_score, candidate.generation_method,
+                json.dumps(candidate.assets), candidate.source,
+            ),
+        )
+        return candidate.alpha_id
+
+    def store_backtest_result(
+        self,
+        *,
+        regime: str,
+        strategy_type: str,
+        params: Dict[str, Any],
+        metrics: Dict[str, Any],
+        assets: Iterable[str],
+        generation_method: str = "",
+        confidence: float = 0.0,
+        reasoning: str = "",
+        source: str = "",
+    ) -> AlphaCandidate:
+        """Score a backtest result, persist it as a candidate and return that candidate."""
+        sharpe = _metric(metrics, "sharpe_ratio", "sharpe")
+        total_return = _metric(metrics, "total_return")
+        max_drawdown = abs(_metric(metrics, "max_drawdown"))
+        num_trades = int(_metric(metrics, "num_trades"))
+        alpha_score = _alpha_score(sharpe, max_drawdown, num_trades)
+        status = _status_from_metrics(sharpe, max_drawdown)
+        thesis = reasoning or _default_thesis(strategy_type, params, regime)
+
+        candidate = AlphaCandidate(
+            regime=regime,
+            strategy_type=strategy_type,
+            params=dict(params),
+            thesis=thesis,
+            status=status,
+            sharpe=sharpe,
+            total_return=total_return,
+            max_drawdown=max_drawdown,
+            num_trades=num_trades,
+            confidence=float(confidence or 0.0),
+            alpha_score=alpha_score,
+            generation_method=generation_method,
+            assets=sorted(set(assets)),
+            source=source,
+        )
+        self.store(candidate)
+        return candidate
+
+    def recall(
+        self,
+        *,
+        regime: str = "",
+        strategy_type: str = "",
+        statuses: Iterable[str] = ("accepted", "watch"),
+        n: int = 5,
+    ) -> List[AlphaCandidate]:
+        """Return up to ``n`` candidates, best ``alpha_score`` first; empty filters are skipped."""
+        query = "SELECT * FROM alpha_candidates WHERE 1=1"
+        params: List[Any] = []
+
+        if regime:
+            query += " AND regime = ?"
+            params.append(regime)
+        if strategy_type:
+            query += " AND strategy_type = ?"
+            params.append(strategy_type)
+
+        # A bare string would otherwise be split into single-character statuses.
+        statuses = (statuses,) if isinstance(statuses, str) else tuple(statuses)
+        if statuses:
+            placeholders = ",".join("?" for _ in statuses)
+            query += f" AND status IN ({placeholders})"
+            params.extend(statuses)
+
+        query += " ORDER BY alpha_score DESC, timestamp DESC LIMIT ?"
+        params.append(n)
+
+        return [_row_to_candidate(row) for row in self._run(query, params)]
+
+    def list_recent(self, n: int = 25) -> List[AlphaCandidate]:
+        """Return the most recent alpha candidates regardless of status."""
+        rows = self._run(
+            "SELECT * FROM alpha_candidates ORDER BY timestamp DESC LIMIT ?",
+            (n,),
+        )
+        return [_row_to_candidate(row) for row in rows]
+
+    def to_prompt_context(self, regime: str, strategy_type: str = "", n: int = 5) -> str:
+        """Format recalled alpha candidates as retrieval context for an agent."""
+        candidates = self.recall(regime=regime, strategy_type=strategy_type, n=n)
+        rejected = self.recall(
+            regime=regime,
+            strategy_type=strategy_type,
+            statuses=("rejected",),
+            n=n,
+        )
+        if not candidates and not rejected:
+            return "No stored alpha candidates for this regime and strategy yet."
+
+        lines = ["ALPHA MEMORY FROM PRIOR RUNS:"]
+        for alpha in candidates:
+            lines.append(
+                f"  - {alpha.status.upper()} {alpha.strategy_type} {json.dumps(alpha.params, sort_keys=True)} "
+                f"| Sharpe={alpha.sharpe:.2f}, Drawdown={alpha.max_drawdown:.1%}, "
+                f"Score={alpha.alpha_score:.2f}, Thesis={alpha.thesis}"
+            )
+        for alpha in rejected:
+            lines.append(
+                f"  - REJECTED {alpha.strategy_type} {json.dumps(alpha.params, sort_keys=True)} "
+                f"| Sharpe={alpha.sharpe:.2f}, Drawdown={alpha.max_drawdown:.1%}. "
+                "Avoid repeating this exact configuration unless new evidence changes."
+            )
+        return "\n".join(lines)
+
+
+def _metric(metrics: Dict[str, Any], *names: str) -> float:
+    """Return the first non-None metric found under ``names`` as a float, else 0.0."""
+    # Aliases are tried in the order given; missing keys and explicit None are skipped.
+    found = (metrics.get(key) for key in names)
+    first = next((item for item in found if item is not None), None)
+    return 0.0 if first is None else float(first)
+
+
+def _alpha_score(sharpe: float, max_drawdown: float, num_trades: int) -> float:
+    """Return Sharpe minus drawdown minus a trade-count penalty (trades/1000, capped at 0.25)."""
+    return float(sharpe - max_drawdown - min(num_trades / 1_000.0, 0.25))
+
+
+def _status_from_metrics(sharpe: float, max_drawdown: float) -> str:
+    """Classify a result as ``accepted``, ``watch`` or ``rejected`` using the agent config."""
+    meets_sharpe = sharpe >= config.agent.min_acceptable_sharpe
+    if meets_sharpe and max_drawdown <= config.agent.risk.max_drawdown:
+        return "accepted"
+    return "watch" if sharpe > 0 else "rejected"
+
+
+def _default_thesis(strategy_type: str, params: Dict[str, Any], regime: str) -> str:  # fallback thesis text
+    return f"{strategy_type} parameters {json.dumps(params, sort_keys=True)} tested in {regime}."
+
+
+def _row_to_candidate(row: sqlite3.Row) -> AlphaCandidate:  # rebuild a candidate from one DB row
+    return AlphaCandidate(
+        alpha_id=row["alpha_id"],
+        timestamp=row["timestamp"],
+        regime=row["regime"],
+        strategy_type=row["strategy_type"],
+        params=json.loads(row["params_json"] or "{}"),  # JSON text -> dict; empty/NULL -> {}
+        thesis=row["thesis"] or "",
+        status=row["status"] or "watch",
+        sharpe=float(row["sharpe"] or 0.0),  # NULL numeric columns fall back to zero
+        total_return=float(row["total_return"] or 0.0),
+        max_drawdown=float(row["max_drawdown"] or 0.0),
+        num_trades=int(row["num_trades"] or 0),
+        confidence=float(row["confidence"] or 0.0),
+        alpha_score=float(row["alpha_score"] or 0.0),
+        generation_method=row["generation_method"] or "",
+        assets=json.loads(row["assets_json"] or "[]"),  # JSON text -> list; empty/NULL -> []
+        source=row["source"] or "",
+    )
diff --git a/src/research/nla_memory.py b/src/research/nla_memory.py
new file mode 100644
index 0000000..0e71d17
--- /dev/null
+++ b/src/research/nla_memory.py
@@ -0,0 +1,258 @@
+"""
+NLA Memory
+==========
+
+SQLite-backed storage for explicit natural-language activation narratives.
+This module consumes NLA-style outputs such as the JSONL files emitted by
+OnePunchMonk/nla-gemma4 and exposes them as retrieval context for future
+strategy agents.
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, Iterable, List
+
+from src.utils.config import config
+
+
+@dataclass
+class NLARecord:
+    """A stored explicit activation narrative for future research retrieval (one ``nla_records`` row)."""
+
+    record_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])  # first 8 chars of a random UUID
+    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())  # UTC, ISO-8601
+    regime: str = "Unknown"
+    strategy_type: str = ""
+    params: Dict[str, Any] = field(default_factory=dict)
+    narrative: str = ""
+    source_text: str = ""
+    source_model: str = ""
+    cosine: float = 0.0  # filled from the "cosine" field of imported JSONL rows
+    direction_mse: float = 0.0  # filled from the "direction_mse" field of imported JSONL rows
+    quality_score: float = 0.0  # recall() orders records by this value, highest first
+    tags: List[str] = field(default_factory=list)
+    alpha_id: str = ""  # presumably links to an AlphaCandidate.alpha_id; verify against callers
+
+    def as_row(self) -> Dict[str, Any]:  # display-labelled dict; numeric scores rounded to 3 dp
+        return {
+            "Record ID": self.record_id,
+            "Regime": self.regime,
+            "Strategy": self.strategy_type,
+            "Params": self.params,
+            "Quality": round(self.quality_score, 3),
+            "Cosine": round(self.cosine, 3),
+            "Direction MSE": round(self.direction_mse, 3),
+            "Source": self.source_model,
+            "Tags": ", ".join(self.tags),  # list flattened to a comma-separated string
+            "Narrative": self.narrative,
+        }
+
+
+class NLAMemoryStore:
+    """Persistence and retrieval layer for explicit NLA narratives; connections never outlive a call."""
+
+    def __init__(self, db_path: str | Path | None = None):
+        """Use ``db_path`` (default ``config.results_db_path``) and create the schema."""
+        self.db_path = Path(db_path or config.results_db_path)
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self._init_db()
+
+    def _run(self, sql: str, args: Iterable[Any] = ()) -> List[sqlite3.Row]:
+        """Execute one statement in its own transaction and return any result rows."""
+        from contextlib import closing  # function-scope so file-level imports stay as-is
+
+        # sqlite3's own context manager only commits or rolls back; closing() is
+        # what actually releases the connection once the statement has run.
+        with closing(sqlite3.connect(self.db_path)) as conn, conn:
+            conn.row_factory = sqlite3.Row
+            return conn.execute(sql, tuple(args)).fetchall()
+
+    def _init_db(self) -> None:
+        """Create the records table and its lookup index if they do not exist."""
+        self._run("""
+            CREATE TABLE IF NOT EXISTS nla_records (
+                record_id TEXT PRIMARY KEY,
+                timestamp TEXT NOT NULL,
+                regime TEXT NOT NULL,
+                strategy_type TEXT NOT NULL,
+                params_json TEXT NOT NULL,
+                narrative TEXT DEFAULT '',
+                source_text TEXT DEFAULT '',
+                source_model TEXT DEFAULT '',
+                cosine REAL DEFAULT 0.0,
+                direction_mse REAL DEFAULT 0.0,
+                quality_score REAL DEFAULT 0.0,
+                tags_json TEXT DEFAULT '[]',
+                alpha_id TEXT DEFAULT ''
+            )
+        """)
+        self._run("""
+            CREATE INDEX IF NOT EXISTS idx_nla_lookup
+            ON nla_records (regime, strategy_type, quality_score)
+        """)
+
+    def store(self, record: NLARecord) -> str:
+        """Insert or replace an NLA memory record and return its ``record_id``."""
+        self._run(
+            """INSERT OR REPLACE INTO nla_records
+               (record_id, timestamp, regime, strategy_type, params_json,
+                narrative, source_text, source_model, cosine, direction_mse,
+                quality_score, tags_json, alpha_id)
+               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+            (
+                # Positional values, in the same order as the column list above.
+                record.record_id, record.timestamp, record.regime,
+                record.strategy_type, json.dumps(record.params, sort_keys=True),
+                record.narrative, record.source_text, record.source_model,
+                float(record.cosine or 0.0), float(record.direction_mse or 0.0),
+                float(record.quality_score or 0.0), json.dumps(record.tags), record.alpha_id,
+            ),
+        )
+        return record.record_id
+
+    def store_agent_summary(
+        self,
+        *,
+        regime: str,
+        strategy_type: str,
+        params: Dict[str, Any],
+        narrative: str,
+        metrics: Dict[str, Any],
+        alpha_id: str = "",
+        source_model: str = "agentquant-explicit-summary",
+        tags: Iterable[str] = (),
+    ) -> NLARecord:
+        """Store an explicit agent summary as NLA-compatible memory; quality = Sharpe - |drawdown|."""
+        # Fall back to the "sharpe" alias when "sharpe_ratio" is missing or explicitly None.
+        raw_sharpe = metrics.get("sharpe_ratio")
+        sharpe = float((metrics.get("sharpe") if raw_sharpe is None else raw_sharpe) or 0.0)
+        max_drawdown = abs(float(metrics.get("max_drawdown", 0.0) or 0.0))
+        quality_score = sharpe - max_drawdown
+        record = NLARecord(
+            regime=regime,
+            strategy_type=strategy_type,
+            params=dict(params),
+            narrative=narrative,
+            source_text=_agent_source_text(strategy_type, params, metrics),
+            source_model=source_model,
+            quality_score=quality_score,
+            tags=sorted(set(tags)),
+            alpha_id=alpha_id,
+        )
+        self.store(record)
+        return record
+
+    def ingest_nla_jsonl(
+        self,
+        path: str | Path,
+        *,
+        regime: str,
+        strategy_type: str,
+        params: Dict[str, Any] | None = None,
+        source_model: str = "gemma4-nla",
+        tags: Iterable[str] = (),
+    ) -> List[NLARecord]:
+        """Import NLA JSONL output: one record per non-blank line, quality = cosine - direction_mse."""
+        records: List[NLARecord] = []
+        with Path(path).open("r", encoding="utf-8") as handle:
+            for line in handle:
+                if not line.strip():
+                    continue
+                payload = json.loads(line)
+                cosine = float(payload.get("cosine", 0.0) or 0.0)
+                direction_mse = float(payload.get("direction_mse", 0.0) or 0.0)
+                record = NLARecord(
+                    regime=regime,
+                    strategy_type=strategy_type,
+                    params=dict(params or {}),
+                    narrative=str(payload.get("explanation", "")),
+                    source_text=str(payload.get("text", "")),
+                    source_model=str(payload.get("model_id", source_model)),
+                    cosine=cosine,
+                    direction_mse=direction_mse,
+                    quality_score=cosine - direction_mse,
+                    tags=sorted(set(tags)),
+                )
+                self.store(record)
+                records.append(record)
+        return records
+
+    def recall(
+        self,
+        *,
+        regime: str = "",
+        strategy_type: str = "",
+        n: int = 5,
+    ) -> List[NLARecord]:
+        """Return up to ``n`` records, best ``quality_score`` first; empty filters are skipped."""
+        query = "SELECT * FROM nla_records WHERE 1=1"
+        params: List[Any] = []
+
+        if regime:
+            query += " AND regime = ?"
+            params.append(regime)
+        if strategy_type:
+            query += " AND strategy_type = ?"
+            params.append(strategy_type)
+
+        query += " ORDER BY quality_score DESC, timestamp DESC LIMIT ?"
+        params.append(n)
+
+        return [_row_to_record(row) for row in self._run(query, params)]
+
+    def list_recent(self, n: int = 25) -> List[NLARecord]:
+        """Return recent NLA records regardless of regime."""
+        rows = self._run(
+            "SELECT * FROM nla_records ORDER BY timestamp DESC LIMIT ?",
+            (n,),
+        )
+        return [_row_to_record(row) for row in rows]
+
+    def to_prompt_context(self, regime: str, strategy_type: str = "", n: int = 5) -> str:
+        """Format explicit NLA memories as retrieval context for an agent."""
+        records = self.recall(regime=regime, strategy_type=strategy_type, n=n)
+        if not records:
+            return "No NLA memory records for this regime and strategy yet."
+
+        lines = [
+            "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES:",
+            "  Use these as research notes, not as hidden chain-of-thought.",
+        ]
+        for record in records:
+            lines.append(
+                f"  - {record.strategy_type} {json.dumps(record.params, sort_keys=True)} "
+                f"| Quality={record.quality_score:.2f}, Source={record.source_model}, "
+                f"Narrative={record.narrative}"
+            )
+        return "\n".join(lines)
+
+
+def _agent_source_text(strategy_type: str, params: Dict[str, Any], metrics: Dict[str, Any]) -> str:  # one-line provenance text
+    return (
+        f"{strategy_type} proposal {json.dumps(params, sort_keys=True)} "
+        f"produced metrics {json.dumps(metrics, sort_keys=True, default=str)}"  # default=str stringifies non-JSON metric values
+    )
+
+
+def _row_to_record(row: sqlite3.Row) -> NLARecord:  # rebuild a record from one DB row
+    return NLARecord(
+        record_id=row["record_id"],
+        timestamp=row["timestamp"],
+        regime=row["regime"],
+        strategy_type=row["strategy_type"],
+        params=json.loads(row["params_json"] or "{}"),  # JSON text -> dict; empty/NULL -> {}
+        narrative=row["narrative"] or "",
+        source_text=row["source_text"] or "",
+        source_model=row["source_model"] or "",
+        cosine=float(row["cosine"] or 0.0),  # NULL numeric columns fall back to zero
+        direction_mse=float(row["direction_mse"] or 0.0),
+        quality_score=float(row["quality_score"] or 0.0),
+        tags=json.loads(row["tags_json"] or "[]"),  # JSON text -> list; empty/NULL -> []
+        alpha_id=row["alpha_id"] or "",
+    )
diff --git a/src/research/workspace.py b/src/research/workspace.py
new file mode 100644
index 0000000..eda8196
--- /dev/null
+++ b/src/research/workspace.py
@@ -0,0 +1,432 @@
+"""
+Research Workspace
+==================
+
+Typed platform layer for turning experiment outputs into inspectable research
+runs. The dashboard can render these objects without knowing whether they came
+from SQLite, CSV backtests, or future agent-generated reports.
+"""
+
+from __future__ import annotations
+
+import json
+import math
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from statistics import mean, median
+from typing import Any, Dict, Iterable, List
+
+import pandas as pd
+
+PASS = "pass"
+WARN = "warn"
+FAIL = "fail"
+
+
+@dataclass(frozen=True)
+class ValidationCheck:
+    """A single, immutable research hygiene check shown in the platform workspace."""
+
+    name: str  # short label, e.g. "Drawdown sanity"
+    status: str  # compared against the module-level PASS / WARN / FAIL constants
+    detail: str  # human-readable explanation rendered next to the status
+
+
+@dataclass(frozen=True)
+class ResearchRun:
+    """A normalized, UI-ready experiment or benchmark run."""
+
+    run_id: str
+    name: str
+    source: str
+    strategy: str
+    mode: str
+    metrics: Dict[str, float]  # properties below read "sharpe", "total_return", "max_drawdown", "robustness_score"
+    validation_checks: List[ValidationCheck] = field(default_factory=list)
+    artifacts: List[str] = field(default_factory=list)
+    notes: str = ""
+    timestamp: str = ""
+    git_hash: str = ""
+
+    @property
+    def sharpe(self) -> float:  # 0.0 when the metric is absent
+        return float(self.metrics.get("sharpe", 0.0))
+
+    @property
+    def total_return(self) -> float:  # 0.0 when the metric is absent
+        return float(self.metrics.get("total_return", 0.0))
+
+    @property
+    def max_drawdown(self) -> float:  # always a non-negative magnitude
+        return abs(float(self.metrics.get("max_drawdown", 0.0)))
+
+    @property
+    def robustness_score(self) -> float:  # stored score, else sharpe - max_drawdown
+        return float(self.metrics.get("robustness_score", self.sharpe - self.max_drawdown))
+
+    @property
+    def validation_status(self) -> str:  # worst status wins; PASS when there are no checks
+        statuses = {check.status for check in self.validation_checks}
+        if FAIL in statuses:
+            return FAIL
+        if WARN in statuses:
+            return WARN
+        return PASS
+
+    def as_row(self) -> Dict[str, Any]:  # display-labelled dict; Sharpe and Robustness rounded to 3 dp
+        return {
+            "Run ID": self.run_id,
+            "Name": self.name,
+            "Mode": self.mode,
+            "Strategy": self.strategy,
+            "Source": self.source,
+            "Sharpe": round(self.sharpe, 3),
+            "Return": self.total_return,
+            "Max Drawdown": self.max_drawdown,
+            "Robustness": round(self.robustness_score, 3),
+            "Validation": self.validation_status,
+            "Git": self.git_hash,
+        }
+
+
+def _coerce_float(value: Any, default: float = 0.0) -> float:
+    """Return ``value`` as a float, or ``default`` when it carries no number.
+
+    Real ints/floats pass through (NaN maps to ``default``); anything else is
+    stringified and the first numeric token found in that text is parsed.
+    """
+    if value is None:
+        return default
+    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+    if is_number:
+        return default if math.isnan(value) else float(value)
+    # First number-looking token wins; bools and number-free text yield ``default``.
+    found = re.search(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", str(value).strip())
+    return float(found.group(0)) if found else default
+
+
+def _metric_alias(row: Dict[str, Any], *names: str) -> float:
+    """Coerce the first present, non-null alias in ``row`` to a float; 0.0 when none qualifies."""
+    usable = (row[key] for key in names if key in row and pd.notna(row[key]))
+    hit = next(usable, None)
+    return 0.0 if hit is None else _coerce_float(hit)
+
+
+def _basic_checks(
+    *,
+    sharpe: float,
+    max_drawdown: float,
+    n_windows: int = 1,
+    source: str,
+) -> List[ValidationCheck]:
+    """Build the hygiene checks for one run; each detail describes the observed outcome."""
+    complete = isinstance(sharpe, float) and isinstance(max_drawdown, float)
+    checks = [
+        ValidationCheck(
+            name="Metric completeness",
+            status=PASS if complete else FAIL,
+            detail="Sharpe and drawdown are available for comparison."
+            if complete
+            else "Sharpe or drawdown is not a float, so comparisons are unreliable.",
+        ),
+        ValidationCheck(
+            name="Drawdown sanity",
+            status=PASS if abs(max_drawdown) <= 0.35 else WARN,
+            detail=f"Observed max drawdown is {abs(max_drawdown) * 100:.1f}%.",
+        ),
+    ]
+    if source != "walk_forward":
+        checks.append(ValidationCheck(
+            name="Baseline context",
+            status=WARN,
+            detail="Useful benchmark, but not a leakage-safe validation protocol.",
+        ))
+        return checks
+
+    robust = sharpe > 0 and n_windows >= 3
+    checks.append(ValidationCheck(
+        name="Temporal validation",
+        status=PASS if n_windows >= 3 else WARN,
+        detail=f"Evaluated across {n_windows} chronological windows.",
+    ))
+    checks.append(ValidationCheck(
+        name="Robustness floor",
+        status=PASS if robust else WARN,
+        detail="Mean Sharpe remains positive after chronological splitting."
+        if robust
+        else f"Mean Sharpe {sharpe:.3f} over {n_windows} window(s) does not clear the floor.",
+    ))
+    return checks
+
+
+def _aggregate_walk_forward(path: Path) -> ResearchRun | None:
+    """Collapse a walk-forward CSV (one row per window) into a single research run."""
+    windows = pd.read_csv(path)
+    if windows.empty:
+        return None
+
+    def column(label: str) -> List[float]:
+        return [_coerce_float(cell) for cell in windows[label].tolist()]
+
+    sharpes, returns, drawdowns = column("sharpe"), column("return"), column("drawdown")
+    mean_sharpe = mean(sharpes)
+    spread = float(pd.Series(sharpes).std(ddof=0)) if len(sharpes) > 1 else 0.0
+    worst_drawdown = max(map(abs, drawdowns)) if drawdowns else 0.0
+    summary = {
+        "sharpe": mean_sharpe,
+        "median_sharpe": median(sharpes),
+        "min_sharpe": min(sharpes),
+        "sharpe_std": spread,
+        "total_return": sum(returns),
+        "max_drawdown": worst_drawdown,
+        "robustness_score": mean_sharpe - spread - worst_drawdown,
+        "n_windows": float(len(windows)),
+    }
+    return ResearchRun(
+        run_id="wf-momentum",
+        name="Walk-forward momentum study",
+        source="walk_forward",
+        strategy="momentum",
+        mode="Agent research",
+        metrics=summary,
+        validation_checks=_basic_checks(
+            sharpe=mean_sharpe, max_drawdown=worst_drawdown,
+            n_windows=len(windows), source="walk_forward",
+        ),
+        artifacts=[str(path)],
+        notes="Chronological windows with agent-selected momentum parameters.",
+    )
+
+
+def _runs_from_static_baselines(path: Path) -> List[ResearchRun]:  # one benchmark run per CSV row
+    df = pd.read_csv(path)
+    runs = []
+    for idx, row in df.iterrows():
+        record = row.to_dict()
+        strategy = str(record.get("strategy", f"baseline-{idx}"))  # fall back to a positional label
+        sharpe = _metric_alias(record, "sharpe", "sharpe_ratio")  # first usable alias wins
+        total_return = _metric_alias(record, "return", "total_return")
+        max_drawdown = abs(_metric_alias(record, "drawdown", "max_drawdown"))  # kept as a magnitude
+        runs.append(
+            ResearchRun(
+                run_id=f"base-{idx + 1}",
+                name=f"{strategy} benchmark",
+                source="baseline",
+                strategy=strategy,
+                mode="Benchmark",
+                metrics={
+                    "sharpe": sharpe,
+                    "total_return": total_return,
+                    "max_drawdown": max_drawdown,
+                    "robustness_score": sharpe - max_drawdown,
+                },
+                validation_checks=_basic_checks(
+                    sharpe=sharpe,
+                    max_drawdown=max_drawdown,
+                    source="baseline",
+                ),
+                artifacts=[str(path)],
+                notes="Static benchmark used to anchor agent results.",
+            )
+        )
+    return runs
+
+
+def _aggregate_random_baseline(path: Path) -> ResearchRun | None:
+    """Summarise a random-search CSV (one row per trial) as a single benchmark run."""
+    trials = pd.read_csv(path)
+    if trials.empty:
+        return None
+
+    def column(label: str) -> List[float]:
+        return [_coerce_float(cell) for cell in trials[label].tolist()]
+
+    sharpes, returns, drawdowns = column("sharpe"), column("return"), column("drawdown")
+    avg_sharpe = mean(sharpes)
+    worst_drawdown = max(map(abs, drawdowns)) if drawdowns else 0.0
+    summary = {
+        "sharpe": avg_sharpe,
+        "p95_sharpe": float(pd.Series(sharpes).quantile(0.95)),
+        "total_return": mean(returns),
+        "max_drawdown": worst_drawdown,
+        "robustness_score": avg_sharpe - worst_drawdown,
+        "n_trials": float(len(trials)),
+    }
+    return ResearchRun(
+        run_id="rnd-momentum",
+        name="Random momentum baseline",
+        source="baseline",
+        strategy="momentum",
+        mode="Benchmark",
+        metrics=summary,
+        validation_checks=_basic_checks(
+            sharpe=avg_sharpe, max_drawdown=worst_drawdown, source="baseline"
+        ),
+        artifacts=[str(path)],
+        notes="Distributional baseline for checking whether agent runs beat random parameter search.",
+    )
+
+
+def _runs_from_ablation(path: Path) -> List[ResearchRun]:  # one run per distinct value of the "type" column
+    df = pd.read_csv(path)
+    if df.empty or "type" not in df or "sharpe" not in df:  # nothing usable without both columns
+        return []
+
+    runs = []
+    for group_name, group in df.groupby("type"):
+        sharpes = [_coerce_float(v) for v in group["sharpe"].tolist()]
+        avg_sharpe = mean(sharpes)
+        runs.append(
+            ResearchRun(
+                run_id=f"abl-{str(group_name).lower().replace(' ', '-')}",  # slug of the arm name
+                name=f"{group_name} ablation",
+                source="ablation",
+                strategy="agent_context",
+                mode="Ablation",
+                metrics={
+                    "sharpe": avg_sharpe,
+                    "total_return": 0.0,  # only Sharpe is read from the CSV; return is zero-filled
+                    "max_drawdown": 0.0,  # likewise zero-filled
+                    "robustness_score": avg_sharpe,
+                    "n_trials": float(len(group)),
+                },
+                validation_checks=[
+                    ValidationCheck(
+                        name="Ablation coverage",
+                        status=PASS if len(group) >= 3 else WARN,
+                        detail=f"{len(group)} trials available for this ablation arm.",
+                    ),
+                    ValidationCheck(
+                        name="Metric completeness",
+                        status=PASS,
+                        detail="Sharpe is available for context-vs-no-context comparison.",
+                    ),
+                ],
+                artifacts=[str(path)],
+                notes="Compares agent proposal quality with and without regime context.",
+            )
+        )
+    return runs
+
+
+def _runs_from_results_store(db_path: Path) -> List[ResearchRun]:  # one run per row of ResultsStore.list_runs()
+    if not db_path.exists():  # no database yet means no stored runs
+        return []
+
+    from experiments.results_store import ResultsStore  # imported only when a database file exists
+
+    runs = []
+    store = ResultsStore(str(db_path))
+    for row in store.list_runs():
+        aggregate = json.loads(row.get("aggregate_metrics") or "{}")  # assumes a JSON string column; TODO confirm
+        sharpe = _metric_alias(aggregate, "mean_sharpe", "sharpe", "sharpe_ratio")
+        max_drawdown = abs(_metric_alias(aggregate, "max_drawdown"))
+        run = ResearchRun(
+            run_id=str(row["run_id"]),
+            name=f"{row['experiment_type']} run",
+            source="sqlite",
+            strategy=str(aggregate.get("strategy", "mixed")),
+            mode=str(row["experiment_type"]),
+            timestamp=str(row.get("timestamp", "")),
+            git_hash=str(row.get("git_hash", "")),
+            metrics={
+                "sharpe": sharpe,
+                "total_return": _metric_alias(aggregate, "total_return"),
+                "max_drawdown": max_drawdown,
+                "robustness_score": sharpe - max_drawdown,
+                "n_windows": _metric_alias(aggregate, "n_windows"),
+            },
+            validation_checks=_basic_checks(
+                sharpe=sharpe,
+                max_drawdown=max_drawdown,
+                n_windows=int(_metric_alias(aggregate, "n_windows")),  # 0 when the aggregate omits it
+                source="walk_forward" if "walk" in str(row["experiment_type"]) else "sqlite",  # selects the check set
+            ),
+            artifacts=[str(db_path)],
+        )
+        runs.append(run)
+    return runs
+
+
+def load_research_workspace(
+    experiments_dir: str | Path = "experiments",
+    results_db_path: str | Path = "experiments/results.db",
+) -> List[ResearchRun]:
+    """Collect every known local experiment artifact into ranked research runs.
+
+    Runs come from the SQLite results store plus whichever of the four known
+    CSV artifacts exist under ``experiments_dir``; the combined list is returned
+    sorted by robustness score, best first.
+    """
+    root = Path(experiments_dir)
+    collected: List[ResearchRun] = list(_runs_from_results_store(Path(results_db_path)))
+
+    # (file name, loader, whether the loader returns one optional run instead of a list)
+    loaders = (
+        ("walk_forward_results.csv", _aggregate_walk_forward, True),
+        ("static_baseline_results.csv", _runs_from_static_baselines, False),
+        ("random_baseline_results.csv", _aggregate_random_baseline, True),
+        ("ablation_results.csv", _runs_from_ablation, False),
+    )
+    for file_name, loader, single in loaders:
+        artifact = root / file_name
+        if not artifact.exists():
+            continue
+        loaded = loader(artifact)
+        if not single:
+            collected.extend(loaded)
+        elif loaded:
+            collected.append(loaded)
+
+    return sorted(collected, key=lambda item: item.robustness_score, reverse=True)
+
+
+def runs_to_dataframe(runs: Iterable[ResearchRun]) -> pd.DataFrame:
+    """Tabulate runs for the dashboard: one row per run, columns taken from ``as_row()``."""
+    # Built in a single expression; the input iterable is consumed exactly once.
+    return pd.DataFrame([entry.as_row() for entry in runs])
+
+
+def summarize_workspace(runs: Iterable[ResearchRun]) -> Dict[str, Any]:
+    """Compute headline KPIs (count, best run, best Sharpe, pass rate) for the workspace.
+
+    An empty workspace yields zeroed metrics with ``best_run`` set to ``None``.
+    """
+    materialized = list(runs)
+    summary: Dict[str, Any] = {
+        "run_count": len(materialized),
+        "best_run": None,
+        "best_sharpe": 0.0,
+        "best_robustness": 0.0,
+        "validation_pass_rate": 0.0,
+    }
+    if materialized:
+        leader = max(materialized, key=lambda item: item.robustness_score)
+        passing = [item for item in materialized if item.validation_status == PASS]
+        summary["best_run"] = leader
+        summary["best_sharpe"] = max(item.sharpe for item in materialized)
+        summary["best_robustness"] = leader.robustness_score
+        summary["validation_pass_rate"] = len(passing) / len(materialized)
+    return summary
+
+
+def build_research_memo(run: ResearchRun) -> str:
+ """Generate a concise Markdown memo for a selected research run."""
+ checks = "\n".join(
+ f"- **{check.name}** ({check.status}): {check.detail}"
+ for check in run.validation_checks
+ )
+ return f"""### {run.name}
+
+**Mode:** {run.mode}
+**Strategy:** {run.strategy}
+**Source:** `{run.source}`
+
+**Result:** Sharpe {run.sharpe:.3f}, total return {run.total_return * 100:.1f}%, max drawdown {run.max_drawdown * 100:.1f}%, robustness {run.robustness_score:.3f}.
+
+**Research Notes:** {run.notes or "No notes recorded."}
+
+**Validation Checks**
+{checks}
+"""
diff --git a/tests/test_alpha_store.py b/tests/test_alpha_store.py
new file mode 100644
index 0000000..5e19ba1
--- /dev/null
+++ b/tests/test_alpha_store.py
@@ -0,0 +1,113 @@
+"""Tests for alpha memory persistence and retrieval."""
+
+from src.agent.context_builder import RegimeContext
+from src.agent.proposal_generator import ProposalGenerator
+from src.research.alpha_store import AlphaStore
+
+
+class NoopPlanner:  # planner stub: reports itself unavailable and proposes nothing
+    def is_available(self):
+        return False
+
+    def generate_proposals(self, prompt, n=5):  # arguments are ignored
+        return []
+
+
+def test_alpha_store_persists_and_recalls_top_candidates(tmp_path):  # recall() must rank by alpha_score
+    store = AlphaStore(tmp_path / "alphas.db")
+
+    weak = store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 5, "slow_window": 20},
+        metrics={"sharpe_ratio": 0.1, "total_return": 0.02, "max_drawdown": 0.04},
+        assets=["SPY"],
+        reasoning="Short momentum test.",
+    )
+    strong = store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 50, "slow_window": 200},
+        metrics={"sharpe_ratio": 1.2, "total_return": 0.22, "max_drawdown": 0.05},
+        assets=["SPY", "QQQ"],
+        reasoning="Long-horizon momentum held up in bullish mid-vol regime.",
+    )
+
+    recalled = store.recall(regime="MidVol-Bull", strategy_type="momentum", n=5)
+
+    assert [alpha.alpha_id for alpha in recalled] == [strong.alpha_id, weak.alpha_id]  # higher score first
+    assert recalled[0].status == "accepted"  # depends on the configured Sharpe/drawdown thresholds
+    assert recalled[0].assets == ["QQQ", "SPY"]  # assets are de-duplicated and sorted on store
+
+
+def test_alpha_prompt_context_is_agent_readable(tmp_path):  # prompt text must expose header, thesis and params
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="LowVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 63, "slow_window": 252},
+        metrics={"sharpe_ratio": 0.8, "total_return": 0.18, "max_drawdown": 0.03},
+        assets=["SPY"],
+        reasoning="Slow momentum worked in calm uptrends.",
+    )
+
+    context = store.to_prompt_context("LowVol-Bull", "momentum")
+
+    assert "ALPHA MEMORY FROM PRIOR RUNS" in context  # section header
+    assert "Slow momentum worked" in context  # thesis comes from the supplied reasoning
+    assert "fast_window" in context  # params are rendered as JSON
+
+
+def test_alpha_prompt_context_includes_rejected_configs(tmp_path):  # rejected runs must still reach the prompt
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 5, "slow_window": 20},
+        metrics={"sharpe_ratio": -0.4, "total_return": -0.1, "max_drawdown": 0.25},  # non-positive Sharpe -> "rejected"
+        assets=["SPY"],
+        reasoning="Too reactive in this regime.",
+    )
+
+    context = store.to_prompt_context("MidVol-Bull", "momentum")
+
+    assert "REJECTED" in context
+    assert "Avoid repeating" in context  # the warning sentence appended to rejected entries
+
+
+def test_proposal_generator_uses_alpha_memory_before_grid(tmp_path):  # stored alpha should lead the proposals
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 50, "slow_window": 200},
+        metrics={"sharpe_ratio": 1.1, "total_return": 0.2, "max_drawdown": 0.08},
+        assets=["SPY"],
+        reasoning="Retrieved candidate should lead future generation.",
+    )
+
+    generator = ProposalGenerator(planner=NoopPlanner(), alpha_store=store)  # planner stub never proposes
+    context = RegimeContext(regime_label="MidVol-Bull")
+    proposals = generator.generate(context, n_proposals=3, strategy_type="momentum")
+
+    assert proposals[0].generation_method == "alpha_memory"
+    assert proposals[0].params == {"fast_window": 50, "slow_window": 200}
+    assert len(proposals) == 3  # the requested count is still returned in full
+
+
+def test_proposal_generator_avoids_rejected_alpha_params(tmp_path):  # rejected params must not be re-proposed
+    store = AlphaStore(tmp_path / "alphas.db")
+    store.store_backtest_result(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 5, "slow_window": 20},
+        metrics={"sharpe_ratio": -0.5, "total_return": -0.08, "max_drawdown": 0.22},  # non-positive Sharpe -> "rejected"
+        assets=["SPY"],
+        reasoning="Rejected fast crossover.",
+    )
+
+    generator = ProposalGenerator(planner=NoopPlanner(), alpha_store=store)  # planner stub never proposes
+    context = RegimeContext(regime_label="MidVol-Bull")
+    proposals = generator.generate(context, n_proposals=5, strategy_type="momentum")
+
+    assert {"fast_window": 5, "slow_window": 20} not in [proposal.params for proposal in proposals]
diff --git a/tests/test_data_ingest.py b/tests/test_data_ingest.py
new file mode 100644
index 0000000..a363177
--- /dev/null
+++ b/tests/test_data_ingest.py
@@ -0,0 +1,58 @@
+"""Tests for live market-data ingestion and cache behavior."""
+
+import pandas as pd
+
+from src.data.ingest import _cache_covers_range, fetch_ohlcv_data
+from src.utils.config import config
+
+
+def test_cache_range_coverage_detects_missing_requested_dates():
+    """Check one covered request and two requests reaching outside the cached dates."""
+    two_day_cache = pd.DataFrame(
+        {"Close": [100.0, 101.0]}, index=pd.to_datetime(["2024-01-02", "2024-01-03"])
+    )
+
+    assert _cache_covers_range(two_day_cache, "2024-01-02", "2024-01-04")
+    assert not _cache_covers_range(two_day_cache, "2023-12-01", "2024-01-04")
+    assert not _cache_covers_range(two_day_cache, "2024-01-02", "2024-02-01")
+
+
+def test_fetch_refetches_when_cache_does_not_cover_range(tmp_path, monkeypatch):
+    """A cache file that misses the requested dates must lead to exactly one download."""
+    monkeypatch.setattr(config, "data_path", str(tmp_path))
+    monkeypatch.setattr(config.cache, "enabled", True)
+    monkeypatch.setattr(config.cache, "ttl_hours", 24)
+
+    # The cached file only holds 2024-01-02, which lies before the requested window.
+    pd.DataFrame(
+        {
+            "Open": [100.0],
+            "High": [101.0],
+            "Low": [99.0],
+            "Close": [100.5],
+            "Volume": [1_000],
+        },
+        index=pd.to_datetime(["2024-01-02"]),
+    ).to_parquet(tmp_path / "AAPL.parquet")
+
+    fresh_rows = pd.DataFrame(
+        {
+            "Open": [100.0, 102.0],
+            "High": [101.0, 103.0],
+            "Low": [99.0, 101.0],
+            "Close": [100.5, 102.5],
+            "Volume": [1_000, 1_500],
+        },
+        index=pd.to_datetime(["2024-02-01", "2024-02-02"]),
+    )
+    download_calls = []
+
+    def fake_download(ticker, start=None, end=None, auto_adjust=True, progress=False):
+        download_calls.append((ticker, start, end, auto_adjust, progress))
+        return fresh_rows
+
+    monkeypatch.setattr("src.data.ingest.yf.download", fake_download)
+    fetched = fetch_ohlcv_data("AAPL", "2024-02-01", "2024-02-03")
+
+    assert download_calls == [("AAPL", "2024-02-01", "2024-02-03", True, False)]
+    assert len(fetched["AAPL"]) == 2
diff --git a/tests/test_nla_memory.py b/tests/test_nla_memory.py
new file mode 100644
index 0000000..b6d2e6b
--- /dev/null
+++ b/tests/test_nla_memory.py
@@ -0,0 +1,66 @@
+"""Tests for NLA-style research memory."""
+
+import json
+
+from src.agent.context_builder import RegimeContext
+from src.research.nla_memory import NLAMemoryStore
+
+
+def test_nla_memory_stores_agent_summary_and_context(tmp_path):
+    """A stored agent summary must be recallable and quoted in the prompt context."""
+    memory = NLAMemoryStore(tmp_path / "research.db")
+    saved = memory.store_agent_summary(
+        regime="MidVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 50, "slow_window": 200},
+        narrative="Slow crossover narrative from explicit proposal reasoning.",
+        metrics={"sharpe_ratio": 1.1, "max_drawdown": 0.08},
+        alpha_id="alpha123",
+        tags=("test",),
+    )
+    first_hit = memory.recall(regime="MidVol-Bull", strategy_type="momentum")[0]
+    prompt_context = memory.to_prompt_context("MidVol-Bull", "momentum")
+
+    assert first_hit.record_id == saved.record_id
+    assert first_hit.alpha_id == "alpha123"
+    assert "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES" in prompt_context
+    assert "Slow crossover narrative" in prompt_context
+    assert "not as hidden chain-of-thought" in prompt_context
+
+
+def test_nla_memory_ingests_gemma4_jsonl(tmp_path):
+    """Ingest a one-row nla-gemma4 JSONL file and check the fields of the stored record."""
+    jsonl_path = tmp_path / "nla_eval.jsonl"
+    payload = {
+        "text": "momentum proposal with fast_window=20",
+        "explanation": "Activation narrative favors slower confirmation.",
+        "direction_mse": 0.15,
+        "cosine": 0.82,
+    }
+    jsonl_path.write_text(json.dumps(payload) + "\n", encoding="utf-8")
+    store = NLAMemoryStore(tmp_path / "research.db")
+    records = store.ingest_nla_jsonl(
+        jsonl_path,
+        regime="LowVol-Bull",
+        strategy_type="momentum",
+        params={"fast_window": 20},
+        tags=("gemma4",),
+    )
+    recalled = store.recall(regime="LowVol-Bull", strategy_type="momentum")
+
+    assert len(records) == 1
+    assert records[0].source_model == "gemma4-nla"
+    # Tolerance instead of ==: exact float equality breaks if the score is rounded or recomputed.
+    assert abs(records[0].quality_score - (0.82 - 0.15)) < 1e-9
+    assert recalled[0].narrative.startswith("Activation narrative")
+
+
+def test_regime_context_includes_nla_memory():
+    """NLA memory text handed to RegimeContext must surface in its prompt string."""
+    memory_text = "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES:\n - note"
+    regime_context = RegimeContext(regime_label="MidVol-Bull", nla_memory_context=memory_text)
+
+    rendered = regime_context.to_prompt_string()
+
+    # Only the header is checked; the layout of the rest of the prompt is not pinned here.
+    assert "NLA MEMORY FROM EXPLICIT ACTIVATION NARRATIVES" in rendered
diff --git a/tests/test_research_workspace.py b/tests/test_research_workspace.py
new file mode 100644
index 0000000..2355179
--- /dev/null
+++ b/tests/test_research_workspace.py
@@ -0,0 +1,81 @@
+"""Tests for the platform research workspace layer."""
+
+import pandas as pd
+
+from src.research.workspace import (
+ build_research_memo,
+ load_research_workspace,
+ runs_to_dataframe,
+ summarize_workspace,
+)
+
+
+def test_load_research_workspace_aggregates_csv_artifacts(tmp_path):
+    """Walk-forward and baseline CSVs must load as three runs with numeric metrics."""
+    artifacts_dir = tmp_path / "experiments"
+    artifacts_dir.mkdir()
+
+    pd.DataFrame({
+        "test_start": ["2024-01-01", "2024-07-01", "2025-01-01"],
+        "test_end": ["2024-06-30", "2024-12-31", "2025-06-30"],
+        "sharpe": [1.0, 0.5, -0.25],
+        "return": [0.10, 0.04, -0.02],
+        "drawdown": [0.04, 0.07, 0.10],
+        "params": ["{}", "{}", "{}"],
+    }).to_csv(artifacts_dir / "walk_forward_results.csv", index=False)
+
+    pd.DataFrame({
+        "strategy": ["Buy and Hold", "Golden Cross"],
+        "sharpe": ["Ticker\nSPY 0.80\ndtype: float64", "0.55"],
+        "return": ["Ticker\nSPY 0.25\ndtype: float64", "0.08"],
+        "drawdown": ["Ticker\nSPY -0.20\ndtype: float64", "0.05"],
+    }).to_csv(artifacts_dir / "static_baseline_results.csv", index=False)
+
+    loaded_runs = load_research_workspace(artifacts_dir, tmp_path / "missing.db")
+    assert len(loaded_runs) == 3
+    assert "wf-momentum" in [run.run_id for run in loaded_runs]
+    assert "Buy and Hold benchmark" in [run.name for run in loaded_runs]
+
+    wf_run = next(run for run in loaded_runs if run.run_id == "wf-momentum")
+    benchmark_run = next(run for run in loaded_runs if run.name == "Buy and Hold benchmark")
+    assert wf_run.metrics["n_windows"] == 3
+    assert wf_run.validation_status == "pass"
+    assert benchmark_run.sharpe == 0.8
+    assert benchmark_run.total_return == 0.25
+
+
+def test_workspace_summary_selects_best_robustness(tmp_path):
+    """best_run is the low-drawdown benchmark although the other run has the top Sharpe."""
+    exp_root = tmp_path / "experiments"
+    exp_root.mkdir()
+
+    pd.DataFrame({
+        "strategy": ["Low Drawdown", "High Sharpe"],
+        "sharpe": [0.7, 0.9],
+        "return": [0.08, 0.12],
+        "drawdown": [0.03, 0.35],
+    }).to_csv(exp_root / "static_baseline_results.csv", index=False)
+
+    summary = summarize_workspace(load_research_workspace(exp_root, tmp_path / "missing.db"))
+
+    assert summary["run_count"] == 2
+    assert summary["best_sharpe"] == 0.9
+    assert summary["best_run"].name == "Low Drawdown benchmark"
+
+
+def test_dataframe_and_memo_are_dashboard_ready(tmp_path):
+    """The runs table exposes the dashboard columns and the memo names its run."""
+    artifacts_dir = tmp_path / "experiments"
+    artifacts_dir.mkdir()
+    pd.DataFrame({
+        "type": ["With Context", "With Context", "No Context"],
+        "sharpe": [0.4, 0.6, 0.2],
+    }).to_csv(artifacts_dir / "ablation_results.csv", index=False)
+
+    loaded_runs = load_research_workspace(artifacts_dir, tmp_path / "missing.db")
+    runs_table = runs_to_dataframe(loaded_runs)
+    memo_text = build_research_memo(loaded_runs[0])
+
+    assert {"Run ID", "Name", "Sharpe", "Robustness", "Validation"} <= set(runs_table.columns)
+    assert "Validation Checks" in memo_text
+    assert loaded_runs[0].name in memo_text