From 24e77d79da474c2473794bdb5b4e668357a79558 Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 20:51:06 -0300
Subject: [PATCH 1/7] feat(resolver): asset classifier mapping identifiers to
 Wealthuman taxonomy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add resolve_asset(): turns any Brazilian asset identifier (ticker/CNPJ/ISIN/
name) into a classification mapped to the consolidation macro taxonomy
(RF/RV/Multimercado/Alternativos/Estruturados) plus an orthogonal exposure
axis (Brasil/Internacional), subclasse, underlying_nature, debenture
Lei-12.431 facts, source, confidence, and an audit cascade.

Deterministic, cacheable, no PII. Core is offline (curated ETF/global-fund
seed + structural rules); external providers (Mais Retorno / CVM-B3 / web
search) are an injectable fallback chain. Exposed over REST (/resolver/resolve)
and as the resolve_asset MCP tool.

Passes the Wealthuman spec test set: IFRA11→RF/Indexada à Inflação,
ARBOR/WHG→RV+Internacional, DEB PETROBRAS IPCA+→RF incentivada+isento,
COE→Estruturados (never ETF), "Crédito Estruturado"→RF (name-trap),
IVVB11→RV+Internacional, FIIs→RV.

Geography is modeled as the exposure axis, not a macro class (macro_class is
pure asset class). Hardened after cross-host adversarial review: heuristic
Lei-12.431 isento kept below the cascade short-circuit so a provider confirms
by ISIN; bare-token collisions (IE/LC/LF, substring LCI) removed; API length
caps; as_of stamped in America/Sao_Paulo.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                        |  16 +
 docs/MCP_SURFACE.md                 |   9 +-
 pyproject.toml                      |   3 +
 src/findata/api/app.py              |   2 +
 src/findata/api/mcp_app.py          |  29 ++
 src/findata/api/routers/resolver.py |  35 ++
 src/findata/resolver/__init__.py    |  35 ++
 src/findata/resolver/engine.py      | 565 ++++++++++++++++++++++++++++
 src/findata/resolver/models.py      | 124 ++++++
 src/findata/resolver/normalize.py   | 125 ++++++
 src/findata/resolver/seed.py        | 169 +++++++++
 src/findata/resolver/taxonomy.py    |  85 +++++
 tests/test_mcp_surface.py           |   4 +-
 tests/test_resolver.py              | 232 ++++++++++++
 14 files changed, 1427 insertions(+), 6 deletions(-)
 create mode 100644 src/findata/api/routers/resolver.py
 create mode 100644 src/findata/resolver/__init__.py
 create mode 100644 src/findata/resolver/engine.py
 create mode 100644 src/findata/resolver/models.py
 create mode 100644 src/findata/resolver/normalize.py
 create mode 100644 src/findata/resolver/seed.py
 create mode 100644 src/findata/resolver/taxonomy.py
 create mode 100644 tests/test_resolver.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f12ff64..a2f2a67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,22 @@ adheres to [Semantic Versioning](https://semver.org/).
 
 ### Added
 
+- **Asset-classification resolver** — `findata.resolver.resolve_asset()`,
+  `GET /resolver/resolve`, and the `resolve_asset` MCP tool. Turns any
+  Brazilian asset identifier (ticker/CNPJ/ISIN/name) into a classification
+  mapped to the consolidation macro taxonomy (Renda Fixa, Renda Variável,
+  Multimercado, Alternativos, Estruturados) plus an orthogonal `exposure`
+  axis (Brasil/Internacional), `subclasse`, `underlying_nature`, debenture
+  Lei-12.431 facts, `source`, `confidence`, and the `cascade` walked.
+  Deterministic and offline at its core (a curated ETF/global-fund seed +
+  structural rules), with an injectable external-provider chain (Mais
+  Retorno / CVM-B3 / restricted web search) for low-confidence fallback.
+  Classifies ETFs/funds by underlying (IFRA11 debêntures → RF; IVVB11 ações
+  → RV + Internacional), defends the COE-never-ETF and "Crédito Estruturado"
+  name-traps, and keeps the Lei-12.431 isento flag below the cascade
+  short-circuit when only inferred by heuristic. Hardened after adversarial
+  review: bare-token collisions (`IE`/`LC`/`LF`/substring `LCI`) removed,
+  API length caps, `as_of` stamped in America/Sao_Paulo.
 - **ANBIMA Títulos Públicos (TPF) secondary market** — `get_tpf()`,
   `GET /anbima/tpf`, and `findata anbima tpf`. Daily reference rates for
   outstanding federal government bonds (LTN, LFT, NTN-B, NTN-C, NTN-F) from
diff --git a/docs/MCP_SURFACE.md b/docs/MCP_SURFACE.md
index db9d067..fbbb90a 100644
--- a/docs/MCP_SURFACE.md
+++ b/docs/MCP_SURFACE.md
@@ -49,14 +49,15 @@ safe. **The 95 REST routes that back the CLI and HTTP consumers never change.**
 
 | | 1:1 (old) | curated (new) |
 |---|---:|---:|
-| MCP tools | 95 | **24** (25 with code mode) |
-| `tools/list` size | ~85k chars (~21k tok) | **~29k chars (~7k tok)** |
-| REST operations | 95 | **95 (unchanged)** |
+| MCP tools | 95 | **25** (26 with code mode) |
+| `tools/list` size | ~85k chars (~21k tok) | **~30k chars (~7k tok)** |
+| REST operations | 95 | **96** |
 
-## The 24 curated tools
+## The 25 curated tools
 
 ```
 registry_lookup          ← start here: CNPJ / ticker / code / name → entities
+resolve_asset            ← classify an asset into the macro taxonomy + exposure
 
 bcb_series   bcb_ptax   bcb_focus                       (BCB: 12 → 3)
 cvm_company  cvm_financials  cvm_fund  cvm_structured_fund   (CVM: 22 → 4)
diff --git a/pyproject.toml b/pyproject.toml
index edbe9aa..870d722 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,6 +137,9 @@ max-statements = 50
 # Curated MCP layer: FastAPI Query() defaults (B008), wide consolidated tools
 # (PLR0913), and intentional flat dataset-dispatch switches (C901/PLR0912/PLR0911).
 "src/findata/api/mcp_app.py" = ["B008", "PLR0913", "C901", "PLR0912", "PLR0911"]
+# Resolver engine: the classification cascade is an intentional flat
+# rule-by-rule switch (one branch per instrument shape) — auditable by design.
+"src/findata/resolver/engine.py" = ["C901", "PLR0912", "PLR0911"]
 # CLI commands are naturally wide (many typer.Option flags).
 "src/findata/cli.py" = ["PLR0913"]
 # Banner uses rich + sys.stdout directly — not a print-statement debug.
diff --git a/src/findata/api/app.py b/src/findata/api/app.py
index 741683b..700cbdf 100644
--- a/src/findata/api/app.py
+++ b/src/findata/api/app.py
@@ -26,6 +26,7 @@
     openfinance,
     receita,
     registry,
+    resolver,
     susep,
     tesouro,
     yahoo,
@@ -139,6 +140,7 @@ async def _value_error_handler(_: Request, exc: ValueError) -> JSONResponse:
 app.include_router(aneel.router)
 app.include_router(susep.router)
 app.include_router(registry.router)
+app.include_router(resolver.router)
 app.include_router(yahoo.router)
 
 
diff --git a/src/findata/api/mcp_app.py b/src/findata/api/mcp_app.py
index a026e5f..b82115e 100644
--- a/src/findata/api/mcp_app.py
+++ b/src/findata/api/mcp_app.py
@@ -39,6 +39,7 @@
 
 from findata.api._b3_common import MAX_TICKERS, resolve_quotes
 from findata.registry import lookup
+from findata.resolver import resolve_asset
 from findata.sources.anbima import indices as anbima_src
 from findata.sources.aneel import leiloes
 from findata.sources.b3 import cotahist, indices
@@ -97,6 +98,34 @@ async def registry_lookup(
     return await lookup(q, limit=limit)
 
 
+@router.get(
+    "/resolver/resolve",
+    operation_id="resolve_asset",
+    response_model=None,
+    summary="Classify a Brazilian asset into macro class (RF/RV/Multi/Alt/Estrut) + exposure",
+)
+async def resolve_asset_tool(
+    name: str | None = Query(
+        None, max_length=256, description="Asset name/label, e.g. 'FI ITAUINFRA CI'"
+    ),
+    ticker: str | None = Query(None, max_length=16, description="B3 ticker, e.g. IFRA11, PETR4"),
+    cnpj: str | None = Query(None, max_length=32, description="Fund CNPJ (masked or not)"),
+    isin: str | None = Query(None, max_length=16, description="ISIN, e.g. BR..."),
+) -> Any:
+    """Turn any asset identifier into a classification already mapped to the
+    consolidation macro taxonomy: Renda Fixa, Renda Variável, Multimercado,
+    Alternativos, Estruturados — plus the ``exposure`` axis (Brasil/Internacional).
+
+    Returns ``macro_class`` + ``subclasse`` + ``underlying_nature`` (splits
+    ETF-de-ações from ETF-de-debêntures), debenture/Lei-12.431 facts, ``source``,
+    ``confidence``, and the ``cascade`` walked — deterministic and cacheable.
+    Pass any subset of identifiers; a bare ticker/CNPJ given as ``name`` is
+    auto-detected. Use this (not ``registry_lookup``) when you need the asset's
+    macro class, not its registry entity.
+    """
+    return await resolve_asset(name=name, ticker=ticker, cnpj=cnpj, isin=isin)
+
+
 # ── BCB: Banco Central ────────────────────────────────────────────
 
 
diff --git a/src/findata/api/routers/resolver.py b/src/findata/api/routers/resolver.py
new file mode 100644
index 0000000..a47c8c3
--- /dev/null
+++ b/src/findata/api/routers/resolver.py
@@ -0,0 +1,35 @@
+"""Asset-classification resolver routes.
+
+Wraps :func:`findata.resolver.resolve_asset` over HTTP. The consolidator calls
+this per asset (dozens per statement), so the handler is a thin, cacheable pass
+through the deterministic core. No PII: only an asset identifier crosses the
+boundary.
+"""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, Query
+
+from findata.resolver import AssetClassification, resolve_asset
+
+router = APIRouter(prefix="/resolver", tags=["Resolver"])
+
+
+@router.get("/resolve")
+async def resolve(
+    name: str | None = Query(
+        None, max_length=256, description="Nome/label do ativo (ex.: 'FI ITAUINFRA CI')"
+    ),
+    ticker: str | None = Query(None, max_length=16, description="Ticker B3 (ex.: IFRA11, PETR4)"),
+    cnpj: str | None = Query(None, max_length=32, description="CNPJ do fundo (com ou sem máscara)"),
+    isin: str | None = Query(None, max_length=16, description="ISIN (ex.: BR...)"),
+) -> AssetClassification:
+    """Classifica um ativo na taxonomia macro Wealthuman.
+
+    Aceita qualquer identificador (``name``/``ticker``/``cnpj``/``isin``) e
+    devolve ``macro_class`` já mapeada (Renda Fixa, Renda Variável, Multimercado,
+    Alternativos, Estruturados) + ``exposure`` (Brasil/Internacional), subclasse,
+    underlying, debênture/Lei 12.431, ``source``, ``confidence`` e a cascata percorrida.
+    Determinístico e cacheável.
+    """
+    return await resolve_asset(name=name, ticker=ticker, cnpj=cnpj, isin=isin)
diff --git a/src/findata/resolver/__init__.py b/src/findata/resolver/__init__.py
new file mode 100644
index 0000000..003c19b
--- /dev/null
+++ b/src/findata/resolver/__init__.py
@@ -0,0 +1,35 @@
+"""Wealthuman asset-classification resolver.
+
+``resolve_asset(identifier)`` turns any Brazilian asset identifier (ticker,
+CNPJ, ISIN, or bare name) into a classification already mapped to the Wealthuman
+macro taxonomy (Renda Fixa, Renda Variável, Multimercado, Alternativos,
+Estruturados) plus an orthogonal exposure axis (Brasil/Internacional), subclasse,
+underlying nature, debenture / Lei-12.431 facts, source, confidence, and an audit cascade.
+
+Deterministic, cacheable, auditable, no PII. See ``openfindata-mcp-spec.md``.
+"""
+
+from __future__ import annotations
+
+from findata.resolver.engine import AssetProvider, classify, resolve_asset
+from findata.resolver.models import (
+    AssetClassification,
+    CvmInfo,
+    DebentureInfo,
+    IdentifierResolved,
+    TaxInfo,
+)
+from findata.resolver.normalize import NormalizedInput, normalize
+
+__all__ = [
+    "AssetClassification",
+    "AssetProvider",
+    "CvmInfo",
+    "DebentureInfo",
+    "IdentifierResolved",
+    "NormalizedInput",
+    "TaxInfo",
+    "classify",
+    "normalize",
+    "resolve_asset",
+]
diff --git a/src/findata/resolver/engine.py b/src/findata/resolver/engine.py
new file mode 100644
index 0000000..eabb757
--- /dev/null
+++ b/src/findata/resolver/engine.py
@@ -0,0 +1,565 @@
+"""The resolver engine: deterministic rule cascade + ``resolve_asset``.
+
+Classification is decided in this order, most-specific signal first:
+
+1. **Curated seed** (:mod:`findata.resolver.seed`) — only the non-derivable
+   cases (ETF underlying, global-mandate FIA).
+2. **Structural rules** (this module) — name/ticker patterns that *are*
+   derivable, first match wins: COE, debenture, CRA/CRI, bank paper, Tesouro,
+   IE/global, FII, ETF-by-name, FIDC, FIP, Multimercado, FIA/Ações, tickers.
+3. **External providers** (optional, injected) — Mais Retorno MCP, CVM/B3,
+   restricted web search. Not bundled here (they are client-side / networked);
+   the resolver takes a chain of async callbacks so a deployment can wire them.
+   Each step that fires lowers ``confidence`` and is appended to ``cascade``.
+
+The seed + rules layers are pure and offline, so the spec's test set resolves
+deterministically with no network. ``source`` is ``"openfindata"`` for every
+core hit; an external provider that overrides a field updates ``source`` too.
+
+Key traps the ordering encodes (spec §Armadilhas):
+  * ``"Crédito Estruturado"`` is RF (credit), **not** Estruturados — checked
+    before any COE/Estruturados rule.
+  * **COE** is always Estruturados and **never** an ETF.
+  * an ETF/fund is classified by its **underlying** (IFRA11 debêntures → RF;
+    IVVB11 ações → RV).
+  * geography is the orthogonal ``exposure`` axis, never a macro class: a
+    global-mandate FIA is RV + exposure=Internacional; IVVB11 is RV +
+    Internacional; a BDR is RV + Internacional.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Protocol, cast
+from zoneinfo import ZoneInfo
+
+from findata.resolver.models import (
+    AssetClassification,
+    CvmInfo,
+    DebentureInfo,
+    Exposure,
+    IdentifierResolved,
+    TaxInfo,
+)
+from findata.resolver.normalize import NormalizedInput, normalize
+from findata.resolver.seed import lookup_seed
+
+# Issuers with a well-known programme of Lei-12.431 infrastructure debentures.
+# Used only as a *heuristic* signal (debenture + IPCA-linked + infra issuer →
+# likely incentivada); the live ANBIMA/debentures.com.br step confirms by ISIN.
+# Explicit list = auditable; not a claim that every issue from these is 12.431.
+_INFRA_DEBENTURE_ISSUERS = frozenset(
+    {
+        "PETROBRAS",
+        "RUMO",
+        "ENGIE",
+        "TAESA",
+        "ISA",
+        "CTEEP",
+        "ECORODOVIAS",
+        "CPFL",
+        "ENEVA",
+        "AEGEA",
+        "EQUATORIAL",
+        "NEOENERGIA",
+        "SABESP",
+        "COPEL",
+        "ELETROBRAS",
+        "ENERGISA",
+        "OMEGA",
+        "AUREN",
+        "COMGAS",
+        "VIBRA",
+        "MOTIVA",
+        "SANEPAR",
+        "CEMIG",
+    }
+)
+
+_GLOBAL_KEYWORDS = (
+    "GLOBAL",
+    "GLOBAIS",
+    "WORLD",
+    "WORLDWIDE",
+    "INTERNACIONAL",
+    "INTERNATIONAL",
+    "EXTERIOR",
+)
+
+_FUND_CONTEXT_TOKENS = ("FIA", "FIC", "FIM", "FUNDO", "FDO", "FUND", "MASTER", "FI")
+
+# ``as_of`` is stamped in Brazil time: the consolidation is a BR-market artifact,
+# so a server in another timezone must not shift the audit date across midnight.
+_BR_TZ = ZoneInfo("America/Sao_Paulo")
+
+# Above this confidence (and with a decided macro) the cascade short-circuits:
+# no point spending an external round-trip to confirm a strong core hit.
+_CONFIDENT_ENOUGH = 0.9
+
+
+class AssetProvider(Protocol):
+    """An external cascade step (Mais Retorno, CVM/B3, web search).
+
+    Receives the normalized input and the best classification so far; returns an
+    enriched classification (new ``source``, possibly higher-detail fields) or
+    ``None`` to pass. Implementations live outside the library because they are
+    networked / client-side; the resolver only orchestrates them.
+    """
+
+    async def __call__(
+        self, norm: NormalizedInput, current: AssetClassification
+    ) -> AssetClassification | None: ...
+
+
+# ── Small parsers ──────────────────────────────────────────────────
+
+
+def parse_indexador(name_folded: str) -> str | None:
+    """Recover the index from a folded RF instrument name, or ``None``."""
+    if "IPCA" in name_folded:
+        return "IPCA+"
+    if "CDI+" in name_folded or "CDI +" in name_folded:
+        return "CDI+"
+    if "%CDI" in name_folded or "% CDI" in name_folded or "DO CDI" in name_folded:
+        return "%CDI"
+    if "SELIC" in name_folded:
+        return "SELIC"
+    if "PREFIX" in name_folded or "PRE FIXAD" in name_folded:
+        return "PREFIXADO"
+    if "CDI" in name_folded:
+        return "%CDI"
+    return None
+
+
+def _subclasse_from_indexador(indexador: str | None) -> str:
+    if indexador == "IPCA+":
+        return "Indexada à Inflação"
+    if indexador in {"%CDI", "CDI+", "SELIC"}:
+        return "Pós-fixada"
+    if indexador == "PREFIXADO":
+        return "Prefixada"
+    return "Crédito Privado"
+
+
+def _infer_incentivada(
+    norm: NormalizedInput, indexador: str | None
+) -> tuple[bool | None, str, str | None]:
+    """Decide Lei-12.431 incentivada for a debenture. Returns (flag, note, basis).
+
+    ``basis`` is ``"explicit"`` for an in-name signal (high certainty),
+    ``"heuristic"`` for the IPCA+infra-issuer inference (must be confirmed by
+    ISIN, so the caller keeps confidence low and lets the cascade verify), or
+    ``None`` when there is no signal at all (we return ``None`` for the flag —
+    unknown, never assert False).
+    """
+    if norm.name_contains("INCENTIVAD", "12.431", "12431", "INFRAESTRUTURA", "FI-INFRA") or (
+        norm.has_token("INFRA") and norm.has_token("DEB", "DEBENTURE", "DEBENTURES")
+    ):
+        return (
+            True,
+            "Incentivada Lei 12.431 (sinal explícito de infraestrutura no nome).",
+            "explicit",
+        )
+    issuer_hit = any(t in _INFRA_DEBENTURE_ISSUERS for t in norm.tokens)
+    if issuer_hit and indexador == "IPCA+":
+        return (
+            True,
+            (
+                "Incentivada Lei 12.431 SÓ por heurística (debênture IPCA+ de emissor com "
+                "programa de infra); confidence baixa de propósito — confirmar por ISIN em "
+                "ANBIMA/debentures.com.br antes de tratar como isento."
+            ),
+            "heuristic",
+        )
+    return None, "", None
+
+
+# ── The rule cascade ───────────────────────────────────────────────
+
+
+def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
+    """Run the structural rules; return a partial payload dict (always non-empty).
+
+    The first matching rule wins. Every branch sets at least ``kind`` and
+    ``macro_class``; the assembler fills defaults for the rest.
+    """
+    n = norm
+
+    # 0) Name-trap: "Crédito Estruturado" is structured *credit* → RF, NOT
+    #    Estruturados. Must precede the COE rule.
+    if n.name_contains("CREDITO ESTRUTURADO"):
+        return {
+            "kind": "fundo",
+            "macro_class": "Renda Fixa",
+            "subclasse": "Crédito Privado",
+            "exposure": "Brasil",
+            "underlying_nature": "credito",
+            "confidence": 0.9,
+            "notes": "Name-trap: 'Crédito Estruturado' é crédito (RF), não COE/Estruturados.",
+        }
+
+    # 1) COE / operações estruturadas → Estruturados, never an ETF.
+    if n.has_token("COE") or n.name_contains(
+        "OPERACOES ESTRUTURADAS",
+        "OPERACAO ESTRUTURADA",
+        "CERTIFICADO DE OPERACOES",
+        "CERT DE OPERACOES",
+        "NOTA ESTRUTURADA",
+        "NOTAS ESTRUTURADAS",
+    ):
+        return {
+            "kind": "coe",
+            "macro_class": "Estruturados",
+            "subclasse": "COE",
+            "underlying_nature": "outro",
+            "estrutura": "COE",
+            "confidence": 0.95,
+            "notes": "COE (Certificado de Operações Estruturadas, CETIP) → Estruturados.",
+        }
+
+    # 2) Debenture → RF; parse indexador + incentivada.
+    if n.has_token("DEB", "DEBENTURE", "DEBENTURES", "DEBENT"):
+        indexador = parse_indexador(n.name_folded)
+        incentivada, note, basis = _infer_incentivada(n, indexador)
+        deb: dict[str, Any] = {"indexador": indexador}
+        tax: dict[str, Any] = {}
+        if incentivada:
+            deb["incentivada_1243"] = True
+            tax["isento"] = True
+        # An *explicit* infra signal is high-confidence. The issuer+IPCA
+        # heuristic is deliberately kept below the cascade short-circuit
+        # threshold (_CONFIDENT_ENOUGH) so a wired provider re-checks the
+        # isento claim by ISIN instead of it being taken as fact.
+        if basis == "explicit":
+            confidence = 0.92
+        elif basis == "heuristic":
+            confidence = 0.7
+        else:
+            confidence = 0.88
+        return {
+            "kind": "debenture",
+            "macro_class": "Renda Fixa",
+            "subclasse": _subclasse_from_indexador(indexador),
+            "exposure": "Brasil",
+            "underlying_nature": "credito",
+            "estrutura": "debenture",
+            "debenture": deb,
+            "tax": tax,
+            "confidence": confidence,
+            "notes": note or "Debênture → Renda Fixa.",
+        }
+
+    # 3) Securitização (CRA/CRI) → RF.
+    if n.has_token("CRA", "CRI") or n.name_contains(
+        "CERT. RECEBIVEIS", "CERTIFICADO DE RECEBIVEIS"
+    ):
+        agro = n.has_token("CRA") or n.name_contains("AGRONEGOCIO")
+        return {
+            "kind": "cra" if agro else "cri",
+            "macro_class": "Renda Fixa",
+            "subclasse": "Crédito Privado",
+            "exposure": "Brasil",
+            "underlying_nature": "recebiveis",
+            "tax": {"isento": True},  # CRA/CRI: IR-exempt for PF
+            "confidence": 0.9,
+            "notes": "Securitização (recebíveis) → Renda Fixa, isento p/ PF.",
+        }
+
+    # 4) Bank paper (CDB/RDB/LIG/Letra Financeira/Letra de Câmbio) → RF.
+    #    NB: the bare 2-char tokens "LC"/"LF" are too collision-prone (they hit
+    #    issuer names, share classes, internal codes), so they are matched only
+    #    via their unambiguous phrases, never as bare tokens.
+    if n.has_token("CDB", "RDB", "LIG") or n.name_contains("LETRA FINANCEIRA", "LETRA DE CAMBIO"):
+        return {
+            "kind": "cdb",
+            "macro_class": "Renda Fixa",
+            "subclasse": _subclasse_from_indexador(parse_indexador(n.name_folded)),
+            "exposure": "Brasil",
+            "underlying_nature": "credito",
+            "confidence": 0.88,
+            "notes": "Emissão bancária → Renda Fixa.",
+        }
+    if n.has_token("LCI", "LCA") or n.name_contains(
+        "LETRA DE CREDITO IMOBILIARIO", "LETRA DE CREDITO DO AGRONEGOCIO"
+    ):
+        return {
+            "kind": "lci_lca",
+            "macro_class": "Renda Fixa",
+            "subclasse": _subclasse_from_indexador(parse_indexador(n.name_folded)),
+            "exposure": "Brasil",
+            "underlying_nature": "credito",
+            "tax": {"isento": True},
+            "confidence": 0.9,
+            "notes": "LCI/LCA → Renda Fixa, isento p/ PF.",
+        }
+
+    # 5) Tesouro / public bonds → RF.
+    if n.has_token("TESOURO", "NTN", "LTN", "LFT", "NTNB", "NTNF") or n.name_contains(
+        "TESOURO DIRETO", "TESOURO SELIC", "TESOURO IPCA", "TESOURO PREFIXADO"
+    ):
+        return {
+            "kind": "tesouro",
+            "macro_class": "Renda Fixa",
+            "subclasse": _subclasse_from_indexador(parse_indexador(n.name_folded)),
+            "exposure": "Brasil",
+            "underlying_nature": "tesouro",
+            "confidence": 0.95,
+            "notes": "Título público federal → Renda Fixa.",
+        }
+
+    # 6) Internacional EXPOSURE — IE structure, or global keyword. Geography is
+    #    the `exposure` axis, NOT a macro class: the asset class still comes from
+    #    the fund type (equities→RV, dívida externa→RF, else Multimercado). BOTH
+    #    triggers require a fund context: a bare "IE"/"GLOBAL" token outside a
+    #    fund name is too collision-prone (e.g. "COMPANHIA IE ENERGIA SA").
+    #    Runs before FIA/Ações so "FIC FIA IE" / "GLOBAL FIM" land here.
+    fund_context = n.has_token(*_FUND_CONTEXT_TOKENS)
+    if fund_context and (
+        n.has_token("IE")
+        or n.name_contains(*_GLOBAL_KEYWORDS, "INVESTIMENTO NO EXTERIOR", "INV EXTERIOR")
+    ):
+        equities = n.has_token("FIA") or n.name_contains("ACOES", "EQUITY")
+        rf = n.name_contains("DIVIDA EXTERNA", "RENDA FIXA", "BOND", "CREDITO", "DEBT")
+        if equities:
+            macro, subclasse, underlying = "Renda Variável", "Ações Global", "acoes"
+        elif rf:
+            macro, subclasse, underlying = "Renda Fixa", "Dívida Externa", "credito"
+        else:
+            macro, subclasse, underlying = "Multimercado", "Multimercado Global", "multiativos"
+        return {
+            "kind": "fundo",
+            "macro_class": macro,
+            "subclasse": subclasse,
+            "exposure": "Internacional",
+            "underlying_nature": underlying,
+            "estrutura": "IE" if n.has_token("IE") else "FIC",
+            "confidence": 0.9,
+            "notes": f"Mandato internacional (IE / global): {macro}, exposição Internacional.",
+        }
+
+    # 7) FII (by name; ticker-only 11s are caught at step 13).
+    if n.has_token("FII") or n.name_contains(
+        "FUNDO IMOBILIARIO",
+        "FDO INV IMOB",
+        "FUNDO DE INVESTIMENTO IMOBILIARIO",
+        "INVESTIMENTO IMOBILIARIO",
+    ):
+        return {
+            "kind": "fii",
+            "macro_class": "Renda Variável",
+            "subclasse": "FII",
+            "exposure": "Brasil",
+            "underlying_nature": "imoveis",
+            "estrutura": "FII",
+            "confidence": 0.92,
+            "notes": "Fundo Imobiliário → Renda Variável (subclasse FII).",
+        }
+
+    # 8) ETF by name, no curated hit → infer underlying from name keywords.
+    if n.has_token("ETF") or n.name_contains("ISHARES", "INDEX FUND"):
+        rf = n.name_contains("RENDA FIXA", "DEBENTURE", "BOND", "IMA-", "IRF-", "TESOURO", "INFRA")
+        if rf:
+            sovereign = n.name_contains("TESOURO", "IMA-", "IRF-", "LFT", "NTN", "LTN")
+            credit = n.name_contains("DEBENTURE", "INFRA")
+            return {
+                "kind": "etf",
+                "macro_class": "Renda Fixa",
+                "subclasse": "ETF de renda fixa",
+                "exposure": "Brasil",
+                "underlying_nature": "debentures"
+                if credit
+                else ("tesouro" if sovereign else "credito"),
+                "estrutura": "ETF",
+                "confidence": 0.78,
+                "notes": "ETF com underlying de renda fixa (inferido do nome).",
+            }
+        intl = n.name_contains(*_GLOBAL_KEYWORDS, "S&P", "SP500", "NASDAQ", "MSCI", "EUA", "US ")
+        return {
+            "kind": "etf",
+            "macro_class": "Renda Variável",
+            "subclasse": "ETF de ações internacional" if intl else "ETF de ações",
+            "exposure": "Internacional" if intl else "Brasil",
+            "underlying_nature": "acoes",
+            "estrutura": "ETF",
+            "confidence": 0.72,
+            "notes": "ETF sem ticker no seed; underlying assumido = ações. Confirmar.",
+        }
+
+    # 9) FIDC → RF (direitos creditórios, natureza de crédito).
+    if n.has_token("FIDC") or n.name_contains("DIREITOS CREDITORIOS"):
+        return {
+            "kind": "fundo",
+            "macro_class": "Renda Fixa",
+            "subclasse": "Crédito Estruturado",
+            "exposure": "Brasil",
+            "underlying_nature": "recebiveis",
+            "estrutura": "FIDC",
+            "confidence": 0.85,
+            "notes": "FIDC (direitos creditórios) → Renda Fixa (crédito).",
+        }
+
+    # 10) FIP → Alternativos (private equity).
+    if n.has_token("FIP") or n.name_contains("PARTICIPACOES", "PRIVATE EQUITY"):
+        return {
+            "kind": "fundo",
+            "macro_class": "Alternativos",
+            "subclasse": "Private Equity",
+            "underlying_nature": "private_equity",
+            "estrutura": "FIP",
+            "confidence": 0.88,
+            "notes": "FIP (participações) → Alternativos.",
+        }
+
+    # 11) Multimercado.
+    if n.has_token("FIM") or n.name_contains("MULTIMERCADO", "MULTIESTRATEGIA", "MACRO"):
+        return {
+            "kind": "fundo",
+            "macro_class": "Multimercado",
+            "subclasse": "Multimercado",
+            "underlying_nature": "multiativos",
+            "estrutura": "FIM",
+            "confidence": 0.85,
+            "notes": "Multimercado.",
+        }
+
+    # 12) Ações / FIA (domestic equities).
+    if n.has_token("FIA") or n.name_contains("FUNDO DE ACOES", "ACOES", "EQUITY"):
+        return {
+            "kind": "fundo",
+            "macro_class": "Renda Variável",
+            "subclasse": "Ações",
+            "exposure": "Brasil",
+            "underlying_nature": "acoes",
+            "estrutura": "FIA",
+            "confidence": 0.85,
+            "notes": "Fundo de Ações → Renda Variável.",
+        }
+
+    # 13) Ticker shapes (no name signal won above).
+    suffix = n.ticker_digits_suffix
+    if n.ticker:
+        # 11 not in any curated ETF/RF list → overwhelmingly a FII.
+        if suffix == "11":
+            return {
+                "kind": "fii",
+                "macro_class": "Renda Variável",
+                "subclasse": "FII",
+                "exposure": "Brasil",
+                "underlying_nature": "imoveis",
+                "estrutura": "FII",
+                "confidence": 0.72,
+                "notes": "Ticker terminado em 11 fora do seed de ETFs → FII (heurística).",
+            }
+        # BDR (34/35): recibo de ação estrangeira. RV por classe, mas o holder
+        # carrega risco cambial/exterior → Internacional por exposição (default;
+        # BDRs de empresa brasileira no exterior são exceção, não a regra).
+        if suffix in {"34", "35"}:
+            return {
+                "kind": "bdr",
+                "macro_class": "Renda Variável",
+                "subclasse": "BDR",
+                "exposure": "Internacional",
+                "underlying_nature": "acoes",
+                "confidence": 0.8,
+                "notes": "BDR (recibo de ação estrangeira) → RV, exposição Internacional.",
+            }
+        # Any other ticker shape (typically suffix 3-8, ON/PN) — ação brasileira.
+        return {
+            "kind": "acao",
+            "macro_class": "Renda Variável",
+            "subclasse": "Ações",
+            "exposure": "Brasil",
+            "underlying_nature": "acoes",
+            "confidence": 0.85,
+            "notes": "Ação listada na B3 → Renda Variável.",
+        }
+
+    # 14) Nothing matched — honest "I don't know" for HITL review.
+    return {
+        "kind": "outro",
+        "macro_class": "Indefinido",
+        "confidence": 0.2,
+        "notes": "Sem sinal estrutural suficiente; requer revisão (human-in-the-loop).",
+    }
+
+
+# ── Assembly ───────────────────────────────────────────────────────
+
+
+def _resolve_exposure(payload: dict[str, Any]) -> Exposure | None:
+    """The geography axis, taken from the rule/seed payload. ``None`` when the
+    rule could not decide (e.g. a COE whose underlying may be either)."""
+    explicit = payload.get("exposure")
+    return cast(Exposure, explicit) if explicit is not None else None
+
+
+def _assemble(norm: NormalizedInput, payload: dict[str, Any], step: str) -> AssetClassification:
+    """Turn a rule/seed payload dict into the typed output contract."""
+    deb = payload.get("debenture")
+    tax = payload.get("tax") or {}
+    return AssetClassification(
+        identifier_resolved=IdentifierResolved(
+            cnpj=norm.cnpj, ticker=norm.ticker, isin=norm.isin, name=norm.name_raw
+        ),
+        kind=payload["kind"],
+        cvm=CvmInfo(
+            classe=payload.get("cvm_classe"),
+            anbima_categoria=payload.get("anbima_categoria"),
+            estrutura=payload.get("estrutura"),
+        ),
+        macro_class=payload["macro_class"],
+        subclasse=payload.get("subclasse"),
+        exposure=_resolve_exposure(payload),
+        underlying_nature=payload.get("underlying_nature"),
+        debenture=DebentureInfo(**deb) if deb else None,
+        tax=TaxInfo(**tax),
+        source=payload.get("source", "openfindata"),
+        confidence=payload.get("confidence", 0.5),
+        as_of=datetime.now(_BR_TZ).date().isoformat(),
+        cascade=[step],
+        notes=payload.get("notes"),
+    )
+
+
+def classify(norm: NormalizedInput) -> AssetClassification:
+    """Pure, offline classification: curated seed → structural rules.
+
+    Always returns a record (``Indefinido`` when nothing matches). This is the
+    deterministic core that the spec test set exercises with no network.
+    """
+    seed = lookup_seed(ticker=norm.ticker, cnpj=norm.cnpj, name_folded=norm.name_folded)
+    if seed is not None:
+        return _assemble(norm, seed.payload, step="openfindata:curated")
+    return _assemble(norm, _rule_payload(norm), step="openfindata:rules")
+
+
+async def resolve_asset(
+    name: str | None = None,
+    *,
+    cnpj: str | None = None,
+    ticker: str | None = None,
+    isin: str | None = None,
+    providers: list[AssetProvider] | None = None,
+) -> AssetClassification:
+    """Resolve an asset to its Wealthuman classification.
+
+    Runs the deterministic core (curated seed → structural rules), then walks the
+    optional external provider chain (Mais Retorno → CVM/B3 → restricted web
+    search) only while the result is still weak (``Indefinido`` or low
+    confidence). Each provider that fires is appended to ``cascade`` and may lower
+    confidence; the deepest one to set a field owns ``source``.
+
+    No PII: callers pass only an asset identifier, never client data.
+    """
+    norm = normalize(name=name, cnpj=cnpj, ticker=ticker, isin=isin)
+    result = classify(norm)
+
+    for provider in providers or []:
+        # Stop early once we are confident — saves the network round-trips.
+        if result.macro_class != "Indefinido" and result.confidence >= _CONFIDENT_ENOUGH:
+            break
+        enriched = await provider(norm, result)
+        if enriched is not None:
+            enriched.cascade = [*result.cascade, *enriched.cascade]
+            result = enriched
+    return result
diff --git a/src/findata/resolver/models.py b/src/findata/resolver/models.py
new file mode 100644
index 0000000..386dfcf
--- /dev/null
+++ b/src/findata/resolver/models.py
@@ -0,0 +1,124 @@
+"""Output contract for ``resolve_asset`` — the Wealthuman classification.
+
+The resolver's job is to turn *any* asset identifier (ticker, CNPJ, ISIN, or
+bare name) into a classification **already mapped to the Wealthuman macro
+taxonomy**, not the raw CVM/ANBIMA category. Every field that can drive a
+human-in-the-loop decision (``source``, ``confidence``, ``as_of``, ``cascade``)
+is explicit, so a consolidated statement can be audited line by line.
+
+Shapes mirror the spec in ``openfindata-mcp-spec.md`` §Output. Kept in lockstep
+with the engine in :mod:`findata.resolver.engine`.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+# ── Controlled vocabularies ────────────────────────────────────────
+
+# Veículo / instrumento. Mirrors the spec's ``kind`` enum.
+Kind = Literal[
+    "fundo",
+    "acao",
+    "fii",
+    "etf",
+    "bdr",
+    "debenture",
+    "cra",
+    "cri",
+    "cdb",
+    "lci_lca",
+    "tesouro",
+    "coe",
+    "outro",
+]
+
+# Wealthuman macro taxonomy — PURE asset class. Geography is NOT a macro value:
+# "Internacional" lives only on the orthogonal ``Exposure`` axis. So an offshore
+# equity fund is RV + exposure=Internacional, offshore debt is RF + Internacional.
+# ``Indefinido`` is the honest answer when no layer can decide (drives HITL review).
+MacroClass = Literal[
+    "Renda Fixa",
+    "Renda Variável",
+    "Multimercado",
+    "Alternativos",
+    "Estruturados",
+    "Indefinido",
+]
+
+# Geography/strategy axis — *where the economic exposure sits*, orthogonal to
+# the asset class. A B3-listed equity ETF on the S&P 500 (IVVB11) is RV by class
+# but Internacional by exposure; a BDR is RV but the holder bears USD/foreign
+# risk → Internacional. The B3 listing is only the asset's domicile, not its
+# exposure. ``None`` when the resolver cannot decide.
+Exposure = Literal["Brasil", "Internacional"]
+
+# Economic nature of the underlying. For ETFs/funds this is what splits an
+# ETF-de-ações (RV) from an ETF-de-debêntures (RF) — see IFRA11 vs IVVB11.
+UnderlyingNature = Literal[
+    "acoes",
+    "debentures",
+    "credito",
+    "recebiveis",
+    "imoveis",
+    "multiativos",
+    "tesouro",
+    "cambio",
+    "private_equity",
+    "outro",
+]
+
+
+class IdentifierResolved(BaseModel):
+    """The identifiers the resolver could normalize/confirm from the input."""
+
+    cnpj: str | None = None
+    ticker: str | None = None
+    isin: str | None = None
+    name: str | None = None
+
+
+class CvmInfo(BaseModel):
+    """Raw upstream classification, kept for audit alongside the mapped macro."""
+
+    classe: str | None = None
+    anbima_categoria: str | None = None
+    estrutura: str | None = None  # FIA | FIM | FIC | FIDC | FIP | FII | IE | ETF | ...
+
+
+class DebentureInfo(BaseModel):
+    """Debenture-specific facts. Only populated when ``kind == 'debenture'``
+    (or an FI-Infra ETF whose underlying *is* incentivada debentures)."""
+
+    incentivada_1243: bool | None = None  # Lei 12.431 (infra) — IR-exempt for PF
+    indexador: str | None = None  # IPCA+ | CDI+ | %CDI | PREFIXADO | SELIC
+    vencimento: str | None = None  # YYYY-MM when known
+
+
+class TaxInfo(BaseModel):
+    """Tax treatment for the typical PF holder."""
+
+    isento: bool | None = None  # True for Lei 12.431 / LCI-LCA / FII dividends etc.
+
+
+class AssetClassification(BaseModel):
+    """The full resolver output. One asset in → one auditable record out."""
+
+    identifier_resolved: IdentifierResolved
+    kind: Kind
+    cvm: CvmInfo = Field(default_factory=CvmInfo)
+    macro_class: MacroClass
+    subclasse: str | None = None
+    exposure: Exposure | None = None  # geography/strategy axis (Brasil vs Internacional)
+    underlying_nature: UnderlyingNature | None = None
+    debenture: DebentureInfo | None = None
+    tax: TaxInfo = Field(default_factory=TaxInfo)
+    source: str  # openfindata | maisretorno | cvm | b3 | web_search
+    confidence: float = Field(ge=0.0, le=1.0)
+    as_of: str  # YYYY-MM-DD
+    # Audit trail: ordered list of resolution steps actually attempted.
+    cascade: list[str] = Field(default_factory=list)
+    # Free-text rationale, e.g. which trap was avoided or which signal decided.
+    notes: str | None = None
diff --git a/src/findata/resolver/normalize.py b/src/findata/resolver/normalize.py
new file mode 100644
index 0000000..a6188c7
--- /dev/null
+++ b/src/findata/resolver/normalize.py
@@ -0,0 +1,125 @@
+"""Identifier normalization for the resolver.
+
+Turns the loose input (``{name, cnpj, ticker, isin}`` — any subset) into a
+canonical :class:`NormalizedInput` the rule engine can pattern-match against:
+folded/uppercased name tokens, a digits-only CNPJ, an uppercased ticker, and an
+ISIN. Pure, deterministic, no I/O — so it is trivially cacheable.
+"""
+
+from __future__ import annotations
+
+import re
+import unicodedata
+from dataclasses import dataclass, field
+
+# A B3 ticker: 4 letters + 1-2 digits (PETR4, IVVB11, HGLG11). Fractional-lot
+# tickers (trailing ``F``) and subscription receipts are out of scope here.
+_TICKER_RE = re.compile(r"^[A-Z]{4}\d{1,2}$")
+# BDR: 4 letters + 34/35 (unsponsored, level-I BDRs). e.g. AAPL34, MSFT34.
+_BDR_RE = re.compile(r"^[A-Z]{4}3[45]$")
+# ISIN: 2-letter country + 9 alnum + 1 check digit. Brazil = BR....
+_ISIN_RE = re.compile(r"^[A-Z]{2}[A-Z0-9]{9}\d$")
+_CNPJ_LEN = 14
+
+
+def fold(text: str) -> str:
+    """ASCII-fold + uppercase, the canonical form for keyword matching.
+
+    ``"Crédito Estruturado"`` → ``"CREDITO ESTRUTURADO"``. Mirrors how the
+    registry stores tokens, so comparisons line up.
+    """
+    nfkd = unicodedata.normalize("NFKD", text)
+    ascii_only = "".join(c for c in nfkd if not unicodedata.combining(c))
+    return ascii_only.upper().strip()
+
+
+def _digits(text: str) -> str:
+    return re.sub(r"\D", "", text)
+
+
+def tokenize(name: str) -> list[str]:
+    """Fold ``name`` and split it into alphanumeric tokens (``+`` and punctuation are dropped).
+
+    ``"DEB PETROBRAS IPCA+"`` → ``["DEB", "PETROBRAS", "IPCA"]``. The ``+`` is
+    dropped from tokens but preserved in the raw folded name, which the indexador
+    parser reads, so ``IPCA+`` is still recoverable.
+    """
+    return [t for t in re.split(r"[^A-Z0-9]+", fold(name)) if t]
+
+
+@dataclass(frozen=True)
+class NormalizedInput:
+    """Canonical, deterministic view of the caller's identifiers."""
+
+    name_raw: str | None = None  # original, for echo-back
+    name_folded: str = ""  # ASCII-folded + uppercased full string
+    tokens: tuple[str, ...] = field(default_factory=tuple)
+    cnpj: str | None = None  # 14 digits, or None
+    ticker: str | None = None  # uppercased B3 ticker, or None
+    isin: str | None = None
+
+    def has_token(self, *candidates: str) -> bool:
+        """True if any candidate appears as a whole token."""
+        tset = set(self.tokens)
+        return any(c in tset for c in candidates)
+
+    def name_contains(self, *needles: str) -> bool:
+        """True if any needle is a substring of the folded name (phrase match)."""
+        return any(n in self.name_folded for n in needles)
+
+    @property
+    def ticker_digits_suffix(self) -> str | None:
+        """The trailing digits of the ticker (``"11"`` for HGLG11), or None."""
+        if not self.ticker:
+            return None
+        m = re.search(r"(\d{1,2})$", self.ticker)
+        return m.group(1) if m else None
+
+
+def normalize(
+    *,
+    name: str | None = None,
+    cnpj: str | None = None,
+    ticker: str | None = None,
+    isin: str | None = None,
+) -> NormalizedInput:
+    """Build a :class:`NormalizedInput` from any subset of identifiers.
+
+    A bare ``name`` that is itself a ticker/CNPJ/ISIN is promoted to the right
+    field, so callers can pass a single opaque string and still get structured
+    signals (the consolidator often only has the statement label).
+    """
+    # Promote a bare identifier passed as `name` into its typed slot.
+    if name and not (ticker or cnpj or isin):
+        candidate = fold(name)
+        if _TICKER_RE.match(candidate) or _BDR_RE.match(candidate):
+            ticker = candidate
+        elif _ISIN_RE.match(candidate):
+            isin = candidate
+        elif len(_digits(name)) == _CNPJ_LEN and not re.search(r"[A-Za-z]", name):
+            cnpj = name
+
+    cnpj_norm = None
+    if cnpj:
+        d = _digits(cnpj)
+        cnpj_norm = d if len(d) == _CNPJ_LEN else None
+
+    ticker_norm = None
+    if ticker:
+        t = fold(ticker)
+        ticker_norm = t if (_TICKER_RE.match(t) or _BDR_RE.match(t)) else None
+
+    isin_norm = None
+    if isin:
+        i = fold(isin)
+        isin_norm = i if _ISIN_RE.match(i) else None
+
+    folded = fold(name) if name else ""
+    return NormalizedInput(
+        name_raw=name,
+        name_folded=folded,
+        tokens=tuple(tokenize(name)) if name else (),
+        cnpj=cnpj_norm,
+        ticker=ticker_norm,
+        isin=isin_norm,
+    )
diff --git a/src/findata/resolver/seed.py b/src/findata/resolver/seed.py
new file mode 100644
index 0000000..c83a60c
--- /dev/null
+++ b/src/findata/resolver/seed.py
@@ -0,0 +1,169 @@
+"""Curated knowledge base for classifications that are *not derivable* from the
+identifier alone.
+
+Two honest cases need a curated table, and only these:
+
+1. **ETFs** — an ETF's macro follows its *underlying*, and the ticker carries no
+   underlying signal. ``IVVB11`` (S&P 500 equities → RV) and ``IFRA11``
+   (infra debentures → RF) both end in ``11``; nothing in the symbol separates
+   them. The B3 ETF universe is small (~100 listed) and stable, so a curated
+   ticker→underlying map is the deterministic, auditable answer.
+2. **Global-mandate funds with no structural tell** — ``ARBOR FIC FIA`` is an
+   equities wrapper (FIA → RV) whose mandate is global, but the name has no
+   ``IE`` and no "global"/"world" keyword. Only fund-level knowledge sets its
+   exposure=Internacional (economic nature beats the wrapper); the asset class
+   stays Renda Variável.
+
+Everything else is settled by the structural rules in
+:mod:`findata.resolver.engine` and never reaches this table. Keep this list
+small and sourced — it is a maintenance liability, not a dumping ground.
+
+``confidence`` here is intentionally high (curated, manually verified) but < 1.0:
+the underlying universe can change (an ETF can be delisted, a fund can change
+mandate), so a curated hit is strong, not infallible.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from findata.resolver.normalize import fold
+
+
+@dataclass(frozen=True)
+class SeedEntry:
+    """One curated classification, matched by ticker / CNPJ / name substrings."""
+
+    payload: dict[str, Any]
+    ticker: str | None = None
+    cnpj: str | None = None
+    name_substrings: tuple[str, ...] = field(default_factory=tuple)
+
+
+# ── B3 ETFs: ticker → (index, exposure) ────────────────────────────
+# RV ETFs (equity underlying). macro is always Renda Variável (asset class);
+# the geography is the orthogonal *exposure* axis. IVVB11 is the spec case: it
+# is RV by class but its exposure is Internacional (tracks the S&P 500) — the B3
+# listing is only the asset's domicile, not where the risk sits.
+_EQUITY_ETFS = {
+    # Brazilian-equity exposure.
+    "BOVA11": ("Ibovespa", "Brasil"),
+    "SMAL11": ("Small Caps", "Brasil"),
+    "BOVV11": ("Ibovespa", "Brasil"),
+    "PIBB11": ("IBrX-50", "Brasil"),
+    "DIVO11": ("Dividendos", "Brasil"),
+    "BOVB11": ("Ibovespa", "Brasil"),
+    # International-equity exposure (B3-listed, foreign underlying).
+    "IVVB11": ("S&P 500", "Internacional"),
+    "XINA11": ("China (MSCI)", "Internacional"),
+    "NASD11": ("Nasdaq-100", "Internacional"),
+    "SPXI11": ("S&P 500", "Internacional"),
+    "EURP11": ("Europa", "Internacional"),
+    "ACWI11": ("Global (ACWI)", "Internacional"),
+}
+
+# RF ETFs (fixed-income underlying). IFRA11 is the headline case: an FI-Infra
+# ETF holding Lei-12.431 infra debentures → Renda Fixa, IR-exempt for PF.
+_FIXED_INCOME_ETFS = {
+    "IFRA11": ("debentures", "Indexada à Inflação", True),
+    "IB5M11": ("tesouro", "Indexada à Inflação", False),
+    "IMAB11": ("tesouro", "Indexada à Inflação", False),
+    "B5P211": ("tesouro", "Indexada à Inflação", False),
+    "IRFM11": ("tesouro", "Prefixada", False),
+    "FIXA11": ("tesouro", "Prefixada", False),
+    "LFTS11": ("tesouro", "Pós-fixada", False),
+    "B5MB11": ("tesouro", "Indexada à Inflação", False),
+}
+
+
+def _build_etf_seed() -> list[SeedEntry]:
+    entries: list[SeedEntry] = []
+    for ticker, (idx, exposure) in _EQUITY_ETFS.items():
+        intl = exposure == "Internacional"
+        entries.append(
+            SeedEntry(
+                ticker=ticker,
+                payload={
+                    "kind": "etf",
+                    "macro_class": "Renda Variável",
+                    "subclasse": "ETF de ações internacional" if intl else "ETF de ações",
+                    "exposure": exposure,
+                    "underlying_nature": "acoes",
+                    "estrutura": "ETF",
+                    "notes": f"Curated: ETF de ações ({idx}); classe RV, exposição {exposure}.",
+                    "confidence": 0.97,
+                },
+            )
+        )
+    for ticker, (underlying, subclasse, incentivada) in _FIXED_INCOME_ETFS.items():
+        note = "ETF de renda fixa; classifica pelo underlying (→ RF)."
+        if incentivada:
+            note = (
+                "ETF de debêntures de infraestrutura (FI-Infra, Lei 12.431); "
+                "underlying = debêntures incentivadas → RF, isento p/ PF."
+            )
+        payload = {
+            "kind": "etf",
+            "macro_class": "Renda Fixa",
+            "subclasse": subclasse,
+            "exposure": "Brasil",
+            "underlying_nature": underlying,
+            "estrutura": "ETF",
+            "notes": f"Curated: {note}",
+            "confidence": 0.97,
+        }
+        if incentivada:
+            payload["debenture"] = {"incentivada_1243": True, "indexador": "IPCA+"}
+            payload["tax"] = {"isento": True}
+        entries.append(SeedEntry(ticker=ticker, payload=payload))
+    return entries
+
+
+# ── Global-mandate funds with no structural tell ───────────────────
+_GLOBAL_FUNDS = [
+    SeedEntry(
+        # Require both the brand token AND the FIA structure so an unrelated
+        # "ARBOR Crédito Privado FIM" is NOT swept into the global-equity seed.
+        name_substrings=("ARBOR", "FIA"),
+        payload={
+            "kind": "fundo",
+            "macro_class": "Renda Variável",
+            "subclasse": "Ações Global",
+            "exposure": "Internacional",
+            "underlying_nature": "acoes",
+            "estrutura": "FIA",
+            "notes": (
+                "Curated: FIC FIA de mandato global sem sufixo IE; classe RV "
+                "(ações), exposição Internacional pela natureza econômica."
+            ),
+            "confidence": 0.93,
+        },
+    ),
+]
+
+
+SEED_ENTRIES: list[SeedEntry] = _build_etf_seed() + _GLOBAL_FUNDS
+
+# Index by ticker for O(1) hits (the common path).
+_BY_TICKER: dict[str, SeedEntry] = {e.ticker: e for e in SEED_ENTRIES if e.ticker}
+_BY_CNPJ: dict[str, SeedEntry] = {e.cnpj: e for e in SEED_ENTRIES if e.cnpj}
+_NAME_ENTRIES: list[SeedEntry] = [e for e in SEED_ENTRIES if e.name_substrings]
+
+
+def lookup_seed(*, ticker: str | None, cnpj: str | None, name_folded: str) -> SeedEntry | None:
+    """Return the curated entry for this identifier, or ``None``.
+
+    Ticker and CNPJ are exact; name match requires every configured substring to
+    be present in the folded name (so ``("ARBOR", "FIA")`` matches "ARBOR FIC FIA").
+    """
+    if ticker and ticker in _BY_TICKER:
+        return _BY_TICKER[ticker]
+    if cnpj and cnpj in _BY_CNPJ:
+        return _BY_CNPJ[cnpj]
+    if name_folded:
+        folded = fold(name_folded)
+        for entry in _NAME_ENTRIES:
+            if all(sub in folded for sub in entry.name_substrings):
+                return entry
+    return None
diff --git a/src/findata/resolver/taxonomy.py b/src/findata/resolver/taxonomy.py
new file mode 100644
index 0000000..97aebeb
--- /dev/null
+++ b/src/findata/resolver/taxonomy.py
@@ -0,0 +1,85 @@
+"""CVM/ANBIMA → Wealthuman macro mapping.
+
+The spec is explicit (§Regras-chave #1): *the* mapping from the raw CVM class /
+ANBIMA category to the Wealthuman macro taxonomy lives **in the resolver**, not
+in the caller. This module is that mapping, kept as plain data so it is
+auditable and easy to extend when ANBIMA renames a category.
+
+It is consulted by the registry-enrichment step of the cascade, when we have a
+CVM ``classe`` / ``CLASSE_ANBIMA`` string but the structural rules in
+:mod:`findata.resolver.engine` did not already settle the macro from the name.
+"""
+
+from __future__ import annotations
+
+from findata.resolver.normalize import fold
+
+# ── CVM legal class (campo CLASSE do cad_fi) → macro (asset class) ──
+# The CVM "classe" is the legal/regulatory bucket. macro is PURE asset class;
+# geography (Internacional) is a separate axis — so "Fundo de Dívida Externa" is
+# Renda Fixa here, with its Internacional exposure set by the exposure map below.
+CVM_CLASSE_TO_MACRO: dict[str, str] = {
+    "FUNDO DE ACOES": "Renda Variável",
+    "FUNDO DE RENDA FIXA": "Renda Fixa",
+    "FUNDO MULTIMERCADO": "Multimercado",
+    "FUNDO CAMBIAL": "Multimercado",  # câmbio puro — banker treats as Multi/Alt
+    "FUNDO DE CURTO PRAZO": "Renda Fixa",
+    "FUNDO REFERENCIADO": "Renda Fixa",
+    "FUNDO DE DIVIDA EXTERNA": "Renda Fixa",  # asset class RF; exposure Internacional
+    # FI-Infra (debêntures incentivadas) — RF by underlying.
+    "FI-INFRA": "Renda Fixa",
+    "FIC FI-INFRA": "Renda Fixa",
+}
+
+# ── ANBIMA category (CLASSE_ANBIMA) → macro (asset class) ──────────
+# Richer than the legal class: the ANBIMA category encodes the mandate. Matched
+# by substring on the folded string. macro is the asset class only — the
+# "investimento no exterior" / "dívida externa" mandate feeds the EXPOSURE map,
+# not macro, so an "Ações Investimento no Exterior" is RV + Internacional.
+ANBIMA_SUBSTRING_TO_MACRO: tuple[tuple[str, str], ...] = (
+    # Core asset classes (geography handled separately).
+    ("DIVIDA EXTERNA", "Renda Fixa"),
+    ("RENDA FIXA", "Renda Fixa"),
+    ("ACOES", "Renda Variável"),
+    ("MULTIMERCADO", "Multimercado"),
+    ("CAMBIAL", "Multimercado"),
+    # Structured / private-market vehicles.
+    ("FIP", "Alternativos"),
+    ("PRIVATE EQUITY", "Alternativos"),
+    ("FIDC", "Renda Fixa"),  # direitos creditórios — credit nature → RF
+    ("IMOBILIARIO", "Renda Variável"),  # FII
+)
+
+# Substrings in a CVM/ANBIMA category that mark Internacional exposure.
+_INTERNACIONAL_MARKERS: tuple[str, ...] = (
+    "INVESTIMENTO NO EXTERIOR",
+    "DIVIDA EXTERNA",
+    "EXTERIOR",
+    "GLOBAL",
+)
+
+
+def map_cvm_classe(classe: str | None) -> str | None:
+    """Map a raw CVM legal ``CLASSE`` to a Wealthuman macro (asset class), or ``None``."""
+    if not classe:
+        return None
+    return CVM_CLASSE_TO_MACRO.get(fold(classe))
+
+
+def map_anbima_categoria(categoria: str | None) -> str | None:
+    """Map a raw ANBIMA category to a Wealthuman macro by first-match substring."""
+    if not categoria:
+        return None
+    folded = fold(categoria)
+    for needle, macro in ANBIMA_SUBSTRING_TO_MACRO:
+        if needle in folded:
+            return macro
+    return None
+
+
+def map_exposure(categoria: str | None) -> str | None:
+    """Detect Internacional exposure from a CVM/ANBIMA category, else ``None``."""
+    if not categoria:
+        return None
+    folded = fold(categoria)
+    return "Internacional" if any(m in folded for m in _INTERNACIONAL_MARKERS) else None
diff --git a/tests/test_mcp_surface.py b/tests/test_mcp_surface.py
index ebffc61..f662847 100644
--- a/tests/test_mcp_surface.py
+++ b/tests/test_mcp_surface.py
@@ -19,8 +19,8 @@
 from findata.api.app import app
 from findata.api.mcp_app import mcp_app
 
-EXPECTED_TOOLS = 24  # curated tools with code mode OFF (the default)
-EXPECTED_REST_OPERATIONS = 95  # all REST routes (unconditional); bump when the surface changes
+EXPECTED_TOOLS = 25  # curated tools with code mode OFF (the default)
+EXPECTED_REST_OPERATIONS = 96  # all REST routes (unconditional); bump when the surface changes
 
 _HTTP_METHODS = {"get", "post", "put", "delete", "patch"}
 
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
new file mode 100644
index 0000000..fc42dab
--- /dev/null
+++ b/tests/test_resolver.py
@@ -0,0 +1,232 @@
+"""Resolver test set — the canonical cases from ``openfindata-mcp-spec.md``.
+
+Every assertion is offline and deterministic: the curated seed + structural
+rules settle each case with no network. The 8 spec cases plus the explicit
+traps the ordering must defend.
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from findata.resolver import classify, normalize, resolve_asset
+
+
+def _resolve(**kw):
+    return asyncio.run(resolve_asset(**kw))
+
+
+# ── The spec test set (§Test set) ──────────────────────────────────
+
+
+def test_ifra11_is_renda_fixa_inflation_etf_of_debentures():
+    r = _resolve(ticker="IFRA11", name="FI ITAUINFRA CI")
+    assert r.macro_class == "Renda Fixa"
+    assert r.kind == "etf"
+    assert r.subclasse == "Indexada à Inflação"
+    assert r.underlying_nature == "debentures"
+    assert r.debenture and r.debenture.incentivada_1243 is True
+    assert r.tax.isento is True
+
+
+def test_arbor_fic_fia_global_mandate_is_rv_exposure_internacional():
+    # Geography is the exposure axis: a global equities FIA is RV by class.
+    r = _resolve(name="ARBOR FIC FIA")
+    assert r.macro_class == "Renda Variável"
+    assert r.exposure == "Internacional"
+
+
+def test_whg_global_fic_fia_ie_is_rv_exposure_internacional():
+    r = _resolve(name="WHG GLOBAL EQUITY FIC FIA IE")
+    assert r.macro_class == "Renda Variável"
+    assert r.exposure == "Internacional"
+
+
+def test_deb_petrobras_ipca_is_incentivada_isento_rf():
+    r = _resolve(name="DEB PETROBRAS IPCA+")
+    assert r.macro_class == "Renda Fixa"
+    assert r.kind == "debenture"
+    assert r.debenture and r.debenture.incentivada_1243 is True
+    assert r.debenture.indexador == "IPCA+"
+    assert r.tax.isento is True
+
+
+def test_coe_is_estruturados_never_etf():
+    r = _resolve(name="INVEST. ESTRUTURADOS COE BTG")
+    assert r.macro_class == "Estruturados"
+    assert r.kind == "coe"
+    assert r.kind != "etf"
+
+
+def test_credito_estruturado_name_trap_is_renda_fixa():
+    # "Crédito Estruturado" (Warren/AMW) is RF credit, NOT Estruturados.
+    r = _resolve(name="AMW CREDITO ESTRUTURADO FIC FIM CP")
+    assert r.macro_class == "Renda Fixa"
+    assert r.macro_class != "Estruturados"
+
+
+def test_ivvb11_sp500_etf_is_renda_variavel_exposure_internacional():
+    # Asset class is RV (spec); the international S&P 500 exposure lives on the
+    # orthogonal `exposure` axis — B3 listing is domicile, not where the risk is.
+    r = _resolve(ticker="IVVB11")
+    assert r.macro_class == "Renda Variável"
+    assert r.exposure == "Internacional"
+    assert r.kind == "etf"
+    assert r.underlying_nature == "acoes"
+
+
+@pytest.mark.parametrize("ticker", ["HGLG11", "MXRF11"])
+def test_fiis_are_renda_variavel_subclasse_fii(ticker):
+    r = _resolve(ticker=ticker)
+    assert r.macro_class == "Renda Variável"
+    assert r.subclasse == "FII"
+    assert r.kind == "fii"
+
+
+# ── Trap regressions (spec §Armadilhas) ────────────────────────────
+
+
+def test_acao_ticker_is_rv():
+    r = _resolve(ticker="PETR4")
+    assert r.macro_class == "Renda Variável"
+    assert r.kind == "acao"
+    assert r.exposure == "Brasil"
+
+
+def test_bdr_is_rv_exposure_internacional():
+    # BDR: RV by class, but the holder bears foreign/USD risk → Internacional.
+    r = _resolve(ticker="AAPL34")
+    assert r.macro_class == "Renda Variável"
+    assert r.kind == "bdr"
+    assert r.exposure == "Internacional"
+
+
+def test_domestic_etf_is_brasil_exposure():
+    r = _resolve(ticker="BOVA11")
+    assert r.macro_class == "Renda Variável"
+    assert r.exposure == "Brasil"
+
+
+def test_internacional_funds_carry_internacional_exposure():
+    # Asset class varies (RV here), but the exposure axis flags Internacional.
+    for kw in ("ARBOR FIC FIA", "WHG GLOBAL EQUITY FIC FIA IE"):
+        r = _resolve(name=kw)
+        assert r.exposure == "Internacional"
+        assert r.macro_class != "Indefinido"
+
+
+def test_macro_class_has_no_internacional_value():
+    # Geography is exposure-only; "Internacional" must never appear as macro.
+    from findata.resolver import classify, normalize
+
+    for ident in ("ARBOR FIC FIA", "WHG GLOBAL FIC FIA IE", "VINCI GLOBAL FIM IE"):
+        r = classify(normalize(name=ident))
+        assert r.macro_class != "Internacional"
+
+
+def test_cra_cri_are_rf_isento():
+    r = _resolve(name="CRA AGRONEGOCIO RAIZEN IPCA")
+    assert r.macro_class == "Renda Fixa"
+    assert r.kind == "cra"
+    assert r.tax.isento is True
+
+
+def test_tesouro_ipca_is_rf_inflation():
+    r = _resolve(name="Tesouro IPCA+ 2035")
+    assert r.macro_class == "Renda Fixa"
+    assert r.kind == "tesouro"
+    assert r.subclasse == "Indexada à Inflação"
+
+
+def test_multimercado():
+    r = _resolve(name="KAPITALO ZETA FIC FIM")
+    assert r.macro_class == "Multimercado"
+
+
+def test_fip_is_alternativos():
+    r = _resolve(name="SPX FIP MULTIESTRATEGIA PARTICIPACOES")
+    assert r.macro_class == "Alternativos"
+
+
+# ── Adversarial-review regressions (token-collision traps) ─────────
+
+
+def test_bare_ie_token_outside_fund_is_not_internacional():
+    # "IE" must mean "Investimento no Exterior" only in a fund context.
+    r = _resolve(name="COMPANHIA IE ENERGIA SA")
+    assert r.macro_class != "Internacional"
+
+
+def test_bare_lc_lf_tokens_do_not_force_renda_fixa():
+    # Short tokens LC/LF used to misfire as bank paper.
+    r = _resolve(name="FUNDO GLOBAL LC MASTER FIC FIM")
+    assert r.kind != "cdb"
+
+
+def test_alcione_substring_is_not_lci():
+    # Substring "LCI" inside "ALCIONE" must not classify as LCI/LCA.
+    r = _resolve(name="ALCIONE FUNDO DE ACOES")
+    assert r.kind != "lci_lca"
+    assert r.macro_class == "Renda Variável"
+
+
+def test_arbor_credito_is_not_swept_into_global_equity_seed():
+    # ARBOR brand without the FIA structure must not hit the curated global seed.
+    r = _resolve(name="ARBOR CREDITO PRIVADO FIC FIM")
+    assert r.macro_class != "Internacional"
+
+
+def test_debenture_issuer_heuristic_keeps_confidence_below_short_circuit():
+    # Heuristic incentivada must stay below the cascade short-circuit so a wired
+    # provider can confirm the isento claim by ISIN.
+    r = _resolve(name="DEB PETROBRAS IPCA+")
+    assert r.debenture.incentivada_1243 is True  # spec still satisfied
+    assert r.confidence < 0.9  # but flagged for confirmation
+
+
+def test_unknown_is_indefinido_low_confidence():
+    r = _resolve(name="????")
+    assert r.macro_class == "Indefinido"
+    assert r.confidence < 0.5
+
+
+# ── Contract / determinism ─────────────────────────────────────────
+
+
+def test_output_carries_audit_fields():
+    r = _resolve(ticker="IFRA11")
+    assert r.source == "openfindata"
+    assert r.cascade == ["openfindata:curated"]
+    assert 0.0 <= r.confidence <= 1.0
+    assert r.as_of  # YYYY-MM-DD
+
+
+def test_classify_is_deterministic():
+    norm = normalize(ticker="IVVB11")
+    a, b = classify(norm), classify(norm)
+    assert a.model_dump(exclude={"as_of"}) == b.model_dump(exclude={"as_of"})
+
+
+def test_bare_ticker_passed_as_name_is_promoted():
+    # The consolidator often only has the statement label.
+    r = _resolve(name="IVVB11")
+    assert r.identifier_resolved.ticker == "IVVB11"
+    assert r.macro_class == "Renda Variável"
+
+
+def test_provider_chain_enriches_only_when_weak():
+    calls = {"n": 0}
+
+    async def fake_provider(norm, current):
+        calls["n"] += 1
+        return None  # noqa: RET501 — explicit "pass" signal in the provider protocol
+
+    # Confident core result → provider must be skipped.
+    asyncio.run(resolve_asset(ticker="IFRA11", providers=[fake_provider]))
+    assert calls["n"] == 0
+
+    # Weak result → provider is consulted.
+    asyncio.run(resolve_asset(name="????", providers=[fake_provider]))
+    assert calls["n"] == 1

From d2e8ebcbd9648eb878f310eddf4e9b7a390954b7 Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 21:06:24 -0300
Subject: [PATCH 2/7] feat(resolver): add fiscal-certainty axis to
 debenture/tax classification

The flat incentivada_1243/isento bools cannot tell a structurally certain
infra signal (explicit name / FI-Infra ETF) apart from a weak issuer+IPCA
heuristic. Add two Literal status fields that carry that certainty:

- DebentureInfo.lei_12431_status: confirmed | candidate | not_applicable | unknown
- TaxInfo.isento_status: confirmed_exempt | candidate_exempt | confirmed_taxable | unknown

Existing bool fields are kept unchanged (spec/test contract). Engine debenture
rule, CRA/CRI and LCI/LCA statutory exemptions, and the IFRA11 seed now stamp
these statuses; Tesouro-backed RF ETFs get not_applicable.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/findata/resolver/engine.py | 25 +++++++++++++++++++++++--
 src/findata/resolver/models.py | 21 +++++++++++++++++++++
 src/findata/resolver/seed.py   | 12 ++++++++++--
 tests/test_resolver.py         | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/src/findata/resolver/engine.py b/src/findata/resolver/engine.py
index eabb757..4234c33 100644
--- a/src/findata/resolver/engine.py
+++ b/src/findata/resolver/engine.py
@@ -174,6 +174,23 @@ def _infer_incentivada(
     return None, "", None
 
 
+def _apply_fiscal_certainty(basis: str | None, deb: dict[str, Any], tax: dict[str, Any]) -> None:
+    """Stamp the fiscal certainty axis on the debenture/tax sub-dicts.
+
+    An explicit infra signal is structurally certain (confirmed exempt); the
+    issuer+IPCA heuristic is only a candidate; with no incentivada signal it is a
+    plain debenture (12.431 not applicable, tax treatment still unknown).
+    """
+    if basis == "explicit":
+        deb["lei_12431_status"] = "confirmed"
+        tax["isento_status"] = "confirmed_exempt"
+    elif basis == "heuristic":
+        deb["lei_12431_status"] = "candidate"
+        tax["isento_status"] = "candidate_exempt"
+    else:
+        deb["lei_12431_status"] = "not_applicable"
+
+
 # ── The rule cascade ───────────────────────────────────────────────
 
 
@@ -226,6 +243,7 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
         if incentivada:
             deb["incentivada_1243"] = True
             tax["isento"] = True
+        _apply_fiscal_certainty(basis, deb, tax)
         # An *explicit* infra signal is high-confidence. The issuer+IPCA
         # heuristic is deliberately kept below the cascade short-circuit
         # threshold (_CONFIDENT_ENOUGH) so a wired provider re-checks the
@@ -260,7 +278,10 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "subclasse": "Crédito Privado",
             "exposure": "Brasil",
             "underlying_nature": "recebiveis",
-            "tax": {"isento": True},  # CRA/CRI: IR-exempt for PF
+            "tax": {
+                "isento": True,
+                "isento_status": "confirmed_exempt",
+            },  # CRA/CRI: IR-exempt for PF
             "confidence": 0.9,
             "notes": "Securitização (recebíveis) → Renda Fixa, isento p/ PF.",
         }
@@ -288,7 +309,7 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "subclasse": _subclasse_from_indexador(parse_indexador(n.name_folded)),
             "exposure": "Brasil",
             "underlying_nature": "credito",
-            "tax": {"isento": True},
+            "tax": {"isento": True, "isento_status": "confirmed_exempt"},
             "confidence": 0.9,
             "notes": "LCI/LCA → Renda Fixa, isento p/ PF.",
         }
diff --git a/src/findata/resolver/models.py b/src/findata/resolver/models.py
index 386dfcf..4940910 100644
--- a/src/findata/resolver/models.py
+++ b/src/findata/resolver/models.py
@@ -70,6 +70,21 @@
     "outro",
 ]
 
+# Lei-12.431 certainty axis for a debenture (or FI-Infra ETF underlying). The
+# legacy ``incentivada_1243`` bool cannot tell a *structurally certain* infra
+# signal apart from a *weak* issuer+IPCA heuristic — this status carries that
+# certainty: "confirmed" (explicit infra signal), "candidate" (heuristic, needs
+# ISIN confirmation), "not_applicable" (it is a debenture but not infra),
+# "unknown" (no debenture context decided it).
+Lei12431Status = Literal["confirmed", "candidate", "not_applicable", "unknown"]
+
+# Tax-exemption certainty axis for the PF holder. The legacy ``isento`` bool
+# cannot distinguish a statutory exemption (CRA/CRI/LCI-LCA/explicit 12.431) from
+# a merely *candidate* exemption resting on a heuristic — this status carries
+# that certainty: "confirmed_exempt", "candidate_exempt", "confirmed_taxable",
+# "unknown".
+IsentoStatus = Literal["confirmed_exempt", "candidate_exempt", "confirmed_taxable", "unknown"]
+
 
 class IdentifierResolved(BaseModel):
     """The identifiers the resolver could normalize/confirm from the input."""
@@ -93,6 +108,9 @@ class DebentureInfo(BaseModel):
     (or an FI-Infra ETF whose underlying *is* incentivada debentures)."""
 
     incentivada_1243: bool | None = None  # Lei 12.431 (infra) — IR-exempt for PF
+    # Certainty axis the bool can't carry: "confirmed" vs heuristic "candidate"
+    # vs "not_applicable" (a debenture, just not infra) vs "unknown".
+    lei_12431_status: Lei12431Status = "unknown"
     indexador: str | None = None  # IPCA+ | CDI+ | %CDI | PREFIXADO | SELIC
     vencimento: str | None = None  # YYYY-MM when known
 
@@ -101,6 +119,9 @@ class TaxInfo(BaseModel):
     """Tax treatment for the typical PF holder."""
 
     isento: bool | None = None  # True for Lei 12.431 / LCI-LCA / FII dividends etc.
+    # Certainty axis the bool can't carry: statutory "confirmed_exempt" vs
+    # heuristic "candidate_exempt" vs "confirmed_taxable" vs "unknown".
+    isento_status: IsentoStatus = "unknown"
 
 
 class AssetClassification(BaseModel):
diff --git a/src/findata/resolver/seed.py b/src/findata/resolver/seed.py
index c83a60c..2a5a89d 100644
--- a/src/findata/resolver/seed.py
+++ b/src/findata/resolver/seed.py
@@ -114,8 +114,16 @@ def _build_etf_seed() -> list[SeedEntry]:
             "confidence": 0.97,
         }
         if incentivada:
-            payload["debenture"] = {"incentivada_1243": True, "indexador": "IPCA+"}
-            payload["tax"] = {"isento": True}
+            payload["debenture"] = {
+                "incentivada_1243": True,
+                "lei_12431_status": "confirmed",
+                "indexador": "IPCA+",
+            }
+            payload["tax"] = {"isento": True, "isento_status": "confirmed_exempt"}
+        else:
+            # Tesouro-backed RF ETFs: not infra debentures, no statutory
+            # exemption. isento_status stays the default "unknown".
+            payload["debenture"] = {"lei_12431_status": "not_applicable"}
         entries.append(SeedEntry(ticker=ticker, payload=payload))
     return entries
 
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
index fc42dab..5eb82e9 100644
--- a/tests/test_resolver.py
+++ b/tests/test_resolver.py
@@ -192,6 +192,40 @@ def test_unknown_is_indefinido_low_confidence():
     assert r.confidence < 0.5
 
 
+# ── Fiscal-certainty axis (lei_12431_status / isento_status) ───────
+
+
+def test_deb_petrobras_heuristic_carries_candidate_certainty():
+    r = _resolve(name="DEB PETROBRAS IPCA+")
+    assert r.debenture and r.debenture.incentivada_1243 is True
+    assert r.debenture.lei_12431_status == "candidate"
+    assert r.tax.isento is True
+    assert r.tax.isento_status == "candidate_exempt"
+
+
+def test_ifra11_carries_confirmed_certainty():
+    r = _resolve(ticker="IFRA11", name="FI ITAUINFRA CI")
+    assert r.debenture and r.debenture.lei_12431_status == "confirmed"
+    assert r.tax.isento_status == "confirmed_exempt"
+
+
+def test_explicit_infra_debenture_is_confirmed():
+    r = _resolve(name="DEB INFRA ENERGIA INCENTIVADA IPCA+")
+    assert r.debenture and r.debenture.lei_12431_status == "confirmed"
+
+
+def test_cra_isento_status_is_confirmed_exempt():
+    r = _resolve(name="CRA AGRONEGOCIO RAIZEN IPCA")
+    assert r.tax.isento_status == "confirmed_exempt"
+
+
+def test_plain_non_infra_debenture_is_not_applicable():
+    r = _resolve(name="DEB LOJAS RENNER CDI+")
+    assert r.debenture and r.debenture.incentivada_1243 is None
+    assert r.debenture.lei_12431_status == "not_applicable"
+    assert r.tax.isento_status == "unknown"
+
+
 # ── Contract / determinism ─────────────────────────────────────────
 
 

From 7aec2ab1132344fe798f96a9b62af37a57ad947d Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 21:07:25 -0300
Subject: [PATCH 3/7] fix(resolver): keep debenture=None for Tesouro-backed RF
 ETFs

A non-debenture ETF should not carry a stub DebentureInfo just to hold
lei_12431_status=not_applicable; None is the honest 'no debenture facts'
shape. Status not_applicable remains on actual non-infra debentures.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/findata/resolver/seed.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/findata/resolver/seed.py b/src/findata/resolver/seed.py
index 2a5a89d..e67418e 100644
--- a/src/findata/resolver/seed.py
+++ b/src/findata/resolver/seed.py
@@ -120,10 +120,8 @@ def _build_etf_seed() -> list[SeedEntry]:
                 "indexador": "IPCA+",
             }
             payload["tax"] = {"isento": True, "isento_status": "confirmed_exempt"}
-        else:
-            # Tesouro-backed RF ETFs: not infra debentures, no statutory
-            # exemption. isento_status stays the default "unknown".
-            payload["debenture"] = {"lei_12431_status": "not_applicable"}
+        # Tesouro-backed RF ETFs hold no debenture, so `debenture` stays None
+        # (the honest "no debenture facts" shape) rather than a stub object.
         entries.append(SeedEntry(ticker=ticker, payload=payload))
     return entries
 

From f67cb822bde10caa41083dcedaabca56a0784a46 Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 21:14:20 -0300
Subject: [PATCH 4/7] feat(resolver): structured signals audit trail

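Add a machine-readable `signals` list to AssetClassification alongside the
free-text `notes` and coarse `cascade`, so an auditor sees WHICH rule fired and
WHAT concrete token/phrase/ticker matched. Each `_rule_payload` branch records
its rule id + real matched evidence (via the `_first_matching_token` and
`_first_matching_phrase` helpers); the curated-seed path synthesizes a
`curated_seed` signal in `classify()` without mutating the frozen seed entry.

The debenture, internacional, ETF-by-name and ticker-shape branches are also
extracted into `_debenture_payload`, `_internacional_payload`, `_etf_payload`
and `_ticker_payload`, which accounts for most of the diffstat below.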

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/findata/resolver/engine.py | 378 +++++++++++++++++++++------------
 src/findata/resolver/models.py |  15 ++
 tests/test_resolver.py         |  41 ++++
 3 files changed, 299 insertions(+), 135 deletions(-)

diff --git a/src/findata/resolver/engine.py b/src/findata/resolver/engine.py
index 4234c33..aa03b0a 100644
--- a/src/findata/resolver/engine.py
+++ b/src/findata/resolver/engine.py
@@ -39,6 +39,7 @@
     DebentureInfo,
     Exposure,
     IdentifierResolved,
+    Signal,
     TaxInfo,
 )
 from findata.resolver.normalize import NormalizedInput, normalize
@@ -191,6 +192,170 @@ def _apply_fiscal_certainty(basis: str | None, deb: dict[str, Any], tax: dict[st
         deb["lei_12431_status"] = "not_applicable"
 
 
+# ── Signal helpers ─────────────────────────────────────────────────
+
+
+def _first_matching_token(n: NormalizedInput, candidates: tuple[str, ...]) -> str | None:
+    """Return the first candidate that appears as a whole token, or ``None``."""
+    tset = set(n.tokens)
+    for c in candidates:
+        if c in tset:
+            return c
+    return None
+
+
+def _first_matching_phrase(n: NormalizedInput, candidates: tuple[str, ...]) -> str | None:
+    """Return the first candidate that is a substring of the folded name, or ``None``."""
+    for c in candidates:
+        if c in n.name_folded:
+            return c
+    return None
+
+
+def _signal(rule: str, evidence: str, detail: str | None = None) -> list[dict[str, Any]]:
+    """Build the single-entry ``signals`` list a rule branch records."""
+    entry: dict[str, Any] = {"rule": rule, "evidence": evidence}
+    if detail is not None:
+        entry["detail"] = detail
+    return [entry]
+
+
+def _debenture_payload(n: NormalizedInput, deb_evidence: str) -> dict[str, Any]:
+    """Classify a debenture → RF; parse indexador + Lei-12.431 incentivada."""
+    indexador = parse_indexador(n.name_folded)
+    incentivada, note, basis = _infer_incentivada(n, indexador)
+    deb: dict[str, Any] = {"indexador": indexador}
+    tax: dict[str, Any] = {}
+    if incentivada:
+        deb["incentivada_1243"] = True
+        tax["isento"] = True
+    _apply_fiscal_certainty(basis, deb, tax)
+    # An *explicit* infra signal is high-confidence. The issuer+IPCA heuristic is
+    # deliberately kept below the cascade short-circuit threshold
+    # (_CONFIDENT_ENOUGH) so a wired provider re-checks the isento claim by ISIN
+    # instead of it being taken as fact.
+    if basis == "explicit":
+        confidence = 0.92
+    elif basis == "heuristic":
+        confidence = 0.7
+    else:
+        confidence = 0.88
+    return {
+        "kind": "debenture",
+        "macro_class": "Renda Fixa",
+        "subclasse": _subclasse_from_indexador(indexador),
+        "exposure": "Brasil",
+        "underlying_nature": "credito",
+        "estrutura": "debenture",
+        "debenture": deb,
+        "tax": tax,
+        "confidence": confidence,
+        "notes": note or "Debênture → Renda Fixa.",
+        "signals": _signal(
+            "debenture", deb_evidence, detail=f"basis={basis};indexador={indexador}"
+        ),
+    }
+
+
+def _internacional_payload(n: NormalizedInput, intl_evidence: str) -> dict[str, Any]:
+    """Classify an internacional-mandate fund (IE / global keyword in a fund name)."""
+    equities = n.has_token("FIA") or n.name_contains("ACOES", "EQUITY")
+    rf = n.name_contains("DIVIDA EXTERNA", "RENDA FIXA", "BOND", "CREDITO", "DEBT")
+    if equities:
+        macro, subclasse, underlying = "Renda Variável", "Ações Global", "acoes"
+    elif rf:
+        macro, subclasse, underlying = "Renda Fixa", "Dívida Externa", "credito"
+    else:
+        macro, subclasse, underlying = "Multimercado", "Multimercado Global", "multiativos"
+    return {
+        "kind": "fundo",
+        "macro_class": macro,
+        "subclasse": subclasse,
+        "exposure": "Internacional",
+        "underlying_nature": underlying,
+        "estrutura": "IE" if n.has_token("IE") else "FIC",
+        "confidence": 0.9,
+        "notes": f"Mandato internacional (IE / global): {macro}, exposição Internacional.",
+        "signals": _signal("internacional", intl_evidence, detail=f"macro={macro}"),
+    }
+
+
+def _etf_payload(n: NormalizedInput, etf_evidence: str) -> dict[str, Any]:
+    """Classify an ETF matched by name → infer underlying from name keywords."""
+    rf = n.name_contains("RENDA FIXA", "DEBENTURE", "BOND", "IMA-", "IRF-", "TESOURO", "INFRA")
+    if rf:
+        sovereign = n.name_contains("TESOURO", "IMA-", "IRF-", "LFT", "NTN", "LTN")
+        credit = n.name_contains("DEBENTURE", "INFRA")
+        return {
+            "kind": "etf",
+            "macro_class": "Renda Fixa",
+            "subclasse": "ETF de renda fixa",
+            "exposure": "Brasil",
+            "underlying_nature": "debentures"
+            if credit
+            else ("tesouro" if sovereign else "credito"),
+            "estrutura": "ETF",
+            "confidence": 0.78,
+            "notes": "ETF com underlying de renda fixa (inferido do nome).",
+            "signals": _signal("etf_name", etf_evidence, detail="underlying=rf"),
+        }
+    intl = n.name_contains(*_GLOBAL_KEYWORDS, "S&P", "SP500", "NASDAQ", "MSCI", "EUA", "US ")
+    return {
+        "kind": "etf",
+        "macro_class": "Renda Variável",
+        "subclasse": "ETF de ações internacional" if intl else "ETF de ações",
+        "exposure": "Internacional" if intl else "Brasil",
+        "underlying_nature": "acoes",
+        "estrutura": "ETF",
+        "confidence": 0.72,
+        "notes": "ETF sem ticker no seed; underlying assumido = ações. Confirmar.",
+        "signals": _signal("etf_name", etf_evidence, detail="underlying=acoes"),
+    }
+
+
+def _ticker_payload(n: NormalizedInput) -> dict[str, Any]:
+    """Classify a bare ticker by its digit suffix (no name signal won)."""
+    suffix = n.ticker_digits_suffix
+    # 11 not in any curated ETF/RF list → overwhelmingly a FII.
+    if suffix == "11":
+        return {
+            "kind": "fii",
+            "macro_class": "Renda Variável",
+            "subclasse": "FII",
+            "exposure": "Brasil",
+            "underlying_nature": "imoveis",
+            "estrutura": "FII",
+            "confidence": 0.72,
+            "notes": "Ticker terminado em 11 fora do seed de ETFs → FII (heurística).",
+            "signals": _signal("ticker_suffix_11", f"ticker={n.ticker}"),
+        }
+    # BDR (34/35): recibo de ação estrangeira. RV por classe, mas o holder
+    # carrega risco cambial/exterior → Internacional por exposição (default;
+    # BDRs de empresa brasileira no exterior são exceção, não a regra).
+    if suffix in {"34", "35"}:
+        return {
+            "kind": "bdr",
+            "macro_class": "Renda Variável",
+            "subclasse": "BDR",
+            "exposure": "Internacional",
+            "underlying_nature": "acoes",
+            "confidence": 0.8,
+            "notes": "BDR (recibo de ação estrangeira) → RV, exposição Internacional.",
+            "signals": _signal("bdr", f"ticker={n.ticker}"),
+        }
+    # Any other suffix (e.g. 3-8: ordinary/preferred share) falls through — ação brasileira.
+    return {
+        "kind": "acao",
+        "macro_class": "Renda Variável",
+        "subclasse": "Ações",
+        "exposure": "Brasil",
+        "underlying_nature": "acoes",
+        "confidence": 0.85,
+        "notes": "Ação listada na B3 → Renda Variável.",
+        "signals": _signal("acao", f"ticker={n.ticker}"),
+    }
+
+
 # ── The rule cascade ───────────────────────────────────────────────
 
 
@@ -213,17 +378,20 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "underlying_nature": "credito",
             "confidence": 0.9,
             "notes": "Name-trap: 'Crédito Estruturado' é crédito (RF), não COE/Estruturados.",
+            "signals": _signal("credito_estruturado_trap", "CREDITO ESTRUTURADO"),
         }
 
     # 1) COE / operações estruturadas → Estruturados, never an ETF.
-    if n.has_token("COE") or n.name_contains(
+    _coe_phrases = (
         "OPERACOES ESTRUTURADAS",
         "OPERACAO ESTRUTURADA",
         "CERTIFICADO DE OPERACOES",
         "CERT DE OPERACOES",
         "NOTA ESTRUTURADA",
         "NOTAS ESTRUTURADAS",
-    ):
+    )
+    if n.has_token("COE") or n.name_contains(*_coe_phrases):
+        coe_evidence = _first_matching_token(n, ("COE",)) or _first_matching_phrase(n, _coe_phrases)
         return {
             "kind": "coe",
             "macro_class": "Estruturados",
@@ -232,45 +400,20 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "estrutura": "COE",
             "confidence": 0.95,
             "notes": "COE (Certificado de Operações Estruturadas, CETIP) → Estruturados.",
+            "signals": _signal("coe", coe_evidence or "COE"),
         }
 
     # 2) Debenture → RF; parse indexador + incentivada.
-    if n.has_token("DEB", "DEBENTURE", "DEBENTURES", "DEBENT"):
-        indexador = parse_indexador(n.name_folded)
-        incentivada, note, basis = _infer_incentivada(n, indexador)
-        deb: dict[str, Any] = {"indexador": indexador}
-        tax: dict[str, Any] = {}
-        if incentivada:
-            deb["incentivada_1243"] = True
-            tax["isento"] = True
-        _apply_fiscal_certainty(basis, deb, tax)
-        # An *explicit* infra signal is high-confidence. The issuer+IPCA
-        # heuristic is deliberately kept below the cascade short-circuit
-        # threshold (_CONFIDENT_ENOUGH) so a wired provider re-checks the
-        # isento claim by ISIN instead of it being taken as fact.
-        if basis == "explicit":
-            confidence = 0.92
-        elif basis == "heuristic":
-            confidence = 0.7
-        else:
-            confidence = 0.88
-        return {
-            "kind": "debenture",
-            "macro_class": "Renda Fixa",
-            "subclasse": _subclasse_from_indexador(indexador),
-            "exposure": "Brasil",
-            "underlying_nature": "credito",
-            "estrutura": "debenture",
-            "debenture": deb,
-            "tax": tax,
-            "confidence": confidence,
-            "notes": note or "Debênture → Renda Fixa.",
-        }
+    _deb_tokens = ("DEB", "DEBENTURE", "DEBENTURES", "DEBENT")
+    if n.has_token(*_deb_tokens):
+        return _debenture_payload(n, _first_matching_token(n, _deb_tokens) or "DEB")
 
     # 3) Securitização (CRA/CRI) → RF.
-    if n.has_token("CRA", "CRI") or n.name_contains(
-        "CERT. RECEBIVEIS", "CERTIFICADO DE RECEBIVEIS"
-    ):
+    _cra_phrases = ("CERT. RECEBIVEIS", "CERTIFICADO DE RECEBIVEIS")
+    if n.has_token("CRA", "CRI") or n.name_contains(*_cra_phrases):
+        cra_evidence = _first_matching_token(n, ("CRA", "CRI")) or _first_matching_phrase(
+            n, _cra_phrases
+        )
         agro = n.has_token("CRA") or n.name_contains("AGRONEGOCIO")
         return {
             "kind": "cra" if agro else "cri",
@@ -284,13 +427,18 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             },  # CRA/CRI: IR-exempt for PF
             "confidence": 0.9,
             "notes": "Securitização (recebíveis) → Renda Fixa, isento p/ PF.",
+            "signals": _signal("cra_cri", cra_evidence or "CRA/CRI"),
         }
 
     # 4) Bank paper (CDB/RDB/LIG/Letra Financeira/Letra de Câmbio) → RF.
     #    NB: the bare 2-char tokens "LC"/"LF" are too collision-prone (they hit
     #    issuer names, share classes, internal codes), so they are matched only
     #    via their unambiguous phrases, never as bare tokens.
-    if n.has_token("CDB", "RDB", "LIG") or n.name_contains("LETRA FINANCEIRA", "LETRA DE CAMBIO"):
+    _bank_phrases = ("LETRA FINANCEIRA", "LETRA DE CAMBIO")
+    if n.has_token("CDB", "RDB", "LIG") or n.name_contains(*_bank_phrases):
+        bank_evidence = _first_matching_token(n, ("CDB", "RDB", "LIG")) or _first_matching_phrase(
+            n, _bank_phrases
+        )
         return {
             "kind": "cdb",
             "macro_class": "Renda Fixa",
@@ -299,10 +447,13 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "underlying_nature": "credito",
             "confidence": 0.88,
             "notes": "Emissão bancária → Renda Fixa.",
+            "signals": _signal("bank_paper", bank_evidence or "CDB"),
         }
-    if n.has_token("LCI", "LCA") or n.name_contains(
-        "LETRA DE CREDITO IMOBILIARIO", "LETRA DE CREDITO DO AGRONEGOCIO"
-    ):
+    _lci_phrases = ("LETRA DE CREDITO IMOBILIARIO", "LETRA DE CREDITO DO AGRONEGOCIO")
+    if n.has_token("LCI", "LCA") or n.name_contains(*_lci_phrases):
+        lci_evidence = _first_matching_token(n, ("LCI", "LCA")) or _first_matching_phrase(
+            n, _lci_phrases
+        )
         return {
             "kind": "lci_lca",
             "macro_class": "Renda Fixa",
@@ -312,12 +463,16 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "tax": {"isento": True, "isento_status": "confirmed_exempt"},
             "confidence": 0.9,
             "notes": "LCI/LCA → Renda Fixa, isento p/ PF.",
+            "signals": _signal("lci_lca", lci_evidence or "LCI/LCA"),
         }
 
     # 5) Tesouro / public bonds → RF.
-    if n.has_token("TESOURO", "NTN", "LTN", "LFT", "NTNB", "NTNF") or n.name_contains(
-        "TESOURO DIRETO", "TESOURO SELIC", "TESOURO IPCA", "TESOURO PREFIXADO"
-    ):
+    _tesouro_tokens = ("TESOURO", "NTN", "LTN", "LFT", "NTNB", "NTNF")
+    _tesouro_phrases = ("TESOURO DIRETO", "TESOURO SELIC", "TESOURO IPCA", "TESOURO PREFIXADO")
+    if n.has_token(*_tesouro_tokens) or n.name_contains(*_tesouro_phrases):
+        tesouro_evidence = _first_matching_token(n, _tesouro_tokens) or _first_matching_phrase(
+            n, _tesouro_phrases
+        )
         return {
             "kind": "tesouro",
             "macro_class": "Renda Fixa",
@@ -326,6 +481,7 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "underlying_nature": "tesouro",
             "confidence": 0.95,
             "notes": "Título público federal → Renda Fixa.",
+            "signals": _signal("tesouro", tesouro_evidence or "TESOURO"),
         }
 
     # 6) Internacional EXPOSURE — IE structure, or global keyword. Geography is
@@ -335,36 +491,22 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
     #    fund name is too collision-prone (e.g. "COMPANHIA IE ENERGIA SA").
     #    Runs before FIA/Ações so "FIC FIA IE" / "GLOBAL FIM" land here.
     fund_context = n.has_token(*_FUND_CONTEXT_TOKENS)
-    if fund_context and (
-        n.has_token("IE")
-        or n.name_contains(*_GLOBAL_KEYWORDS, "INVESTIMENTO NO EXTERIOR", "INV EXTERIOR")
-    ):
-        equities = n.has_token("FIA") or n.name_contains("ACOES", "EQUITY")
-        rf = n.name_contains("DIVIDA EXTERNA", "RENDA FIXA", "BOND", "CREDITO", "DEBT")
-        if equities:
-            macro, subclasse, underlying = "Renda Variável", "Ações Global", "acoes"
-        elif rf:
-            macro, subclasse, underlying = "Renda Fixa", "Dívida Externa", "credito"
-        else:
-            macro, subclasse, underlying = "Multimercado", "Multimercado Global", "multiativos"
-        return {
-            "kind": "fundo",
-            "macro_class": macro,
-            "subclasse": subclasse,
-            "exposure": "Internacional",
-            "underlying_nature": underlying,
-            "estrutura": "IE" if n.has_token("IE") else "FIC",
-            "confidence": 0.9,
-            "notes": f"Mandato internacional (IE / global): {macro}, exposição Internacional.",
-        }
+    _intl_phrases = (*_GLOBAL_KEYWORDS, "INVESTIMENTO NO EXTERIOR", "INV EXTERIOR")
+    if fund_context and (n.has_token("IE") or n.name_contains(*_intl_phrases)):
+        intl_evidence = _first_matching_token(n, ("IE",)) or _first_matching_phrase(
+            n, _intl_phrases
+        )
+        return _internacional_payload(n, intl_evidence or "IE")
 
     # 7) FII (by name; ticker-only 11s are caught at step 12).
-    if n.has_token("FII") or n.name_contains(
+    _fii_phrases = (
         "FUNDO IMOBILIARIO",
         "FDO INV IMOB",
         "FUNDO DE INVESTIMENTO IMOBILIARIO",
         "INVESTIMENTO IMOBILIARIO",
-    ):
+    )
+    if n.has_token("FII") or n.name_contains(*_fii_phrases):
+        fii_evidence = _first_matching_token(n, ("FII",)) or _first_matching_phrase(n, _fii_phrases)
         return {
             "kind": "fii",
             "macro_class": "Renda Variável",
@@ -374,40 +516,18 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "estrutura": "FII",
             "confidence": 0.92,
             "notes": "Fundo Imobiliário → Renda Variável (subclasse FII).",
+            "signals": _signal("fii_name", fii_evidence or "FII"),
         }
 
     # 8) ETF by name, no curated hit → infer underlying from name keywords.
-    if n.has_token("ETF") or n.name_contains("ISHARES", "INDEX FUND"):
-        rf = n.name_contains("RENDA FIXA", "DEBENTURE", "BOND", "IMA-", "IRF-", "TESOURO", "INFRA")
-        if rf:
-            sovereign = n.name_contains("TESOURO", "IMA-", "IRF-", "LFT", "NTN", "LTN")
-            credit = n.name_contains("DEBENTURE", "INFRA")
-            return {
-                "kind": "etf",
-                "macro_class": "Renda Fixa",
-                "subclasse": "ETF de renda fixa",
-                "exposure": "Brasil",
-                "underlying_nature": "debentures"
-                if credit
-                else ("tesouro" if sovereign else "credito"),
-                "estrutura": "ETF",
-                "confidence": 0.78,
-                "notes": "ETF com underlying de renda fixa (inferido do nome).",
-            }
-        intl = n.name_contains(*_GLOBAL_KEYWORDS, "S&P", "SP500", "NASDAQ", "MSCI", "EUA", "US ")
-        return {
-            "kind": "etf",
-            "macro_class": "Renda Variável",
-            "subclasse": "ETF de ações internacional" if intl else "ETF de ações",
-            "exposure": "Internacional" if intl else "Brasil",
-            "underlying_nature": "acoes",
-            "estrutura": "ETF",
-            "confidence": 0.72,
-            "notes": "ETF sem ticker no seed; underlying assumido = ações. Confirmar.",
-        }
+    _etf_phrases = ("ISHARES", "INDEX FUND")
+    if n.has_token("ETF") or n.name_contains(*_etf_phrases):
+        etf_evidence = _first_matching_token(n, ("ETF",)) or _first_matching_phrase(n, _etf_phrases)
+        return _etf_payload(n, etf_evidence or "ETF")
 
     # 9) FIDC → RF (direitos creditórios, natureza de crédito).
     if n.has_token("FIDC") or n.name_contains("DIREITOS CREDITORIOS"):
+        fidc_evidence = _first_matching_token(n, ("FIDC",)) or "DIREITOS CREDITORIOS"
         return {
             "kind": "fundo",
             "macro_class": "Renda Fixa",
@@ -417,10 +537,13 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "estrutura": "FIDC",
             "confidence": 0.85,
             "notes": "FIDC (direitos creditórios) → Renda Fixa (crédito).",
+            "signals": _signal("fidc", fidc_evidence),
         }
 
     # 10) FIP → Alternativos (private equity).
-    if n.has_token("FIP") or n.name_contains("PARTICIPACOES", "PRIVATE EQUITY"):
+    _fip_phrases = ("PARTICIPACOES", "PRIVATE EQUITY")
+    if n.has_token("FIP") or n.name_contains(*_fip_phrases):
+        fip_evidence = _first_matching_token(n, ("FIP",)) or _first_matching_phrase(n, _fip_phrases)
         return {
             "kind": "fundo",
             "macro_class": "Alternativos",
@@ -429,10 +552,13 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "estrutura": "FIP",
             "confidence": 0.88,
             "notes": "FIP (participações) → Alternativos.",
+            "signals": _signal("fip", fip_evidence or "FIP"),
         }
 
     # 11) Multimercado.
-    if n.has_token("FIM") or n.name_contains("MULTIMERCADO", "MULTIESTRATEGIA", "MACRO"):
+    _mm_phrases = ("MULTIMERCADO", "MULTIESTRATEGIA", "MACRO")
+    if n.has_token("FIM") or n.name_contains(*_mm_phrases):
+        mm_evidence = _first_matching_token(n, ("FIM",)) or _first_matching_phrase(n, _mm_phrases)
         return {
             "kind": "fundo",
             "macro_class": "Multimercado",
@@ -441,10 +567,13 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "estrutura": "FIM",
             "confidence": 0.85,
             "notes": "Multimercado.",
+            "signals": _signal("multimercado", mm_evidence or "FIM"),
         }
 
     # 12) Ações / FIA (domestic equities).
-    if n.has_token("FIA") or n.name_contains("FUNDO DE ACOES", "ACOES", "EQUITY"):
+    _fia_phrases = ("FUNDO DE ACOES", "ACOES", "EQUITY")
+    if n.has_token("FIA") or n.name_contains(*_fia_phrases):
+        fia_evidence = _first_matching_token(n, ("FIA",)) or _first_matching_phrase(n, _fia_phrases)
         return {
             "kind": "fundo",
             "macro_class": "Renda Variável",
@@ -454,46 +583,12 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "estrutura": "FIA",
             "confidence": 0.85,
             "notes": "Fundo de Ações → Renda Variável.",
+            "signals": _signal("fia", fia_evidence or "FIA"),
         }
 
     # 13) Ticker shapes (no name signal won above).
-    suffix = n.ticker_digits_suffix
     if n.ticker:
-        # 11 not in any curated ETF/RF list → overwhelmingly a FII.
-        if suffix == "11":
-            return {
-                "kind": "fii",
-                "macro_class": "Renda Variável",
-                "subclasse": "FII",
-                "exposure": "Brasil",
-                "underlying_nature": "imoveis",
-                "estrutura": "FII",
-                "confidence": 0.72,
-                "notes": "Ticker terminado em 11 fora do seed de ETFs → FII (heurística).",
-            }
-        # BDR (34/35): recibo de ação estrangeira. RV por classe, mas o holder
-        # carrega risco cambial/exterior → Internacional por exposição (default;
-        # BDRs de empresa brasileira no exterior são exceção, não a regra).
-        if suffix in {"34", "35"}:
-            return {
-                "kind": "bdr",
-                "macro_class": "Renda Variável",
-                "subclasse": "BDR",
-                "exposure": "Internacional",
-                "underlying_nature": "acoes",
-                "confidence": 0.8,
-                "notes": "BDR (recibo de ação estrangeira) → RV, exposição Internacional.",
-            }
-        # 3-8: ordinary/preferred share — ação brasileira.
-        return {
-            "kind": "acao",
-            "macro_class": "Renda Variável",
-            "subclasse": "Ações",
-            "exposure": "Brasil",
-            "underlying_nature": "acoes",
-            "confidence": 0.85,
-            "notes": "Ação listada na B3 → Renda Variável.",
-        }
+        return _ticker_payload(n)
 
     # 14) Nothing matched — honest "I don't know" for HITL review.
     return {
@@ -501,6 +596,7 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
         "macro_class": "Indefinido",
         "confidence": 0.2,
         "notes": "Sem sinal estrutural suficiente; requer revisão (human-in-the-loop).",
+        "signals": _signal("fallback", "no_structural_signal"),
     }
 
 
@@ -538,6 +634,7 @@ def _assemble(norm: NormalizedInput, payload: dict[str, Any], step: str) -> Asse
         confidence=payload.get("confidence", 0.5),
         as_of=datetime.now(_BR_TZ).date().isoformat(),
         cascade=[step],
+        signals=[Signal(**s) for s in payload.get("signals", [])],
         notes=payload.get("notes"),
     )
 
@@ -550,7 +647,18 @@ def classify(norm: NormalizedInput) -> AssetClassification:
     """
     seed = lookup_seed(ticker=norm.ticker, cnpj=norm.cnpj, name_folded=norm.name_folded)
     if seed is not None:
-        return _assemble(norm, seed.payload, step="openfindata:curated")
+        # Synthesize the curated_seed signal here (the frozen seed entry must not
+        # be mutated): describe HOW the entry matched — by ticker, then CNPJ, then
+        # name substrings. A copy keeps the seed payload immutable.
+        if seed.ticker and norm.ticker == seed.ticker:
+            evidence = f"ticker={norm.ticker}"
+        elif seed.cnpj and norm.cnpj == seed.cnpj:
+            evidence = f"cnpj={norm.cnpj}"
+        else:
+            evidence = f"name:{'+'.join(seed.name_substrings)}"
+        payload = {**seed.payload}
+        payload.setdefault("signals", [{"rule": "curated_seed", "evidence": evidence}])
+        return _assemble(norm, payload, step="openfindata:curated")
     return _assemble(norm, _rule_payload(norm), step="openfindata:rules")
 
 
diff --git a/src/findata/resolver/models.py b/src/findata/resolver/models.py
index 4940910..2d00ee7 100644
--- a/src/findata/resolver/models.py
+++ b/src/findata/resolver/models.py
@@ -86,6 +86,19 @@
 IsentoStatus = Literal["confirmed_exempt", "candidate_exempt", "confirmed_taxable", "unknown"]
 
 
+class Signal(BaseModel):
+    """One structured audit entry: which rule fired and what evidence matched.
+
+    Unlike the free-text ``notes``, a ``Signal`` is machine-readable so an
+    auditor can see WHICH rule decided and WHAT concrete token/phrase/ticker
+    triggered it (e.g. rule="debenture", evidence="DEB").
+    """
+
+    rule: str  # the rule id that fired (e.g. "debenture", "curated_seed")
+    evidence: str  # the concrete token/phrase/ticker that matched (e.g. "DEB")
+    detail: str | None = None  # optional extra (e.g. "basis=heuristic", "indexador=IPCA+")
+
+
 class IdentifierResolved(BaseModel):
     """The identifiers the resolver could normalize/confirm from the input."""
 
@@ -141,5 +154,7 @@ class AssetClassification(BaseModel):
     as_of: str  # YYYY-MM-DD
     # Audit trail: ordered list of resolution steps actually attempted.
     cascade: list[str] = Field(default_factory=list)
+    # Structured audit trail: which rule fired and what evidence matched.
+    signals: list[Signal] = Field(default_factory=list)
     # Free-text rationale, e.g. which trap was avoided or which signal decided.
     notes: str | None = None
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
index 5eb82e9..4b863a0 100644
--- a/tests/test_resolver.py
+++ b/tests/test_resolver.py
@@ -264,3 +264,44 @@ async def fake_provider(norm, current):
     # Weak result → provider is consulted.
     asyncio.run(resolve_asset(name="????", providers=[fake_provider]))
     assert calls["n"] == 1
+
+
+# ── Structured signals trail ───────────────────────────────────────
+
+
+def test_curated_seed_emits_curated_signal():
+    r = _resolve(ticker="IFRA11")
+    assert r.signals
+    assert r.signals[0].rule == "curated_seed"
+    assert "IFRA11" in r.signals[0].evidence
+
+
+def test_debenture_signal_records_evidence_and_detail():
+    r = _resolve(name="DEB PETROBRAS IPCA+")
+    deb = [s for s in r.signals if s.rule == "debenture"]
+    assert deb
+    assert deb[0].evidence == "DEB"
+    assert deb[0].detail is not None
+    assert "basis=" in deb[0].detail
+    assert "IPCA+" in deb[0].detail
+
+
+def test_coe_signal_fires():
+    r = _resolve(name="INVEST. ESTRUTURADOS COE BTG")
+    assert any(s.rule == "coe" for s in r.signals)
+
+
+def test_credito_estruturado_trap_signal_carries_phrase():
+    r = _resolve(name="AMW CREDITO ESTRUTURADO FIC FIM CP")
+    trap = [s for s in r.signals if s.rule == "credito_estruturado_trap"]
+    assert trap
+    assert "CREDITO ESTRUTURADO" in trap[0].evidence
+
+
+@pytest.mark.parametrize(
+    "ident",
+    ["PETR4", "HGLG11", "Tesouro IPCA+ 2035", "KAPITALO ZETA FIC FIM"],
+)
+def test_every_result_carries_at_least_one_signal(ident):
+    r = classify(normalize(name=ident))
+    assert len(r.signals) >= 1

From 70361eb799d2636d07daba8aefec242a4035fad5 Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 21:23:39 -0300
Subject: [PATCH 5/7] fix(resolver): address review-bot findings + doc drift
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cross-host CI reviewers (Gemini, CodeRabbit) on PR #33:
- Gate the FIP/Multimercado/FIA keyword paths ("PARTICIPACOES", "MACRO",
  "ACOES"/"EQUITY") on a fund context so company/trade names ("Randon
  Participações SA", "Macro Atacadista") are not misclassified as funds.
- Derive the Tesouro subclasse from the bond code when the name lacks the
  index word: NTN-B → Indexada à Inflação, LFT → Pós-fixada, LTN/NTN-F →
  Prefixada; unmapped public bonds → "Título Público", never "Crédito Privado".
- Drop the redundant second fold() in lookup_seed (input is pre-folded).
- Fix doc drift: macro_class no longer lists "Internacional" (it is the
  exposure axis) in the REST router, MCP tool summary/docstring, package
  docstring, and MCP_SURFACE.md; add a language to the fenced block.

6 new regression tests. 272 → 278 passed, 15 deselected. ruff + mypy clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                        | 33 ++++++++++++++----------
 docs/MCP_SURFACE.md                 |  4 +--
 src/findata/api/mcp_app.py          | 25 ++++++++++--------
 src/findata/api/routers/resolver.py |  7 ++---
 src/findata/resolver/__init__.py    | 10 +++++---
 src/findata/resolver/engine.py      | 40 ++++++++++++++++++++++-------
 src/findata/resolver/seed.py        |  7 +++--
 tests/test_resolver.py              | 38 +++++++++++++++++++++++++++
 8 files changed, 118 insertions(+), 46 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a2f2a67..e24b8cb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,19 +21,26 @@ adheres to [Semantic Versioning](https://semver.org/).
 - **Asset-classification resolver** — `findata.resolver.resolve_asset()`,
   `GET /resolver/resolve`, and the `resolve_asset` MCP tool. Turns any
   Brazilian asset identifier (ticker/CNPJ/ISIN/name) into a classification
-  mapped to the consolidation macro taxonomy (Renda Fixa, Renda Variável,
-  Multimercado, Alternativos, Estruturados) plus an orthogonal `exposure`
-  axis (Brasil/Internacional), `subclasse`, `underlying_nature`, debenture
-  Lei-12.431 facts, `source`, `confidence`, and the `cascade` walked.
-  Deterministic and offline at its core (a curated ETF/global-fund seed +
-  structural rules), with an injectable external-provider chain (Mais
-  Retorno / CVM-B3 / restricted web search) for low-confidence fallback.
-  Classifies ETFs/funds by underlying (IFRA11 debêntures → RF; IVVB11 ações
-  → RV + Internacional), defends the COE-never-ETF and "Crédito Estruturado"
-  name-traps, and keeps the Lei-12.431 isento flag below the cascade
-  short-circuit when only inferred by heuristic. Hardened after adversarial
-  review: bare-token collisions (`IE`/`LC`/`LF`/substring `LCI`) removed,
-  API length caps, `as_of` stamped in America/Sao_Paulo.
+  mapped to the consolidation taxonomy. `macro_class` is the asset class
+  (Renda Fixa, Renda Variável, Multimercado, Alternativos, Estruturados);
+  geography is the orthogonal `exposure` axis (Brasil/Internacional), so
+  IVVB11 and BDRs are RV + Internacional and a global-mandate FIA is RV +
+  Internacional. Also returns `subclasse`, `underlying_nature`, debenture
+  Lei-12.431 facts with a **certainty status** (`lei_12431_status`:
+  confirmed/candidate/not_applicable; `isento_status`), `source`,
+  `confidence`, the `cascade` walked, and a structured `signals` trail
+  (which rule fired on what evidence). Deterministic and offline at its core
+  (a curated ETF/global-fund seed + structural rules), with an injectable
+  external-provider chain (Mais Retorno / CVM-B3 / restricted web search)
+  for low-confidence fallback. Classifies ETFs/funds by underlying (IFRA11
+  debêntures → RF; IVVB11 ações → RV), defends the COE-never-ETF and "Crédito
+  Estruturado" name-traps, and keeps a heuristic Lei-12.431 isento as a
+  `candidate` below the cascade short-circuit so a provider can confirm it by
+  ISIN. Hardened after cross-host adversarial review and CI review bots:
+  bare-token collisions (`IE`/`LC`/`LF`, substring `LCI`) removed; keyword
+  paths (`MACRO`/`ACOES`/`PARTICIPACOES`) gated on fund context; public-bond
+  subclasse derived from the bond code (NTN-B → inflation); API length caps;
+  `as_of` stamped in America/Sao_Paulo.
 - **ANBIMA Títulos Públicos (TPF) secondary market** — `get_tpf()`,
   `GET /anbima/tpf`, and `findata anbima tpf`. Daily reference rates for
   outstanding federal government bonds (LTN, LFT, NTN-B, NTN-C, NTN-F) from
diff --git a/docs/MCP_SURFACE.md b/docs/MCP_SURFACE.md
index fbbb90a..31c9961 100644
--- a/docs/MCP_SURFACE.md
+++ b/docs/MCP_SURFACE.md
@@ -55,9 +55,9 @@ safe. **The 95 REST routes that back the CLI and HTTP consumers never change.**
 
 ## The 25 curated tools
 
-```
+```text
 registry_lookup          ← start here: CNPJ / ticker / code / name → entities
-resolve_asset            ← classify an asset into the macro taxonomy + exposure
+resolve_asset            ← classify an asset: macro asset class + exposure
 
 bcb_series   bcb_ptax   bcb_focus                       (BCB: 12 → 3)
 cvm_company  cvm_financials  cvm_fund  cvm_structured_fund   (CVM: 22 → 4)
diff --git a/src/findata/api/mcp_app.py b/src/findata/api/mcp_app.py
index b82115e..abef0e4 100644
--- a/src/findata/api/mcp_app.py
+++ b/src/findata/api/mcp_app.py
@@ -102,7 +102,7 @@ async def registry_lookup(
     "/resolver/resolve",
     operation_id="resolve_asset",
     response_model=None,
-    summary="Classify a Brazilian asset into the macro taxonomy (RF/RV/Multi/Intl/Alt/Estrut)",
+    summary="Classify a Brazilian asset: asset class + Brasil/Internacional exposure",
 )
 async def resolve_asset_tool(
     name: str | None = Query(
@@ -112,16 +112,19 @@ async def resolve_asset_tool(
     cnpj: str | None = Query(None, max_length=32, description="Fund CNPJ (masked or not)"),
     isin: str | None = Query(None, max_length=16, description="ISIN, e.g. BR..."),
 ) -> Any:
-    """Turn any asset identifier into a classification already mapped to the
-    consolidation macro taxonomy: Renda Fixa, Renda Variável, Multimercado,
-    Internacional, Alternativos, Estruturados.
-
-    Returns ``macro_class`` + ``subclasse`` + ``underlying_nature`` (splits
-    ETF-de-ações from ETF-de-debêntures), debenture/Lei-12.431 facts, ``source``,
-    ``confidence``, and the ``cascade`` walked — deterministic and cacheable.
-    Pass any subset of identifiers; a bare ticker/CNPJ given as ``name`` is
-    auto-detected. Use this (not ``registry_lookup``) when you need the asset's
-    macro class, not its registry entity.
+    """Turn any asset identifier into a classification mapped to the
+    consolidation taxonomy. ``macro_class`` is the asset class only — Renda Fixa,
+    Renda Variável, Multimercado, Alternativos, Estruturados; geography is the
+    separate ``exposure`` axis (Brasil/Internacional), so e.g. IVVB11 is RV +
+    Internacional.
+
+    Returns ``macro_class`` + ``exposure`` + ``subclasse`` + ``underlying_nature``
+    (splits ETF-de-ações from ETF-de-debêntures), debenture/Lei-12.431 facts (with
+    a confirmed/candidate certainty status), ``source``, ``confidence``, the
+    ``cascade`` walked, and structured ``signals`` (which rule fired on what
+    evidence) — deterministic and cacheable. Pass any subset of identifiers; a
+    bare ticker/CNPJ given as ``name`` is auto-detected. Use this (not
+    ``registry_lookup``) when you need the asset's class, not its registry entity.
     """
     return await resolve_asset(name=name, ticker=ticker, cnpj=cnpj, isin=isin)
 
diff --git a/src/findata/api/routers/resolver.py b/src/findata/api/routers/resolver.py
index a47c8c3..ea14447 100644
--- a/src/findata/api/routers/resolver.py
+++ b/src/findata/api/routers/resolver.py
@@ -27,9 +27,10 @@ async def resolve(
     """Classifica um ativo na taxonomia macro Wealthuman.
 
     Aceita qualquer identificador (``name``/``ticker``/``cnpj``/``isin``) e
-    devolve ``macro_class`` já mapeada (Renda Fixa, Renda Variável, Multimercado,
-    Internacional, Alternativos, Estruturados) + subclasse, underlying,
-    debênture/Lei 12.431, ``source``, ``confidence`` e a cascata percorrida.
+    devolve ``macro_class`` (classe de ativo: Renda Fixa, Renda Variável,
+    Multimercado, Alternativos, Estruturados) + ``exposure`` (eixo ortogonal de
+    geografia: Brasil/Internacional) + subclasse, underlying, debênture/Lei
+    12.431, ``source``, ``confidence``, ``signals`` e a cascata percorrida.
     Determinístico e cacheável.
     """
     return await resolve_asset(name=name, ticker=ticker, cnpj=cnpj, isin=isin)
diff --git a/src/findata/resolver/__init__.py b/src/findata/resolver/__init__.py
index 003c19b..a75afa9 100644
--- a/src/findata/resolver/__init__.py
+++ b/src/findata/resolver/__init__.py
@@ -1,10 +1,12 @@
 """Wealthuman asset-classification resolver.
 
 ``resolve_asset(identifier)`` turns any Brazilian asset identifier (ticker,
-CNPJ, ISIN, or bare name) into a classification already mapped to the Wealthuman
-macro taxonomy (Renda Fixa, Renda Variável, Multimercado, Internacional,
-Alternativos, Estruturados) plus subclasse, underlying nature, debenture /
-Lei-12.431 facts, source, confidence, and an audit cascade.
+CNPJ, ISIN, or bare name) into a classification mapped to the Wealthuman
+taxonomy: ``macro_class`` is the asset class (Renda Fixa, Renda Variável,
+Multimercado, Alternativos, Estruturados); geography is the orthogonal
+``exposure`` axis (Brasil/Internacional). Plus subclasse, underlying nature,
+debenture / Lei-12.431 facts (with a certainty status), source, confidence, an
+audit cascade, and structured signals.
 
 Deterministic, cacheable, auditable, no PII. See ``openfindata-mcp-spec.md``.
 """
diff --git a/src/findata/resolver/engine.py b/src/findata/resolver/engine.py
index aa03b0a..6cf0646 100644
--- a/src/findata/resolver/engine.py
+++ b/src/findata/resolver/engine.py
@@ -132,14 +132,30 @@ def parse_indexador(name_folded: str) -> str | None:
     return None
 
 
-def _subclasse_from_indexador(indexador: str | None) -> str:
+def _subclasse_from_indexador(indexador: str | None, default: str = "Crédito Privado") -> str:
     if indexador == "IPCA+":
         return "Indexada à Inflação"
     if indexador in {"%CDI", "CDI+", "SELIC"}:
         return "Pós-fixada"
     if indexador == "PREFIXADO":
         return "Prefixada"
-    return "Crédito Privado"
+    return default
+
+
+# Public-bond type → indexador, for names that carry the bond code but not the
+# index word (e.g. "NTN-B 2035" has no "IPCA"). Folded substrings, so "NTN-B"
+# and "NTNB" both hit. NTN-C (IGP-M) is left to the generic path.
+def _tesouro_indexador(n: NormalizedInput) -> str | None:
+    explicit = parse_indexador(n.name_folded)
+    if explicit is not None:
+        return explicit
+    if n.name_contains("NTN-B", "NTNB"):
+        return "IPCA+"
+    if n.name_contains("LFT"):
+        return "SELIC"
+    if n.name_contains("NTN-F", "NTNF", "LTN"):
+        return "PREFIXADO"
+    return None
 
 
 def _infer_incentivada(
@@ -476,7 +492,9 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
         return {
             "kind": "tesouro",
             "macro_class": "Renda Fixa",
-            "subclasse": _subclasse_from_indexador(parse_indexador(n.name_folded)),
+            # Public bonds carry the index in their type code, not always a word;
+            # default to "Título Público", never the credit-private subclasse.
+            "subclasse": _subclasse_from_indexador(_tesouro_indexador(n), default="Título Público"),
             "exposure": "Brasil",
             "underlying_nature": "tesouro",
             "confidence": 0.95,
@@ -540,9 +558,11 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "signals": _signal("fidc", fidc_evidence),
         }
 
-    # 10) FIP → Alternativos (private equity).
+    # 10) FIP → Alternativos (private equity). The FIP token is unambiguous; the
+    #     "PARTICIPACOES"/"PRIVATE EQUITY" phrases need a fund context so a holding
+    #     company ("XYZ Participações SA") is not classified as a fund.
     _fip_phrases = ("PARTICIPACOES", "PRIVATE EQUITY")
-    if n.has_token("FIP") or n.name_contains(*_fip_phrases):
+    if n.has_token("FIP") or (fund_context and n.name_contains(*_fip_phrases)):
         fip_evidence = _first_matching_token(n, ("FIP",)) or _first_matching_phrase(n, _fip_phrases)
         return {
             "kind": "fundo",
@@ -555,9 +575,10 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "signals": _signal("fip", fip_evidence or "FIP"),
         }
 
-    # 11) Multimercado.
+    # 11) Multimercado. FIM token is unambiguous; the phrases (esp. "MACRO",
+    #     common in trade names like "Macro Atacadista") need a fund context.
     _mm_phrases = ("MULTIMERCADO", "MULTIESTRATEGIA", "MACRO")
-    if n.has_token("FIM") or n.name_contains(*_mm_phrases):
+    if n.has_token("FIM") or (fund_context and n.name_contains(*_mm_phrases)):
         mm_evidence = _first_matching_token(n, ("FIM",)) or _first_matching_phrase(n, _mm_phrases)
         return {
             "kind": "fundo",
@@ -570,9 +591,10 @@ def _rule_payload(norm: NormalizedInput) -> dict[str, Any]:
             "signals": _signal("multimercado", mm_evidence or "FIM"),
         }
 
-    # 12) Ações / FIA (domestic equities).
+    # 12) Ações / FIA (domestic equities). FIA token is unambiguous; the bare
+    #     "ACOES"/"EQUITY" keywords need a fund context (avoid company names).
     _fia_phrases = ("FUNDO DE ACOES", "ACOES", "EQUITY")
-    if n.has_token("FIA") or n.name_contains(*_fia_phrases):
+    if n.has_token("FIA") or (fund_context and n.name_contains(*_fia_phrases)):
         fia_evidence = _first_matching_token(n, ("FIA",)) or _first_matching_phrase(n, _fia_phrases)
         return {
             "kind": "fundo",
diff --git a/src/findata/resolver/seed.py b/src/findata/resolver/seed.py
index e67418e..05f9758 100644
--- a/src/findata/resolver/seed.py
+++ b/src/findata/resolver/seed.py
@@ -28,8 +28,6 @@
 from dataclasses import dataclass, field
 from typing import Any
 
-from findata.resolver.normalize import fold
-
 
 @dataclass(frozen=True)
 class SeedEntry:
@@ -168,8 +166,9 @@ def lookup_seed(*, ticker: str | None, cnpj: str | None, name_folded: str) -> Se
     if cnpj and cnpj in _BY_CNPJ:
         return _BY_CNPJ[cnpj]
     if name_folded:
-        folded = fold(name_folded)
+        # ``name_folded`` is already ASCII-folded/uppercased by the caller
+        # (normalize()), so match directly — no second fold.
         for entry in _NAME_ENTRIES:
-            if all(sub in folded for sub in entry.name_substrings):
+            if all(sub in name_folded for sub in entry.name_substrings):
                 return entry
     return None
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
index 4b863a0..b2b304f 100644
--- a/tests/test_resolver.py
+++ b/tests/test_resolver.py
@@ -192,6 +192,44 @@ def test_unknown_is_indefinido_low_confidence():
     assert r.confidence < 0.5
 
 
+# ── Review-bot regressions (token collisions + Tesouro subclasse) ──
+
+
+def test_ntnb_bond_code_maps_to_inflation_subclasse():
+    # "NTN-B" carries no "IPCA" word, but it is an inflation-linked public bond.
+    r = _resolve(name="NTN-B 2035")
+    assert r.kind == "tesouro"
+    assert r.subclasse == "Indexada à Inflação"
+
+
+def test_lft_and_ltn_bond_codes_map_to_right_subclasse():
+    assert _resolve(name="LFT 2029").subclasse == "Pós-fixada"
+    assert _resolve(name="LTN 2028").subclasse == "Prefixada"
+
+
+def test_tesouro_without_indexador_is_titulo_publico_not_credito_privado():
+    # NTN-C (IGP-M) isn't mapped → generic public-bond subclasse, never credit.
+    r = _resolve(name="NTN-C 2031")
+    assert r.kind == "tesouro"
+    assert r.subclasse == "Título Público"
+
+
+def test_holding_company_participacoes_is_not_fip():
+    # "Participações" in a company name (no fund context) must not be a FIP.
+    r = _resolve(name="RANDON PARTICIPACOES SA")
+    assert r.macro_class != "Alternativos"
+
+
+def test_macro_trade_name_is_not_multimercado():
+    r = _resolve(name="MACRO ATACADISTA DISTRIBUIDORA SA")
+    assert r.macro_class != "Multimercado"
+
+
+def test_bare_acoes_keyword_without_fund_context_is_not_fia():
+    r = _resolve(name="EMPRESA BRASILEIRA ACOES ON SA")
+    assert not (r.kind == "fundo" and r.subclasse == "Ações")
+
+
 # ── Fiscal-certainty axis (lei_12431_status / isento_status) ───────
 
 

From c80207619f2f69163a75f1e2854d983126af025e Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 21:24:43 -0300
Subject: [PATCH 6/7] docs(resolver): client-facing contract for resolve_asset
 (Wealthuman deliverable)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add docs/RESOLVER.md: the MCP/REST/Python contract the consolidator calls —
input, full output schema with the two-axis model (asset class + exposure),
fiscal-certainty status, cascade, verified test-set table, and the pre-prod
pending list.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 docs/RESOLVER.md | 132 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 docs/RESOLVER.md

diff --git a/docs/RESOLVER.md b/docs/RESOLVER.md
new file mode 100644
index 0000000..26c482b
--- /dev/null
+++ b/docs/RESOLVER.md
@@ -0,0 +1,132 @@
+# `resolve_asset` — classificador de ativos (taxonomia Wealthuman)
+
+> Entrega para o demandante (Wealthuman / consolidação de extratos). Define o
+> contrato que o consolidador chama por ativo (dezenas por extrato). Implementado
+> em [`src/findata/resolver/`](../src/findata/resolver/), exposto por REST, MCP e
+> biblioteca Python.
+
+## Problema
+
+A consolidação classifica cada ativo na taxonomia macro do banker. O agente
+antigo buscava ANBIMA/debentures.com.br no Brave: lento e errava (chutava RV pelo
+"11" de um ETF de debênture, perdia mandato global sem "IE", confundia "Crédito
+Estruturado" com COE). `resolve_asset` devolve a classificação **determinística,
+cacheável e auditável**, já na taxonomia do cliente.
+
+## Como chamar
+
+Três superfícies, mesmo núcleo:
+
+| Superfície | Chamada |
+|---|---|
+| REST | `GET /resolver/resolve?ticker=IFRA11&name=FI%20ITAUINFRA` |
+| MCP | tool `resolve_asset` (args `name`/`ticker`/`cnpj`/`isin`) |
+| Python | `await findata.resolver.resolve_asset(ticker="IFRA11")` |
+
+**Input** — qualquer subconjunto de identificadores; o resolver normaliza e
+promove um identificador "pelado" passado em `name` (o extrato às vezes só tem o
+label):
+
+```json
+{ "name": "FI ITAUINFRA CI", "ticker": "IFRA11", "cnpj": null, "isin": null }
+```
+
+Sem PII: o resolver recebe **só** identificador de ativo, nunca dado de cliente.
+Limites de tamanho no boundary (`name` 256, `ticker` 16, `cnpj` 32, `isin` 16).
+
+## Contrato de saída
+
+```jsonc
+{
+  "identifier_resolved": { "cnpj": null, "ticker": "IFRA11", "isin": null, "name": "FI ITAUINFRA CI" },
+  "kind": "etf",                     // fundo|acao|fii|etf|bdr|debenture|cra|cri|cdb|lci_lca|tesouro|coe|outro
+  "cvm": { "classe": null, "anbima_categoria": null, "estrutura": "ETF" },
+  "macro_class": "Renda Fixa",       // CLASSE DE ATIVO (ver eixo 1 abaixo)
+  "subclasse": "Indexada à Inflação",
+  "exposure": "Brasil",              // GEOGRAFIA (ver eixo 2) — Brasil|Internacional|null
+  "underlying_nature": "debentures", // acoes|debentures|credito|recebiveis|imoveis|multiativos|tesouro|cambio|private_equity|outro
+  "debenture": {                     // só quando há debênture
+    "incentivada_1243": true,
+    "lei_12431_status": "confirmed", // confirmed|candidate|not_applicable|unknown
+    "indexador": "IPCA+",
+    "vencimento": null
+  },
+  "tax": { "isento": true, "isento_status": "confirmed_exempt" },
+  "source": "openfindata",           // openfindata|maisretorno|cvm|b3|web_search
+  "confidence": 0.97,                // 0..1; baixa => human-in-the-loop
+  "as_of": "2026-06-29",             // carimbado em America/Sao_Paulo
+  "cascade": ["openfindata:curated"],// trilha de fontes percorrida
+  "signals": [                       // trilha estruturada: que regra disparou e com qual evidência
+    { "rule": "curated_seed", "evidence": "ticker=IFRA11", "detail": null }
+  ],
+  "notes": "Curated: ETF de debêntures de infraestrutura (FI-Infra, Lei 12.431)…"
+}
+```
+
+### Dois eixos ortogonais (decisão de modelo)
+
+1. **`macro_class` = classe de ativo**: `Renda Fixa`, `Renda Variável`,
+   `Multimercado`, `Alternativos`, `Estruturados` (+ `Indefinido` quando o
+   resolver não decide). Geografia **não** é valor de macro.
+2. **`exposure` = geografia/estratégia**: `Brasil` | `Internacional` | `null`. É
+   onde a exposição econômica está, independente da classe. A B3 é o domicílio do
+   ativo, não a exposição. Logo:
+   - **IVVB11** (ETF de S&P 500 listado na B3) → `RV` + `exposure=Internacional`
+   - **BDR** → `RV` + `exposure=Internacional` (risco cambial/exterior)
+   - **FIA de mandato global** (ARBOR, WHG) → `RV` + `exposure=Internacional`
+
+### Eixo de certeza fiscal
+
+Os bools `incentivada_1243`/`isento` respondem "sim/não". Os status carregam a
+**certeza** que o bool não carrega:
+
+- `lei_12431_status`: `confirmed` (sinal explícito de infra / FI-Infra),
+  `candidate` (heurística emissor+IPCA, **confirmar por ISIN** antes de tratar
+  como isento), `not_applicable` (é debênture, mas não infra), `unknown`.
+- `isento_status`: `confirmed_exempt` (estatutário: CRA/CRI, LCI/LCA, 12.431
+  confirmada), `candidate_exempt` (heurística), `confirmed_taxable`, `unknown`.
+
+Quando `confidence < ~0.9` ou status `candidate`, é gancho de revisão humana.
+
+## Cascata de fontes (fallback)
+
+1. **openfindata** (primário, offline): seed curado + regras estruturais. Resolve
+   o test set sem rede.
+2. **Mais Retorno MCP** (dados BR de fundo/CNPJ/classe CVM).
+3. **outro provider** (CVM dados abertos / B3).
+4. **web_search restrito** a `maisretorno.com`, `b3.com.br`,
+   `yahoofinance.com.br`, `debentures.com.br`.
+
+Cada degrau preenche o que o anterior não trouxe e **baixa a confidence**;
+`source` reflete a origem final; `cascade` loga o caminho. Os degraus 2 a 4 são
+um ponto de extensão injetável (`AssetProvider`), consultado só quando o
+resultado do núcleo está fraco. No estado atual deste PR, **só o degrau 1 está
+ligado** (os externos são stubs a conectar no deploy).
+
+## Test set (passa 100%, offline)
+
+| Identificador | macro_class | exposure | nota |
+|---|---|---|---|
+| IFRA11 / FI ITAUINFRA | Renda Fixa | Brasil | ETF de debêntures de infra; "Indexada à Inflação"; isento confirmado |
+| ARBOR FIC FIA | Renda Variável | Internacional | mandato global sem "IE" |
+| WHG GLOBAL FIC FIA IE | Renda Variável | Internacional | estrutura IE |
+| DEB PETROBRAS IPCA+ | Renda Fixa | Brasil | debênture; incentivada **candidate** (confirmar ISIN) |
+| COE | Estruturados | (n/a) | `kind=coe`, **nunca** ETF |
+| "Crédito Estruturado" (Warren/AMW) | Renda Fixa | Brasil | name-trap: é crédito, não Estruturados |
+| IVVB11 | Renda Variável | Internacional | ETF de ações S&P 500 |
+| HGLG11 / MXRF11 | Renda Variável | Brasil | subclasse FII |
+
+## Não-funcionais
+
+- **Determinístico + cacheável**: mesmo identificador → mesma classificação
+  (exceto `as_of`); CNPJ/ticker mudam de classe raramente, cachear agressivamente.
+- **Latência baixa**: núcleo é offline, sem I/O.
+- **Auditável**: sempre `source` + `as_of` + `cascade` + `signals`.
+- **Sem PII**: só identificador de ativo cruza o boundary.
+
+## Pendências antes de produção
+
+- Conectar os providers externos reais (Mais Retorno MCP, web search restrito).
+- Confirmação ISIN-level da incentivada (12.431) via ANBIMA/debentures.com.br no
+  degrau de cascata — hoje fica `candidate`.
+- Ampliar o seed curado de ETFs conforme novos ETFs forem listados na B3.

From 63b5b3f8e36746314eed2104b5466a0899281963 Mon Sep 17 00:00:00 2001
From: Roberto <robertoecf@users.noreply.github.com>
Date: Mon, 29 Jun 2026 22:12:48 -0300
Subject: [PATCH 7/7] fix(resolver): address remaining CodeRabbit review
 threads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- models: strict ConfigDict(extra="forbid") base for the contract models, so a
  typo in an internally-built DebentureInfo/TaxInfo payload fails loudly instead
  of silently dropping a field.
- engine: per-provider try/except in the cascade — a raising provider logs
  provider_error:<Type> on the cascade and continues, never nuking the
  deterministic core result.
- engine: tighten the ticker fallback — only suffixes 3-8 map to Ações; other
  suffixes (subscription rights / odd codes) defer to Indefinido/HITL. Bare ...11
  stays FII (spec test set: HGLG11/MXRF11).
- seed: token-aware name-marker matching, so ("ARBOR","FIA") matches
  "ARBOR FIC FIA" but not "ARBOR FIAGRO" (FIA not a token of FIAGRO).
- normalize: fold candidate args inside has_token/name_contains so natural-string
  callers cannot silently miss rules.
- tests: regressions for each of the above.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/findata/resolver/engine.py    | 35 ++++++++++++++++++++--------
 src/findata/resolver/models.py    | 27 ++++++++++++++++------
 src/findata/resolver/normalize.py | 15 ++++++++----
 src/findata/resolver/seed.py      | 13 +++++++++--
 tests/test_resolver.py            | 38 +++++++++++++++++++++++++++++++
 5 files changed, 106 insertions(+), 22 deletions(-)

diff --git a/src/findata/resolver/engine.py b/src/findata/resolver/engine.py
index 6cf0646..81159d7 100644
--- a/src/findata/resolver/engine.py
+++ b/src/findata/resolver/engine.py
@@ -360,15 +360,25 @@ def _ticker_payload(n: NormalizedInput) -> dict[str, Any]:
             "signals": _signal("bdr", f"ticker={n.ticker}"),
         }
     # 3-8: ordinary/preferred share — ação brasileira.
+    if suffix in {"3", "4", "5", "6", "7", "8"}:
+        return {
+            "kind": "acao",
+            "macro_class": "Renda Variável",
+            "subclasse": "Ações",
+            "exposure": "Brasil",
+            "underlying_nature": "acoes",
+            "confidence": 0.85,
+            "notes": "Ação listada na B3 → Renda Variável.",
+            "signals": _signal("acao", f"ticker={n.ticker}"),
+        }
+    # Other suffixes (1/2/9/10/12/13… subscription rights, receipts, odd codes)
+    # carry no reliable structural signal → defer to HITL/provider cascade.
     return {
-        "kind": "acao",
-        "macro_class": "Renda Variável",
-        "subclasse": "Ações",
-        "exposure": "Brasil",
-        "underlying_nature": "acoes",
-        "confidence": 0.85,
-        "notes": "Ação listada na B3 → Renda Variável.",
-        "signals": _signal("acao", f"ticker={n.ticker}"),
+        "kind": "outro",
+        "macro_class": "Indefinido",
+        "confidence": 0.2,
+        "notes": "Ticker com sufixo sem sinal estrutural suficiente; requer revisão (HITL).",
+        "signals": _signal("ticker_suffix_unknown", f"ticker={n.ticker}"),
     }
 
 
@@ -709,7 +719,14 @@ async def resolve_asset(
         # Stop early once we are confident — saves the network round-trips.
         if result.macro_class != "Indefinido" and result.confidence >= _CONFIDENT_ENOUGH:
             break
-        enriched = await provider(norm, result)
+        # Providers are best-effort enrichment: a flaky network/provider must not
+        # nuke the deterministic core result. Isolate the failure, log it on the
+        # cascade, and keep the last good classification.
+        try:
+            enriched = await provider(norm, result)
+        except Exception as exc:  # any provider failure is non-fatal
+            result.cascade.append(f"provider_error:{type(exc).__name__}")
+            continue
         if enriched is not None:
             enriched.cascade = [*result.cascade, *enriched.cascade]
             result = enriched
diff --git a/src/findata/resolver/models.py b/src/findata/resolver/models.py
index 2d00ee7..2b6f3b0 100644
--- a/src/findata/resolver/models.py
+++ b/src/findata/resolver/models.py
@@ -14,7 +14,20 @@
 
 from typing import Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class _StrictModel(BaseModel):
+    """Base for the resolver contract models.
+
+    ``extra="forbid"`` so a typo in an internally-built payload (the engine
+    constructs ``DebentureInfo(**deb)`` / ``TaxInfo(**tax)`` from dicts) raises a
+    validation error instead of silently dropping the field and emitting a
+    partially empty classification.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
 
 # ── Controlled vocabularies ────────────────────────────────────────
 
@@ -86,7 +99,7 @@
 IsentoStatus = Literal["confirmed_exempt", "candidate_exempt", "confirmed_taxable", "unknown"]
 
 
-class Signal(BaseModel):
+class Signal(_StrictModel):
     """One structured audit entry: which rule fired and what evidence matched.
 
     Unlike the free-text ``notes``, a ``Signal`` is machine-readable so an
@@ -99,7 +112,7 @@ class Signal(BaseModel):
     detail: str | None = None  # optional extra (e.g. "basis=heuristic", "indexador=IPCA+")
 
 
-class IdentifierResolved(BaseModel):
+class IdentifierResolved(_StrictModel):
     """The identifiers the resolver could normalize/confirm from the input."""
 
     cnpj: str | None = None
@@ -108,7 +121,7 @@ class IdentifierResolved(BaseModel):
     name: str | None = None
 
 
-class CvmInfo(BaseModel):
+class CvmInfo(_StrictModel):
     """Raw upstream classification, kept for audit alongside the mapped macro."""
 
     classe: str | None = None
@@ -116,7 +129,7 @@ class CvmInfo(BaseModel):
     estrutura: str | None = None  # FIA | FIM | FIC | FIDC | FIP | FII | IE | ETF | ...
 
 
-class DebentureInfo(BaseModel):
+class DebentureInfo(_StrictModel):
     """Debenture-specific facts. Only populated when ``kind == 'debenture'``
     (or an FI-Infra ETF whose underlying *is* incentivada debentures)."""
 
@@ -128,7 +141,7 @@ class DebentureInfo(BaseModel):
     vencimento: str | None = None  # YYYY-MM when known
 
 
-class TaxInfo(BaseModel):
+class TaxInfo(_StrictModel):
     """Tax treatment for the typical PF holder."""
 
     isento: bool | None = None  # True for Lei 12.431 / LCI-LCA / FII dividends etc.
@@ -137,7 +150,7 @@ class TaxInfo(BaseModel):
     isento_status: IsentoStatus = "unknown"
 
 
-class AssetClassification(BaseModel):
+class AssetClassification(_StrictModel):
     """The full resolver output. One asset in → one auditable record out."""
 
     identifier_resolved: IdentifierResolved
diff --git a/src/findata/resolver/normalize.py b/src/findata/resolver/normalize.py
index a6188c7..d08662a 100644
--- a/src/findata/resolver/normalize.py
+++ b/src/findata/resolver/normalize.py
@@ -59,13 +59,20 @@ class NormalizedInput:
     isin: str | None = None
 
     def has_token(self, *candidates: str) -> bool:
-        """True if any candidate appears as a whole token."""
+        """True if any candidate appears as a whole token.
+
+        Candidates are folded internally, so callers may pass natural strings
+        ("Ações") or pre-folded markers ("ACOES") interchangeably.
+        """
         tset = set(self.tokens)
-        return any(c in tset for c in candidates)
+        return any(fold(c) in tset for c in candidates)
 
     def name_contains(self, *needles: str) -> bool:
-        """True if any needle is a substring of the folded name (phrase match)."""
-        return any(n in self.name_folded for n in needles)
+        """True if any needle is a substring of the folded name (phrase match).
+
+        Needles are folded internally (see :meth:`has_token`).
+        """
+        return any(fold(n) in self.name_folded for n in needles)
 
     @property
     def ticker_digits_suffix(self) -> str | None:
diff --git a/src/findata/resolver/seed.py b/src/findata/resolver/seed.py
index 05f9758..e6a074c 100644
--- a/src/findata/resolver/seed.py
+++ b/src/findata/resolver/seed.py
@@ -28,6 +28,8 @@
 from dataclasses import dataclass, field
 from typing import Any
 
+from findata.resolver.normalize import tokenize
+
 
 @dataclass(frozen=True)
 class SeedEntry:
@@ -167,8 +169,15 @@ def lookup_seed(*, ticker: str | None, cnpj: str | None, name_folded: str) -> Se
         return _BY_CNPJ[cnpj]
     if name_folded:
         # ``name_folded`` is already ASCII-folded/uppercased by the caller
-        # (normalize()), so match directly — no second fold.
+        # (normalize()), so no second fold. Single-word markers must match a
+        # whole token — so ("ARBOR", "FIA") matches "ARBOR FIC FIA" but NOT
+        # "ARBOR FIAGRO" (FIA ⊄ FIAGRO as a token). Multi-word / non-alnum
+        # markers fall back to substring.
+        tokens = set(tokenize(name_folded))
         for entry in _NAME_ENTRIES:
-            if all(sub in name_folded for sub in entry.name_substrings):
+            if all(
+                (sub in tokens) if sub.isalnum() else (sub in name_folded)
+                for sub in entry.name_substrings
+            ):
                 return entry
     return None
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
index b2b304f..2c37076 100644
--- a/tests/test_resolver.py
+++ b/tests/test_resolver.py
@@ -343,3 +343,41 @@ def test_credito_estruturado_trap_signal_carries_phrase():
 def test_every_result_carries_at_least_one_signal(ident):
     r = classify(normalize(name=ident))
     assert len(r.signals) >= 1
+
+
+# ── Review-bot regressions (PR #33 threads) ────────────────────────
+
+
+def test_unknown_ticker_suffix_is_indefinido_not_acao():
+    # CodeRabbit: bare ...13 (subscription receipt / odd code) must not be forced
+    # into RV/Ações; it has no structural signal → defer to HITL.
+    r = classify(normalize(ticker="XPTO13"))
+    assert r.macro_class == "Indefinido"
+    assert r.kind == "outro"
+
+
+def test_arbor_fiagro_does_not_match_global_equity_seed():
+    # CodeRabbit: ("ARBOR","FIA") must match by token: in "ARBOR FIAGRO" (real-estate/agro
+    # vehicle) FIA is only a prefix of FIAGRO, so it is NOT swept into RV/Internacional.
+    r = classify(normalize(name="ARBOR FIAGRO FII"))
+    assert not (r.macro_class == "Renda Variável" and r.exposure == "Internacional")
+
+
+def test_provider_failure_does_not_abort_resolution():
+    # CodeRabbit: a raising provider must not nuke the deterministic core result.
+    async def boom(norm, current):
+        raise RuntimeError("network down")
+
+    r = asyncio.run(resolve_asset(name="????", providers=[boom]))
+    assert r is not None
+    assert "provider_error:RuntimeError" in r.cascade  # tag = prefix + type(exc).__name__
+
+
+def test_contract_models_forbid_unknown_keys():
+    # CodeRabbit: a typo in an internally-built payload must fail loudly.
+    from pydantic import ValidationError
+
+    from findata.resolver.models import DebentureInfo
+
+    with pytest.raises(ValidationError):
+        DebentureInfo(incentivado_1243=True)  # typo: incentivADO