Invalid state saved to checkpoint without validation, causing permanent corruption. #6491

@goma-25

Description


Checked other resources

  • This is a bug, not a usage question. For questions, please use the LangChain Forum (https://forum.langchain.com/).
  • I added a clear and detailed title that summarizes the issue.
  • I read what a minimal reproducible example is (https://stackoverflow.com/help/minimal-reproducible-example).
  • I included a self-contained, minimal example that demonstrates the issue INCLUDING all the relevant imports. The code runs AS IS to reproduce the issue.

Example Code

from langchain_core.runnables import RunnableConfig
from typing import List
from pydantic import BaseModel
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver


class State(BaseModel):
    items: List[str] = []


def create_invalid_state(state: State) -> State:
    """Node that returns invalid state (None in List[str])."""
    state.items.append(None)  # Invalid! But no validation occurs on OUTPUT
    return state


# Build graph
graph = StateGraph(State)
graph.add_node("bad", create_invalid_state)
graph.add_node("next", lambda s: s)  # This node will never execute
graph.add_edge(START, "bad")
graph.add_edge("bad", "next")
graph.add_edge("next", END)

app = graph.compile(checkpointer=MemorySaver())
config: RunnableConfig = {"configurable": {"thread_id": "test"}}

print("Executing graph...")
try:
    app.invoke(State(), config)
except Exception as e:
    print(f"✓ Graph execution failed (expected): {type(e).__name__}")

print("\nAttempting to retrieve checkpoint...")
history = list(app.get_state_history(config)) # ValidationError is raised!!

Error Message and Stack Trace (if applicable)

---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
Cell In[2], line 36
     33     print(f"✓ Graph execution failed (expected): {type(e).__name__}")
     35 print("\nAttempting to retrieve checkpoint...")
---> 36 history = list(app.get_state_history(config))

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/langgraph/pregel/main.py:1360, in Pregel.get_state_history(self, config, filter, before, limit)
   1356 # eagerly consume list() to avoid holding up the db cursor
   1357 for checkpoint_tuple in list(
   1358     checkpointer.list(config, before=before, limit=limit, filter=filter)
   1359 ):
-> 1360     yield self._prepare_state_snapshot(
   1361         checkpoint_tuple.config, checkpoint_tuple
   1362     )

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/langgraph/pregel/main.py:1019, in Pregel._prepare_state_snapshot(self, config, saved, recurse, apply_pending_writes)
   1014 channels, managed = channels_from_checkpoint(
   1015     self.channels,
   1016     saved.checkpoint,
   1017 )
   1018 # tasks for this checkpoint
-> 1019 next_tasks = prepare_next_tasks(
   1020     saved.checkpoint,
   1021     saved.pending_writes or [],
   1022     self.nodes,
   1023     channels,
   1024     managed,
   1025     saved.config,
   1026     step,
   1027     stop,
   1028     for_execution=True,
   1029     store=self.store,
   1030     checkpointer=(
   1031         self.checkpointer
   1032         if isinstance(self.checkpointer, BaseCheckpointSaver)
   1033         else None
   1034     ),
   1035     manager=None,
   1036 )
   1037 # get the subgraphs
   1038 subgraphs = dict(self.get_subgraphs())

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/langgraph/pregel/_algo.py:466, in prepare_next_tasks(checkpoint, pending_writes, processes, channels, managed, config, step, stop, for_execution, store, checkpointer, manager, trigger_to_nodes, updated_channels, retry_policy, cache_policy)
    463 # Check if any processes should be run in next step
    464 # If so, prepare the values to be passed to them
    465 for name in candidate_nodes:
--> 466     if task := prepare_single_task(
    467         (PULL, name),
    468         None,
    469         checkpoint=checkpoint,
    470         checkpoint_id_bytes=checkpoint_id_bytes,
    471         checkpoint_null_version=null_version,
    472         pending_writes=pending_writes,
    473         processes=processes,
    474         channels=channels,
    475         managed=managed,
    476         config=config,
    477         step=step,
    478         stop=stop,
    479         for_execution=for_execution,
    480         store=store,
    481         checkpointer=checkpointer,
    482         manager=manager,
    483         input_cache=input_cache,
    484         cache_policy=cache_policy,
    485         retry_policy=retry_policy,
    486     ):
    487         tasks.append(task)
    488 return {t.id: t for t in tasks}

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/langgraph/pregel/_algo.py:807, in prepare_single_task(task_path, task_id_checksum, checkpoint, checkpoint_id_bytes, checkpoint_null_version, pending_writes, processes, channels, managed, config, step, stop, for_execution, store, checkpointer, manager, input_cache, cache_policy, retry_policy)
    805 # create task input
    806 try:
--> 807     val = _proc_input(
    808         proc,
    809         managed,
    810         channels,
    811         for_execution=for_execution,
    812         input_cache=input_cache,
    813         scratchpad=scratchpad,
    814     )
    815     if val is MISSING:
    816         return

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/langgraph/pregel/_algo.py:1057, in _proc_input(proc, managed, channels, for_execution, scratchpad, input_cache)
   1055 # If the process has a mapper, apply it to the value
   1056 if for_execution and proc.mapper is not None:
-> 1057     val = proc.mapper(val)
   1059 # Cache the input value
   1060 if input_cache is not None:

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/langgraph/graph/state.py:1237, in _coerce_state(schema, input)
   1236 def _coerce_state(schema: type[Any], input: dict[str, Any]) -> dict[str, Any]:
-> 1237     return schema(**input)

File /Volumes/p41/Users/mir/goma/repeat/.venv/lib/python3.13/site-packages/pydantic/main.py:253, in BaseModel.__init__(self, **data)
    251 # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    252 __tracebackhide__ = True
--> 253 validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    254 if self is not validated_self:
    255     warnings.warn(
    256         'A custom validator is returning a value other than `self`.\n'
    257         "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
    258         'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
    259         stacklevel=2,
    260     )

ValidationError: 1 validation error for State
items.0
  Input should be a valid string [type=string_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
Before task with name 'next' and path '('__pregel_pull', 'next')'

Description

Problem

When a node returns state that violates the Pydantic schema, the checkpoint is saved successfully, but it cannot be retrieved later because validation fails when the saved values are coerced back into the schema. This leaves the thread with permanently corrupted checkpoints that accessors such as get_state_history can no longer load.
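
The corruption is at the coercion layer, not the storage layer: the checkpointer still lists raw checkpoint tuples, but anything that rebuilds a State from them fails. A minimal sketch continuing the repro above (it assumes app.checkpointer exposes the MemorySaver passed at compile time, and that get_state follows the same snapshot-preparation path as get_state_history, which the traceback suggests):

# Continuing the repro: the raw checkpoint data is still stored...
raw_tuples = list(app.checkpointer.list(config))
print(f"Raw checkpoints stored: {len(raw_tuples)}")  # > 0: the data is there

# ...but any accessor that coerces it back into State hits the same ValidationError.
try:
    app.get_state(config)
except Exception as e:
    print(f"get_state also fails: {type(e).__name__}")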

Root Cause

LangGraph validates a node's INPUT (when channel values are coerced into the state schema while preparing the next task) but does NOT validate a node's OUTPUT (after execution completes). This creates a timing gap: the invalid state is checkpointed first, and the validation error only surfaces later, when the checkpoint is read back and coerced into the schema for the next task.
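
Until outputs are validated by the library, a user-side mitigation is to re-validate the model a node is about to return, so the schema violation raises inside the node before its update is written. A minimal sketch reusing the State class from the example above (since the node raises before returning, LangGraph receives no output to checkpoint and the thread keeps its last valid state):

# Workaround sketch: round-trip the mutated model through validation before
# returning it, so invalid values fail here rather than after checkpointing.
def create_state_validated(state: State) -> State:
    state.items.append(None)  # still an invalid value...
    return State.model_validate(state.model_dump())  # ...but it raises here, pre-write

A fix on the library side could apply the same schema coercion to node output that _coerce_state already applies to node input.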

System Info

System Information

OS: Darwin
OS Version: Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:40 PDT 2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T8132
Python Version: 3.13.5 (main, Jun 12 2025, 12:22:43) [Clang 20.1.4 ]

Package Information

langchain_core: 1.0.4
langchain: 1.0.5
langsmith: 0.4.21
langchain_anthropic: 0.3.19
langchain_google_genai: 2.1.10
langchain_openai: 0.3.32
langchain_tool: 0.1.1
langgraph_sdk: 0.2.4

    Labels

    bug (Something isn't working), pending (awaiting review/confirmation by maintainer)
