load_state() does not actually load optimizer state. The API has been fixed to make loading optimizer state explicit.
This commit migrates load_state() callers to use
load_state_with_optimizer() when appropriate. Next we'll fix callers of create_training_client_from_state().
Signed-off-by: Daniel Xu <[email protected]>
llms-full.txt (+23 −9)
```diff
@@ -607,11 +607,12 @@ We'll start with a couple of general pages that'll be relevant to almost all of
 
 # Saving and loading weights and optimizer state
 
-During training, you'll need to save checkpoints for two main purposes: *sampling* (to test your model) and *resuming training* (to continue from where you left off). The `TrainingClient` provides three methods to handle these cases:
+During training, you'll need to save checkpoints for two main purposes: *sampling* (to test your model) and *resuming training* (to continue from where you left off). The `TrainingClient` provides four methods to handle these cases:
 
 1. `save_weights_for_sampler()`: saves a copy of the model weights that can be used for sampling.
 2. `save_state()`: saves the weights and the optimizer state. You can fully resume training from this checkpoint.
-3. `load_state()`: load the weights and the optimizer state. You can fully resume training from this checkpoint.
+3. `load_state()`: loads the model weights only (without optimizer state). Use this when you want to start fresh training from a checkpoint, e.g., starting DPO training from an SFT checkpoint.
+4. `load_state_with_optimizer()`: loads the model weights and optimizer state. Use this when resuming interrupted training, as it restores the full training state, including optimizer momentum.
 
 Note that (1) is faster and requires less storage space than (2).
```
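To make the behavioral difference between the two loading methods concrete, here is a minimal sketch using a toy stand-in class. This is not the real `TrainingClient` implementation; the class, its fields, and the checkpoint layout are all hypothetical, chosen only to illustrate the semantics described in the diff above.

```python
class ToyTrainingClient:
    """Hypothetical stand-in for TrainingClient; illustrates checkpoint semantics only."""

    def __init__(self):
        self.weights = {"w": 0.0}
        self.optimizer_state = {"momentum": 0.0}

    def save_state(self):
        # Saves both the weights and the optimizer state.
        return {"weights": dict(self.weights),
                "optimizer": dict(self.optimizer_state)}

    def load_state(self, checkpoint):
        # Loads the model weights only; the optimizer state is reset,
        # as when starting fresh training from an existing checkpoint.
        self.weights = dict(checkpoint["weights"])
        self.optimizer_state = {"momentum": 0.0}

    def load_state_with_optimizer(self, checkpoint):
        # Loads weights AND optimizer state, fully resuming training.
        self.weights = dict(checkpoint["weights"])
        self.optimizer_state = dict(checkpoint["optimizer"])


# Train a little, then checkpoint.
client = ToyTrainingClient()
client.weights["w"] = 1.5
client.optimizer_state["momentum"] = 0.9
ckpt = client.save_state()

# Fresh start: weights come back, momentum does not.
fresh = ToyTrainingClient()
fresh.load_state(ckpt)

# Resume: both weights and momentum come back.
resumed = ToyTrainingClient()
resumed.load_state_with_optimizer(ckpt)

print(fresh.optimizer_state["momentum"])    # 0.0
print(resumed.optimizer_state["momentum"])  # 0.9
```

The sketch shows why the commit migrates resume-style callers to `load_state_with_optimizer()`: a caller that relied on `load_state()` restoring optimizer state would silently continue training with reset momentum.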