unslothai · danielhanchen · Nov 20, 2025 · Nov 17, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
@@ -822,12 +822,21 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         extra_args += check_dr_grpo
 
     # Check GRPO num_generations mismatch
-    if "per_device_train_batch_size" in call_args and "num_generations" in call_args:
+    if (
+        "per_device_train_batch_size" in call_args
+        and "num_generations" in call_args
+        and "steps_per_generation" in call_args
+        and "generation_batch_size" in call_args
+    ):
+        # If WORLD_SIZE has not been set by accelerate or torchrun at this point, it defaults to 1.
         check_num_generations = (
-            "if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:\n"
-            "    print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\\n"
+            "if steps_per_generation is None and generation_batch_size is None:\n"
+            "    ga = gradient_accumulation_steps\n"
+            "    world_size = int(os.environ.get('WORLD_SIZE', '1'))\n"
+            "    if (ga * world_size * per_device_train_batch_size) % num_generations != 0:\n"
+            "        print('Unsloth: We now expect `per_device_train_batch_size` * `gradient_accumulation_steps` * `world_size` to be a multiple of `num_generations`.\\n"
             "We will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))\n"
-            "    per_device_train_batch_size = num_generations\n"
+            "        per_device_train_batch_size = num_generations\n"
             "\n"
         )
         extra_args += check_num_generations

diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py
@@ -853,6 +853,7 @@ def compute_loss(
 # TRL warns if batch size is not a multiple of num_generations -> fix this.
 def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source):
     if "divisible by the number of generations" not in RLTrainer_source:
+        # Later TRL versions no longer contain this "divisible by the number of generations" text.
         return ""
     if "num_generations" not in RLConfig_source:
         return ""