Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/configs/distillation_math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ policy: &POLICY_BASE
top_k: null
stop_token_ids: null
stop_strings: null
ignore_eos: false
vllm_cfg:
async_engine: false
precision: ${...precision}
Expand Down
1 change: 1 addition & 0 deletions examples/configs/evals/eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ generation:
model_name: "Qwen/Qwen2.5-Math-1.5B-Instruct"
stop_token_ids: null
stop_strings: null
ignore_eos: false
vllm_cfg:
async_engine: false
precision: "bfloat16"
Expand Down
1 change: 1 addition & 0 deletions examples/configs/grpo_math_1B.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ policy:
top_k: null
stop_token_ids: null
stop_strings: null
ignore_eos: false
vllm_cfg:
async_engine: false
precision: ${policy.precision}
Expand Down
1 change: 1 addition & 0 deletions examples/configs/vlm_grpo_3B.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ policy:
top_k: null
stop_token_ids: null
stop_strings: null
ignore_eos: false
vllm_cfg:
async_engine: false # Only for internal testing, will be enabled by https://github.com/NVIDIA/NeMo-RL/issues/447.
precision: ${policy.precision}
Expand Down
1 change: 1 addition & 0 deletions examples/configs/vlm_grpo_3B_megatron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ policy:
top_k: null
stop_token_ids: null
stop_strings: null
ignore_eos: false
vllm_cfg:
async_engine: false
precision: ${policy.precision}
Expand Down
135 changes: 135 additions & 0 deletions examples/run_eval_random_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import pprint
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from omegaconf import OmegaConf
from transformers import AutoTokenizer, PreTrainedTokenizerBase

from nemo_rl.algorithms.utils import get_tokenizer
from nemo_rl.data.datasets import AllTaskProcessedDataset, RandomDataset
from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env
from nemo_rl.distributed.virtual_cluster import init_ray
from nemo_rl.environments.dummy_environment import DummyEnvironment
from nemo_rl.evals.eval import MasterConfig, run_env_eval, setup
from nemo_rl.models.generation import configure_generation_config
from nemo_rl.utils.config import load_config, parse_hydra_overrides

TokenizerType = PreTrainedTokenizerBase


def parse_args():
    """Split the CLI into known script arguments and Hydra-style overrides.

    Returns:
        A ``(namespace, overrides)`` pair where ``namespace.config`` is the
        optional YAML config path and ``overrides`` is the list of leftover
        tokens to be applied as Hydra overrides.
    """
    parser = argparse.ArgumentParser(description="Run Evaluation with configuration")
    parser.add_argument(
        "--config", type=str, default=None, help="Path to YAML config file"
    )
    # Anything argparse does not recognize is treated as a config override.
    known, extra = parser.parse_known_args()
    return known, extra


def setup_data(tokenizer: AutoTokenizer, data_config, env_configs):
    """Build the random-dataset eval pipeline.

    Args:
        tokenizer: Tokenizer used to process the generated samples.
        data_config: The ``data`` section of the master config. Must contain
            ``input_len_or_input_len_generator`` (an int, a callable, or a
            dict length-distribution spec) and ``max_input_seq_length``.
        env_configs: The ``env`` section of the master config (currently
            unused here; kept for signature parity with the other scripts).

    Returns:
        A ``(dataset, env, tokenizer)`` triple ready for ``setup``.
    """
    import random  # local: only needed when the length spec is a distribution

    print("Setting up data...")

    # Normalize the length spec BEFORE building the dataset. The config may
    # supply a dict (mean/stddev case); RandomDataset's processor treats any
    # non-callable as a literal length, so a dict passed through verbatim
    # would reach torch.randint(..., (dict,)) and crash downstream.
    input_len_spec = data_config["input_len_or_input_len_generator"]
    if isinstance(input_len_spec, dict):
        # NOTE(review): this mirrors the GRPO script's
        # get_sequence_length_generator behavior — assumes a
        # {"mean": ..., "stddev": ...} spec; confirm against the config schema.
        mean = input_len_spec["mean"]
        stddev = input_len_spec["stddev"]

        def _sample_input_len() -> int:
            # Draw a positive integer length from N(mean, stddev);
            # clamp at 1 so a low draw never produces an empty sequence.
            return max(1, int(random.gauss(mean, stddev)))

        input_len_spec = _sample_input_len
        # Store the callable back so later consumers of the config agree.
        data_config["input_len_or_input_len_generator"] = input_len_spec

    # load dataset
    base_dataset = RandomDataset(input_len_spec)

    # Dummy environment: random-dataset eval has no real reward/verification.
    # Reuses the math environment's python env so no new venv is built.
    env = DummyEnvironment.options(
        runtime_env={
            "py_executable": get_actor_python_env(
                "nemo_rl.environments.math_environment.MathEnvironment"
            )
        }
    ).remote()

    dataset = AllTaskProcessedDataset(
        dataset=base_dataset.formatted_ds["train"],
        tokenizer=tokenizer,
        default_task_data_spec=base_dataset.task_spec,
        task_data_processors=base_dataset.processor,
        max_seq_length=data_config["max_input_seq_length"],
    )

    return dataset, env, tokenizer


def main():
    """Entry point: load config, initialize Ray, and run the random-dataset eval."""
    args, overrides = parse_args()

    # Fall back to the default eval config shipped with the examples.
    if not args.config:
        args.config = os.path.join(
            os.path.dirname(__file__), "configs", "evals", "eval.yaml"
        )

    config = load_config(args.config)
    print(f"Loaded configuration from: {args.config}")

    if overrides:
        print(f"Overrides: {overrides}")
        config = parse_hydra_overrides(config, overrides)

    # Resolve interpolations and drop down to a plain dict.
    config: MasterConfig = OmegaConf.to_container(config, resolve=True)
    print("Applied CLI overrides")

    print("Final config:")
    pprint.pprint(config)

    init_ray()

    tokenizer = get_tokenizer(config["tokenizer"])
    config["generation"] = configure_generation_config(
        config["generation"], tokenizer, is_eval=True
    )
    config["generation"]["vllm_cfg"]["load_format"] = (
        "dummy"  # for random dataset eval, we use dummy weight initialization
    )

    dataset, env, tokenizer = setup_data(tokenizer, config["data"], config["env"])
    vllm_generation, dataloader, master_config = setup(config, tokenizer, dataset)

    run_env_eval(vllm_generation, dataloader, env, master_config)


if __name__ == "__main__":
    main()
Loading
Loading