Apply coderabbit comments

guyueh1 · guyueh1 · commit 2e2f4d3712a9 · 2025-11-09T15:06:12.000-08:00
Signed-off-by: Guyue Huang <guyueh@nvidia.com>
diff --git a/nemo_rl/data/__init__.py b/nemo_rl/data/__init__.py
@@ -41,6 +41,10 @@ class DataConfig(TypedDict):
     # This saturates CPU threads without consuming too much memory
     # However, setting it too high might cause memory issues for long seqlens.
     num_workers: NotRequired[int]
+    # Specifies input sequence length for synthetic datasets.
+    # - int: fixed input length
+    # - Dict[str, Any]: generator config with 'mean' and 'std' keys for sampling from a normal distribution
+    # Used by RandomDataset for benchmarking. Recommended default: not set (dataset-specific).
     input_len_or_input_len_generator: NotRequired[Dict[str, Any] | int]
 
 
diff --git a/nemo_rl/data/datasets/random_dataset.py b/nemo_rl/data/datasets/random_dataset.py
@@ -24,6 +24,17 @@
 
 
 class RandomDataset:
+    """Synthetic dataset that generates random input sequences of varying lengths.
+
+    This dataset is used for benchmarking purposes. It is not meant to be used for training or evaluation.
+
+    Args:
+        input_len_or_input_len_generator: An integer giving a fixed input length, or a callable that generates the input length.
+
+    Returns:
+        A RandomDataset object.
+    """
+
     def __init__(
         self,
         input_len_or_input_len_generator: Callable | int,
diff --git a/nemo_rl/environments/dummy_environment.py b/nemo_rl/environments/dummy_environment.py
@@ -22,7 +22,7 @@
 from nemo_rl.environments.interfaces import EnvironmentInterface, EnvironmentReturn
 
 
-@ray.remote(max_restarts=-1, max_task_retries=-1)
+@ray.remote(max_restarts=-1, max_task_retries=-1)  # pragma: no cover
 class DummyEnvironment(EnvironmentInterface):
     def _init__(self):
         pass
diff --git a/nemo_rl/models/generation/interfaces.py b/nemo_rl/models/generation/interfaces.py
@@ -116,7 +116,22 @@ class ColocationConfig(TypedDict):
 
 
 class GenerationConfig(TypedDict):
-    """Configuration for generation."""
+    """Configuration for generation.
+
+    Args:
+        backend: The backend to use for generation.
+        max_new_tokens: The maximum number of tokens to generate.
+        temperature: The temperature for sampling.
+        top_p: The top-p sampling parameter.
+        top_k: The top-k sampling parameter.
+        model_name: The name of the model.
+        stop_token_ids: The list of token IDs to stop generation.
+        stop_strings: The list of strings to stop generation.
+        ignore_eos: Whether to ignore the EOS token. This is only used for performance benchmarking purposes.
+        output_len_or_output_len_generator: An integer or a dictionary with keys 'mean' and 'std' for the normal distribution that samples the output length. This is only used for performance benchmarking purposes.
+        colocated: The configuration for colocated generation.
+        _pad_token_id: The padding token ID.
+    """
 
     backend: str
     max_new_tokens: int
diff --git a/nemo_rl/utils/sequence_length_generator.py b/nemo_rl/utils/sequence_length_generator.py
@@ -17,8 +17,19 @@
 
 
 def get_sequence_length_generator(sequence_length_generator_cfg: dict) -> Callable:
+    """Returns a callable that samples sequence lengths from a normal distribution.
+
+    Args:
+        sequence_length_generator_cfg: Dict with keys 'mean' and 'std' for the normal distribution.
+
+    Returns:
+        A zero-argument callable that, each time it is invoked, returns a freshly sampled sequence length (int >= 1).
+    """
     mean = sequence_length_generator_cfg["mean"]
     std = sequence_length_generator_cfg["std"]
-    length = int(np.round(np.random.normal(mean, std)))
-    length = max(1, length)
-    return length
+
+    def sample_length() -> int:
+        length = int(np.round(np.random.normal(mean, std)))
+        return max(1, length)
+
+    return sample_length