Commit 7277489

Apply isort and black reformatting
Signed-off-by: genquan9 <[email protected]>
1 parent 2bf98a6 commit 7277489

7 files changed: 134 additions and 142 deletions


nemo/collections/llm/gpt/model/gemma3.py

Lines changed: 1 addition & 2 deletions
@@ -21,6 +21,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, Callable, Optional, Tuple, Union
 
+import torch
 from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
 from megatron.core.inference.contexts import BaseInferenceContext
 from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
@@ -30,8 +31,6 @@
 from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
 from megatron.core.transformer.enums import AttnBackend, AttnMaskType
 from megatron.core.transformer.mlp import MLP, MLPSubmodules
-
-import torch
 from torch import Tensor, nn
 
 from nemo.collections.llm.fn.activation import openai_gelu
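The `import torch` move in this hunk is standard isort behavior: imports are grouped into sections (standard library, then third-party, then first-party, separated by blank lines), and within a section plain `import x` statements sort ahead of `from x import ...` statements. A minimal illustration of the resulting layout (the grouping rules are real isort defaults; treating `nemo` as first-party is an assumption about this repo's isort configuration):

# Section 1: standard library; plain imports first, then from-imports.
import logging
from pathlib import Path

# Section 2: third-party. "import torch" sorts ahead of the from-imports,
# which is exactly why this commit moved it above the megatron lines.
import torch
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from torch import Tensor, nn

# Section 3: first-party (assumed configured as known_first_party = ["nemo"]).
from nemo.collections.llm.fn.activation import openai_gelu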

nemo/collections/vlm/gemma3vl/data/task_encoder.py

Lines changed: 8 additions & 7 deletions
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import logging
 import json
+import logging
 from dataclasses import dataclass, field
 from typing import Optional
 
@@ -104,7 +104,6 @@ def encode_batch(self, batch_data: DataBatch) -> dict:
         batch_data["media"] = batch_data["media"].reshape(-1, *batch_data["media"].shape[2:])
         return batch_data
 
-
     def encode_vqa_sample_multi_turns(self, input_sample: VQASample):
         images = input_sample.image if isinstance(input_sample.image, list) else [input_sample.image]
 
@@ -116,10 +115,12 @@ def encode_vqa_sample_multi_turns(self, input_sample: VQASample):
             messages.append(context)
 
         # Apply chat template and process with HF processor
-        #`add_generation_prompt=False` because we're providing the full ground truth sequence
+        # `add_generation_prompt=False` because we're providing the full ground truth sequence
         # We remove the <bos> token using removeprefix('<bos>') since we're finetuning.
         # The Processor will add this token before training and the model expects only one.
-        converted_messages = self.hf_processor.apply_chat_template(messages, add_generation_prompt=False, tokenize=False).removeprefix('<bos>')
+        converted_messages = self.hf_processor.apply_chat_template(
+            messages, add_generation_prompt=False, tokenize=False
+        ).removeprefix('<bos>')
         outputs = self.hf_processor(
             images=images,
             text=converted_messages,
@@ -140,7 +141,9 @@ def encode_vqa_sample_multi_turns(self, input_sample: VQASample):
             if context['role'] != 'assistant':
                 continue
             # Tokenize the answer, including the stop string if provided
-            answer_with_stop = context['content'][0]['text'].rstrip().lstrip() + "<end_of_turn>" + (self.config.stop_string or "")
+            answer_with_stop = (
+                context['content'][0]['text'].rstrip().lstrip() + "<end_of_turn>" + (self.config.stop_string or "")
+            )
             answer_with_stop = answer_with_stop.rstrip().lstrip()
             answer_tokens = self.tokenizer.tokenizer(answer_with_stop, add_special_tokens=False)["input_ids"]
             answer_tokens_tensor = torch.tensor(answer_tokens, device=tokens.device)  # Ensure same device
@@ -171,7 +174,6 @@ def encode_vqa_sample_multi_turns(self, input_sample: VQASample):
                 break
         return tokens, labels, images
 
-
     def encode_vqa_sample(self, input_sample: VQASample) -> DataSample:
         """Encode a VQA sample into a DataSample format.
 
@@ -228,4 +230,3 @@ def encode_vqa_sample(self, input_sample: VQASample) -> DataSample:
         )
 
         return sample
-
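Black's changes in this file are pure re-wrapping to respect the line-length limit; no logic changes. As an aside, the `.rstrip().lstrip()` chain that survives the reformat is a long-hand `.strip()`, and `removeprefix` (Python 3.9+) is a no-op when the prefix is absent. A quick illustrative check, not part of the commit:

text = "  some answer text  "

# rstrip() drops trailing whitespace, lstrip() drops leading whitespace;
# chaining the two is exactly what strip() does in one call.
assert text.rstrip().lstrip() == text.strip()

# str.removeprefix removes the marker only if present, which is why the
# task encoder uses it to drop at most one leading <bos>.
assert "<bos>hello".removeprefix("<bos>") == "hello"
assert "hello".removeprefix("<bos>") == "hello"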

nemo/collections/vlm/gemma3vl/model/base.py

Lines changed: 1 addition & 2 deletions
@@ -26,7 +26,6 @@
 from megatron.core.inference_params import InferenceParams
 from megatron.core.packed_seq_params import PackedSeqParams
 from megatron.core.parallel_state import get_context_parallel_group
-from nemo.collections.vlm.qwen2vl.data.multimodal_tokens import IGNORE_INDEX
 from megatron.core.tensor_parallel import scatter_to_sequence_parallel_region
 from megatron.core.transformer import MegatronModule
 from megatron.core.transformer.transformer_config import TransformerConfig
@@ -38,11 +37,11 @@
 from nemo.collections.llm.gpt.model.gemma3 import Gemma3Config
 from nemo.collections.vlm.gemma3vl.model.vision import Gemma3VLMultimodalProjectorConfig, Gemma3VLVisionConfig
 from nemo.collections.vlm.neva.model.base import MODEL_CONFIG_ATTR, NevaModel, restore_model_weights
+from nemo.collections.vlm.qwen2vl.data.multimodal_tokens import IGNORE_INDEX
 from nemo.lightning import io
 from nemo.lightning.pytorch.optim import OptimizerModule
 from nemo.utils.import_utils import safe_import_from
 
-
 TENorm, _ = safe_import_from("megatron.core.extensions.transformer_engine", "TENorm")
 
 HAVE_TEX = True
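The `TENorm, _ = safe_import_from(...)` line whose surrounding blank lines get tightened here is NeMo's guarded-import helper: judging from this call site it returns the requested attribute plus a success flag, degrading gracefully when Transformer Engine is absent. A minimal sketch of that pattern, assuming this simplified behavior (the real helper in `nemo.utils.import_utils` handles more cases):

import importlib
from typing import Any, Tuple


def safe_import_from(module: str, symbol: str) -> Tuple[Any, bool]:
    """Return (attribute, True) if importable, else (None, False)."""
    try:
        mod = importlib.import_module(module)
        return getattr(mod, symbol), True
    except (ImportError, AttributeError):
        return None, False


# Mirrors the call site in gemma3vl/model/base.py: TENorm is only usable
# when megatron's Transformer Engine extensions are installed.
TENorm, HAVE_TE = safe_import_from("megatron.core.extensions.transformer_engine", "TENorm")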

scripts/vlm/gemma3vl_export.py

Lines changed: 58 additions & 58 deletions
@@ -1,73 +1,73 @@
 """Export Gemma3VL NeMo checkpoints to Hugging Face format."""
 
 import argparse
-from huggingface_hub import hf_hub_download
 import importlib
 import os
-from pathlib import Path
 import sys
+from pathlib import Path
+
+from huggingface_hub import hf_hub_download
+
 from nemo.collections import llm
 
 
 def main():
-  parser = argparse.ArgumentParser(
-    description=(
-      "Export NeMo vision language model checkpoint to Hugging Face format."
-    )
-  )
-  parser.add_argument(
-    "--nemo_ckpt_path",
-    type=str,
-    required=True,
-    default=None,
-    help="Path to the NeMo checkpoint directory.",
-  )
-  parser.add_argument(
-    "--output_hf_path",
-    type=str,
-    required=True,
-    default=None,
-    help="Path to save the converted Hugging Face checkpoint.",
-  )
-  parser.add_argument(
-    "--model_name",
-    type=str,
-    required=False,
-    default=None,
-    help="Name of the model on Hugging Face.",
-  )
+    parser = argparse.ArgumentParser(
+        description=("Export NeMo vision language model checkpoint to Hugging Face format.")
+    )
+    parser.add_argument(
+        "--nemo_ckpt_path",
+        type=str,
+        required=True,
+        default=None,
+        help="Path to the NeMo checkpoint directory.",
+    )
+    parser.add_argument(
+        "--output_hf_path",
+        type=str,
+        required=True,
+        default=None,
+        help="Path to save the converted Hugging Face checkpoint.",
+    )
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        required=False,
+        default=None,
+        help="Name of the model on Hugging Face.",
+    )
 
-  args = parser.parse_args()
+    args = parser.parse_args()
 
-  llm.export_ckpt(
-    path=Path(args.nemo_ckpt_path),
-    target="hf",
-    output_path=Path(args.output_hf_path),
-    overwrite=True,
-  )
-  if args.model_name:
-    # Copy necessary files if exist from HuggingFace for Gemma3VL model export.
-    copy_file_list = [
-      "preprocessor_config.json",
-      "chat_template.json",
-      "config.json",
-      "generation_config.json",
-      "merges.txt",
-      "tokenizer.json",
-      "tokenizer_config.json",
-      "vocab.json",
-    ]
-    for file_name in copy_file_list:
-      try:
-        downloaded_path = hf_hub_download(
-          repo_id=args.model_name,
-          filename=file_name,
-          local_dir=args.output_hf_path,
-        )
-        print(f"Downloaded {downloaded_path} during export gamma3vl models.")
-      except:
-        print(f"Ignore {file_name} during export gamma3vl models.")
+    llm.export_ckpt(
+        path=Path(args.nemo_ckpt_path),
+        target="hf",
+        output_path=Path(args.output_hf_path),
+        overwrite=True,
+    )
+    if args.model_name:
+        # Copy necessary files if exist from HuggingFace for Gemma3VL model export.
+        copy_file_list = [
+            "preprocessor_config.json",
+            "chat_template.json",
+            "config.json",
+            "generation_config.json",
+            "merges.txt",
+            "tokenizer.json",
+            "tokenizer_config.json",
+            "vocab.json",
+        ]
+        for file_name in copy_file_list:
+            try:
+                downloaded_path = hf_hub_download(
+                    repo_id=args.model_name,
+                    filename=file_name,
+                    local_dir=args.output_hf_path,
+                )
+                print(f"Downloaded {downloaded_path} during export gamma3vl models.")
+            except:
+                print(f"Ignore {file_name} during export gamma3vl models.")
 
 
 if __name__ == "__main__":
-  main()
+    main()
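One thing the formatters leave untouched here is the bare `except:` around `hf_hub_download`, which also swallows `KeyboardInterrupt` and `SystemExit`. A narrower variant would catch only download failures; the sketch below assumes `huggingface_hub.utils.EntryNotFoundError` is the relevant exception in the installed `huggingface_hub` version, and is not part of this commit:

from huggingface_hub import hf_hub_download
from huggingface_hub.utils import EntryNotFoundError

try:
    downloaded_path = hf_hub_download(
        repo_id="google/gemma-3-4b-it",  # illustrative repo id
        filename="preprocessor_config.json",
        local_dir="./export_out",
    )
    print(f"Downloaded {downloaded_path}.")
except EntryNotFoundError:
    # The file simply does not exist in this repo; safe to skip.
    print("File not found in repo; skipping.")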

scripts/vlm/gemma3vl_finetune.py

Lines changed: 23 additions & 22 deletions
@@ -20,34 +20,35 @@
     --data_dir=<YOUR DATA DIR>
 """
 from scripts.vlm import gemma3vl_utils as train_utils
+
 # Need to run these filters before importing nemo.
 train_utils.filter_warnings()
 train_utils.filter_grad_bucket_logs()
 
 import argparse
 import time
+
 import torch
+
 torch.autograd.set_detect_anomaly(True)
 import os
-from lightning.pytorch.loggers import WandbLogger
-from lightning.pytorch.loggers import TensorBoardLogger
+
+from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
 from megatron.core.distributed import DistributedDataParallelConfig
 from megatron.core.optimizer import OptimizerConfig
+from transformers import Gemma3ImageProcessor, Gemma3Processor
+
 from nemo import lightning as nl
 from nemo.collections import llm, vlm
-
 from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
 from nemo.collections.multimodal.data.energon import EnergonMultiModalDataModule
 from nemo.collections.vlm.gemma3vl.data.mock import Gemma3VLMockDataModule
+from nemo.collections.vlm.gemma3vl.data.task_encoder import TaskEncoder as Gemma3VLTaskEncoder
+from nemo.collections.vlm.gemma3vl.data.task_encoder import TaskEncoderConfig as Gemma3VLTaskEncoderConfig
 from nemo.lightning.pytorch.optim import CosineAnnealingScheduler
 from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule
-from nemo.utils.exp_manager import TimingCallback
 from nemo.utils import logging
-from nemo.collections.vlm.gemma3vl.data.task_encoder import (
-    TaskEncoder as Gemma3VLTaskEncoder,
-    TaskEncoderConfig as Gemma3VLTaskEncoderConfig,
-)
-from transformers import Gemma3ImageProcessor, Gemma3Processor
+from nemo.utils.exp_manager import TimingCallback
 
 
 def main(args):
@@ -149,18 +150,14 @@ def main(args):
         name=args.exp_name,
         ckpt=checkpoint_callback,
         tensorboard=TensorBoardLogger(save_dir="tensorboard", name=""),
-        wandb=WandbLogger(project=args.wandb_project, name=args.exp_name)
-        if args.wandb_project is not None
-        else None,
+        wandb=WandbLogger(project=args.wandb_project, name=args.exp_name) if args.wandb_project is not None else None,
     )
 
     # Auto resume setup
     resume = nl.AutoResume(
         resume_if_exists=False,
         resume_ignore_no_checkpoint=True,
-        restore_config=nl.RestoreConfig(path=args.resume_from_ckpt)
-        if args.resume_from_ckpt is not None
-        else None,
+        restore_config=nl.RestoreConfig(path=args.resume_from_ckpt) if args.resume_from_ckpt is not None else None,
     )
 
     # Optimizer and scheduler setup
@@ -205,7 +202,7 @@
     parser.add_argument(
         "--restore_path", type=str, required=False, default=None, help="Path to restore model from checkpoint"
     )
-    parser.add_argument("--log_dir", type=str, required=False, default="/logs", help="Path to the log folder")
+    parser.add_argument("--log_dir", type=str, required=False, default="/logs", help="Path to the log folder")
    parser.add_argument("--tp_size", type=int, required=False, default=1)
     parser.add_argument("--pp_size", type=int, required=False, default=1)
     parser.add_argument("--num_nodes", type=int, required=False, default=1)
@@ -216,14 +213,20 @@
     parser.add_argument("--val_check_interval", type=int, required=False, default=10)
     parser.add_argument("--limit_val_batches", type=float, required=False, default=1.0)
     parser.add_argument("--lr", type=float, required=False, default=2.0e-06, help="Learning rate")
-    parser.add_argument("--hf_model_id", type=str, required=False, default="google/gemma-3-4b-it", help="HuggingFace Gemma3VL model ids")
+    parser.add_argument(
+        "--hf_model_id",
+        type=str,
+        required=False,
+        default="google/gemma-3-4b-it",
+        help="HuggingFace Gemma3VL model ids",
+    )
     parser.add_argument("--gbs", type=int, required=False, default=32, help="Global batch size")
     parser.add_argument("--mbs", type=int, required=False, default=1, help="Micro batch size")
     parser.add_argument("--save_top_k", type=int, required=False, default=1, help="Save top k")
-    parser.add_argument("--num_workers", type=int, required=False, default=2, help="The num of workers for data loader")
     parser.add_argument(
-        "--max_sequence_length", type=int, required=False, default=512, help="Maximum sequence length"
+        "--num_workers", type=int, required=False, default=2, help="The num of workers for data loader"
     )
+    parser.add_argument("--max_sequence_length", type=int, required=False, default=512, help="Maximum sequence length")
     parser.add_argument(
         "--resume_from_ckpt",
         type=str,
@@ -232,9 +235,7 @@
         help="Path to restore model from checkpoint",
     )
     parser.add_argument("--wandb_project", type=str, required=False, default=None)
-    parser.add_argument(
-        "--exp_name", type=str, required=False, default="gemma3vl_finetune"
-    )
+    parser.add_argument("--exp_name", type=str, required=False, default="gemma3vl_finetune")
 
     args = parser.parse_args()
     main(args)
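This script intentionally executes code between its imports: the warning filters must run before `nemo` is imported, and `set_detect_anomaly` is enabled before further imports. isort tolerates this because it only sorts within each contiguous import block and never moves imports across intervening statements. When even that is too much, isort can be fenced off explicitly; a small illustrative example (the `# isort: off` / `# isort: on` directives are real isort syntax, the module names are placeholders):

# isort: off
# Imports between these markers are left exactly as written, which is
# useful when import order has side effects (e.g., warning filters).
import my_side_effect_filters  # hypothetical module; must load first
import nemo  # must come after the filters above
# isort: on

# Outside the markers, isort sorts and groups normally.
import argparse
import os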
