# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To run this recipe, please use the following command:
# python examples/biencoder/finetune.py --config examples/biencoder/llama3_2_1b_biencoder.yaml
# Or with torchrun for multi-GPU:
# torchrun --nproc-per-node=8 examples/biencoder/finetune.py --config examples/biencoder/llama3_2_1b_biencoder.yaml

seed: 42

step_scheduler:
  global_batch_size: 128
  local_batch_size: 4
  ckpt_every_steps: 500
  val_every_steps: 500
  num_epochs: 1
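# Note on batch sizing: with the 8-GPU torchrun launch shown above, these
# values imply 128 / (4 * 8) = 4 gradient-accumulation micro-steps per
# optimizer update (assuming the step scheduler derives accumulation from
# global_batch_size / (local_batch_size * data-parallel ranks)).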

dist_env:
  backend: nccl
  timeout_minutes: 1

model:
  _target_: nemo_automodel.components.models.biencoder.NeMoAutoModelBiencoder.from_pretrained
  pretrained_model_name_or_path: meta-llama/Llama-3.2-1B
  share_encoder: true
  add_linear_pooler: false
  out_dimension: 768
  do_gradient_checkpointing: false
  train_n_passages: 5
  eval_negative_size: 4
  pooling: avg
  l2_normalize: true
  t: 0.02
  use_liger_kernel: true
  use_sdpa_patching: true
  torch_dtype: bfloat16
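# Rough reading of the contrastive settings above (an interpretation, not
# stated by the recipe): train_n_passages: 5 usually means each query is
# scored against 1 positive and 4 negative passages, and t: 0.02 acts as the
# softmax temperature over the l2-normalized query/passage similarities in an
# InfoNCE-style loss. Verify against the NeMoAutoModelBiencoder docs.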

tokenizer:
  _target_: transformers.AutoTokenizer.from_pretrained
  pretrained_model_name_or_path: meta-llama/Llama-3.2-1B

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  dataset:
    _target_: nemo_automodel.components.datasets.llm.make_retrieval_dataset
    data_dir_list:
      - training_datasets/nqsh_shuffled_50k.json
      - training_datasets/miracl_train_es_llama3_1b_4m_512len.json
      - training_datasets/mldr_en_perc95_small.json
    data_type: train
    train_n_passages: 5
    eval_negative_size: 4
    seed: 42
    do_shuffle: true
  collate_fn:
    _target_: nemo_automodel.components.datasets.llm.RetrievalBiencoderCollator
    q_max_len: 512
    p_max_len: 512
    query_prefix: "query:"
    passage_prefix: "passage:"
    pad_to_multiple_of: 8
  shuffle: true
  num_workers: 0
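# The "query:" / "passage:" strings are prepended by the collator to mark the
# two input types (E5-style prefixes). Combined with train_n_passages: 5 and
# local_batch_size: 4, each micro-batch should encode 4 queries plus about
# 4 * 5 = 20 passages per GPU; this is an assumption based on how such
# collators typically group examples, not something this file states.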

# Optional: Uncomment to enable validation
# validation_dataloader:
#   _target_: torchdata.stateful_dataloader.StatefulDataLoader
#   dataset:
#     _target_: nemo_automodel.components.datasets.llm.make_retrieval_dataset
#     data_dir_list: training_datasets/validation.json
#     data_type: eval
#     train_n_passages: 5
#     eval_negative_size: 4
#     seed: 42
#     do_shuffle: false
#     max_train_samples: 1000
#     train_data_select_offset: 0
#   collate_fn:
#     _target_: nemo_automodel.components.datasets.llm.RetrievalBiencoderCollator
#     q_max_len: 512
#     p_max_len: 512
#     query_prefix: "query:"
#     passage_prefix: "passage:"
#     padding: longest
#     pad_to_multiple_of: 8
#   batch_size: 2
#   shuffle: false
#   num_workers: 0

optimizer:
  _target_: transformer_engine.pytorch.optimizers.fused_adam.FusedAdam
  lr: 5.0e-6
  weight_decay: 0.01
  adam_w_mode: true
  bias_correction: true
  master_weights: true
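# FusedAdam is imported from Transformer Engine, so TE must be installed in
# the environment. If it is not, a plain PyTorch AdamW should be a reasonable
# drop-in sketch (same lr and weight decay; the FusedAdam-specific flags are
# simply dropped), assuming the config loader can instantiate any _target_:
# optimizer:
#   _target_: torch.optim.AdamW
#   lr: 5.0e-6
#   weight_decay: 0.01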

# Learning rate scheduler
lr_scheduler:
  lr_warmup_steps: 100
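# Only warmup is set here; any remaining scheduler fields (total steps, decay
# shape) presumably fall back to the recipe's defaults.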

checkpoint:
  enabled: true
  checkpoint_dir: ./output/llama3_2_1b_biencoder/checkpoints
  model_save_format: torch_save
  save_consolidated: false
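# model_save_format: torch_save keeps checkpoints in torch's native format;
# flipping save_consolidated to true would presumably also write a single
# consolidated copy for easier export. Check the checkpointing docs before
# relying on that reading.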

distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1
  tp_size: 1
  cp_size: 1
  sequence_parallel: false
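# dp_size: none is read here as "infer the data-parallel size from the world
# size", so the 8-GPU launch above runs pure FSDP2 sharded data parallelism
# (tp_size, cp_size, and dp_replicate_size are all 1). This is an assumption;
# confirm against FSDP2Manager's documentation.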

# Uncomment and configure for W&B logging
# wandb:
#   project: biencoder-finetuning
#   entity: your_entity
#   name: llama3_2_1b_biencoder