@@ -1550,6 +1550,7 @@ def data_processing(train_args: TrainingArgs) -> None:
15501550def create_eval_job (
15511551 namespace : str ,
15521552 eval_type : str ,
1553+ judge_serving_model_secret : str ,
15531554 nproc_per_node : int = 1 ,
15541555) -> kubernetes .client .V1Job :
15551556 """
@@ -1560,6 +1561,7 @@ def create_eval_job(
15601561 Args:
15611562 namespace (str): The namespace in which the job will be created.
15621563 eval_type (str): The type of evaluation to run.
1564+ judge_serving_model_secret (str): The name of the Kubernetes Secret containing the judge serving model details.
15631565 nproc_per_node (int): The number of processes per node.
15641566
15651567 Returns:
@@ -1729,7 +1731,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
17291731 max_workers = usable_cpu_count
17301732
17311733 # filter models_list to ignore any .jsonl files present in the directory
1732- models_list = [model for model in models_list if model.endswith(".jsonl") != True ]
1734+ models_list = [model for model in models_list if not model.endswith(".jsonl")]
17331735 for model_name in models_list:
17341736 print(f"Serving candidate model: {model_name}")
17351737 model_path = f"{models_path_prefix}/{model_name}"
@@ -2275,7 +2277,7 @@ def find_node_dataset_directories(base_dir: str):
22752277 env_from = [
22762278 kubernetes .client .V1EnvFromSource (
22772279 secret_ref = kubernetes .client .V1SecretEnvSource (
2278- name = JUDGE_SERVING_NAME
2280+ name = judge_serving_model_secret
22792281 )
22802282 ),
22812283 ],
@@ -2310,7 +2312,7 @@ def find_node_dataset_directories(base_dir: str):
23102312 env_from = [
23112313 kubernetes .client .V1EnvFromSource (
23122314 secret_ref = kubernetes .client .V1SecretEnvSource (
2313- name = JUDGE_SERVING_NAME
2315+ name = judge_serving_model_secret
23142316 )
23152317 ),
23162318 ],
@@ -2854,6 +2856,9 @@ def decode_base64(data):
28542856 f"Secret { judge_serving_model_secret } not found in namespace { namespace } ."
28552857 ) from exc
28562858
2859+ # Set the judge secret in the context for the evaluation job
2860+ ctx .obj ["judge_serving_model_secret" ] = judge_serving_model_secret
2861+
28572862 # list of PVCs to create and their details
28582863 pvcs = [
28592864 {
@@ -3112,6 +3117,13 @@ def evaluation(ctx: click.Context) -> str:
31123117 namespace = ctx .obj ["namespace" ]
31133118 eval_type = ctx .obj ["eval_type" ]
31143119 dry_run = ctx .obj ["dry_run" ]
3120+ judge_serving_model_secret = ctx .obj ["judge_serving_model_secret" ]
3121+
3122+ # This should only happen if the script is called with the "evaluation" subcommand
3123+ if not judge_serving_model_secret :
3124+ raise ValueError (
3125+ "Judge serving model secret must be provided with --judge-serving-model-secret."
3126+ )
31153127
31163128 if eval_type is None :
31173129 raise ValueError (
@@ -3121,7 +3133,11 @@ def evaluation(ctx: click.Context) -> str:
31213133 logger .info ("Running %s evaluation." , eval_type )
31223134
31233135 # Create and run the evaluation job
3124- job = create_eval_job (namespace = namespace , eval_type = eval_type )
3136+ job = create_eval_job (
3137+ namespace = namespace ,
3138+ eval_type = eval_type ,
3139+ judge_serving_model_secret = judge_serving_model_secret ,
3140+ )
31253141
31263142 if dry_run :
31273143 logger .info ("Dry run: Job would be created.\n %s" , job )
@@ -3196,4 +3212,4 @@ def upload_trained_model(ctx: click.Context):
31963212 logger .info ("Failed to load kube config. Trying in-cluster config" )
31973213 kubernetes .config .load_incluster_config ()
31983214
3199- cli ()
3215+ cli ()
0 commit comments