Skip to content

Commit cbf4a40

Browse files
MichaelClifford authored and leseb committed
incorporate #117 from redhat-et/ilab-on-ocp
Signed-off-by: Michael Clifford <[email protected]> Co-authored-by: Michael Clifford <[email protected]> Co-authored-by: Sébastien Han <[email protected]>
1 parent 1fb06b0 commit cbf4a40

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

instructlab/standalone/standalone.py

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1550,6 +1550,7 @@ def data_processing(train_args: TrainingArgs) -> None:
15501550
def create_eval_job(
15511551
namespace: str,
15521552
eval_type: str,
1553+
judge_serving_model_secret: str,
15531554
nproc_per_node: int = 1,
15541555
) -> kubernetes.client.V1Job:
15551556
"""
@@ -1560,6 +1561,7 @@ def create_eval_job(
15601561
Args:
15611562
namespace (str): The namespace in which the job will be created.
15621563
eval_type (str): The type of evaluation to run.
1564+
judge_serving_model_secret (str): The name of the Kubernetes Secret containing the judge serving model details.
15631565
nproc_per_node (int): The number of processes per node.
15641566
15651567
Returns:
@@ -1729,7 +1731,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
17291731
max_workers = usable_cpu_count
17301732
17311733
# modify model_list to ignore any jsonl files present in the directory
1732-
models_list = [model for model in models_list if model.endswith(".jsonl") != True]
1734+
models_list = [model for model in models_list if not model.endswith(".jsonl")]
17331735
for model_name in models_list:
17341736
print(f"Serving candidate model: {model_name}")
17351737
model_path = f"{models_path_prefix}/{model_name}"
@@ -2275,7 +2277,7 @@ def find_node_dataset_directories(base_dir: str):
22752277
env_from=[
22762278
kubernetes.client.V1EnvFromSource(
22772279
secret_ref=kubernetes.client.V1SecretEnvSource(
2278-
name=JUDGE_SERVING_NAME
2280+
name=judge_serving_model_secret
22792281
)
22802282
),
22812283
],
@@ -2310,7 +2312,7 @@ def find_node_dataset_directories(base_dir: str):
23102312
env_from=[
23112313
kubernetes.client.V1EnvFromSource(
23122314
secret_ref=kubernetes.client.V1SecretEnvSource(
2313-
name=JUDGE_SERVING_NAME
2315+
name=judge_serving_model_secret
23142316
)
23152317
),
23162318
],
@@ -2854,6 +2856,9 @@ def decode_base64(data):
28542856
f"Secret {judge_serving_model_secret} not found in namespace {namespace}."
28552857
) from exc
28562858

2859+
# Set the judge secret in the context for the evaluation job
2860+
ctx.obj["judge_serving_model_secret"] = judge_serving_model_secret
2861+
28572862
# list of PVCs to create and their details
28582863
pvcs = [
28592864
{
@@ -3112,6 +3117,13 @@ def evaluation(ctx: click.Context) -> str:
31123117
namespace = ctx.obj["namespace"]
31133118
eval_type = ctx.obj["eval_type"]
31143119
dry_run = ctx.obj["dry_run"]
3120+
judge_serving_model_secret = ctx.obj["judge_serving_model_secret"]
3121+
3122+
# This should only happen if the script is called with the "evaluation" subcommand
3123+
if not judge_serving_model_secret:
3124+
raise ValueError(
3125+
"Judge serving model secret must be provided with --judge-serving-model-secret."
3126+
)
31153127

31163128
if eval_type is None:
31173129
raise ValueError(
@@ -3121,7 +3133,11 @@ def evaluation(ctx: click.Context) -> str:
31213133
logger.info("Running %s evaluation.", eval_type)
31223134

31233135
# Create and run the evaluation job
3124-
job = create_eval_job(namespace=namespace, eval_type=eval_type)
3136+
job = create_eval_job(
3137+
namespace=namespace,
3138+
eval_type=eval_type,
3139+
judge_serving_model_secret=judge_serving_model_secret,
3140+
)
31253141

31263142
if dry_run:
31273143
logger.info("Dry run: Job would be created.\n%s", job)
@@ -3196,4 +3212,4 @@ def upload_trained_model(ctx: click.Context):
31963212
logger.info("Failed to load kube config. Trying in-cluster config")
31973213
kubernetes.config.load_incluster_config()
31983214

3199-
cli()
3215+
cli()

0 commit comments

Comments
 (0)