diff --git a/src/modules/job-manager/plugins/limit-duration.c b/src/modules/job-manager/plugins/limit-duration.c index 7735d4a79905..656f9077f939 100644 --- a/src/modules/job-manager/plugins/limit-duration.c +++ b/src/modules/job-manager/plugins/limit-duration.c @@ -193,17 +193,24 @@ static int check_limit (struct limit_duration *ctx, { double limit = ctx->general_limit; double qlimit = queues_lookup (ctx->queues, queue); + bool unlimited = duration == DURATION_UNLIMITED; if (qlimit != DURATION_INVALID) limit = qlimit; if (limit != DURATION_INVALID && limit != DURATION_UNLIMITED - && (duration > limit || duration == DURATION_UNLIMITED)) { + && (duration > limit || unlimited)) { + char requested[64]; char fsd[64]; + fsd_format_duration_ex (requested, sizeof (requested), duration, 2); fsd_format_duration_ex (fsd, sizeof (fsd), limit, 2); return errprintf (error, - "requested duration exceeds policy limit of %s", - fsd); + "requested duration (%s) exceeds policy limit of " + "%s%s%s", + unlimited ? "unlimited" : requested, + fsd, + queue ? " for queue " : "", + queue ? queue : ""); } return 0; } diff --git a/src/modules/job-manager/plugins/limit-job-size.c b/src/modules/job-manager/plugins/limit-job-size.c index a27cc49161dc..d05300b013cd 100644 --- a/src/modules/job-manager/plugins/limit-job-size.c +++ b/src/modules/job-manager/plugins/limit-job-size.c @@ -297,6 +297,45 @@ static int queues_parse (zhashx_t **zhp, return -1; } +static int check_limit (const char *queue, + const char *resource, + bool over, + int limit, + int value, + flux_error_t *errp) +{ + if ((over && LIMIT_OVER (limit, value)) + || (!over && LIMIT_UNDER (limit, value))) + return errprintf (errp, + "requested %s (%d) %s policy limit of %d%s%s", + resource, + value, + over ? "exceeds" : "is under", + limit, + queue ? " for queue ": "", + queue ? queue : ""); + return 0; +} + +static int check_over (const char *queue, + const char *resource, + int limit, + int value, + flux_error_t *errp) +{ + return check_limit (queue, resource, true, limit, value, errp); +} + +static int check_under (const char *queue, + const char *resource, + int limit, + int value, + flux_error_t *errp) +{ + return check_limit (queue, resource, false, limit, value, errp); +} + + static int check_limits (struct limit_job_size *ctx, struct jj_counts *counts, const char *queue, @@ -305,41 +344,21 @@ static int check_limits (struct limit_job_size *ctx, struct limits limits; struct limits *queue_limits; + int nnodes = counts->nnodes; + int ncores = counts->nslots * counts->slot_size; + int ngpus = counts->nslots * counts->slot_gpus; + limits = ctx->general_limits; if (queue && (queue_limits = queues_lookup (ctx->queues, queue))) limits_override (&limits, queue_limits); - if (LIMIT_OVER (limits.max.nnodes, counts->nnodes)) { - return errprintf (error, - "requested nnodes exceeds policy limit of %d", - limits.max.nnodes); - } - if (LIMIT_OVER (limits.max.ncores, counts->nslots * counts->slot_size)) { - return errprintf (error, - "requested ncores exceeds policy limit of %d", - limits.max.ncores); - } - if (LIMIT_OVER (limits.max.ngpus, counts->nslots * counts->slot_gpus)) { - return errprintf (error, - "requested ngpus exceeds policy limit of %d", - limits.max.ngpus); - } - if (LIMIT_UNDER (limits.min.nnodes, counts->nnodes)) { - return errprintf (error, - "requested nnodes is under policy limit of %d", - limits.min.nnodes); - } - if (LIMIT_UNDER (limits.min.ncores, counts->nslots * counts->slot_size)) { - return errprintf (error, - "requested ncores is under policy limit of %d", - limits.min.ncores); - } - if (LIMIT_UNDER (limits.min.ngpus, counts->nslots * counts->slot_gpus)) { - return errprintf (error, - "requested ngpus is under policy limit of %d", - limits.min.ngpus); - } - + if (check_over (queue, "nnodes", limits.max.nnodes, nnodes, error) < 0 + || check_over (queue, "ncores", limits.max.ncores, ncores, error) < 0 + || check_over (queue, "ngpus", limits.max.ngpus, ngpus, error) < 0 + || check_under (queue, "nnodes", limits.min.nnodes, nnodes, error) < 0 + || check_under (queue, "ncores", limits.min.ncores, ncores, error) < 0 + || check_under (queue, "ngpus", limits.min.ngpus, ngpus, error) < 0) + return -1; return 0; } diff --git a/t/t2221-job-manager-limit-duration.t b/t/t2221-job-manager-limit-duration.t index 16b482ebbdef..f0706a6c2e4d 100755 --- a/t/t2221-job-manager-limit-duration.t +++ b/t/t2221-job-manager-limit-duration.t @@ -40,10 +40,18 @@ test_expect_success 'configure policy.limits.duration and queue duration' ' flux queue start --all ' test_expect_success 'a job that exceeds policy.limits.duration is rejected' ' - test_must_fail flux submit --queue=debug -t 2h true + test_must_fail flux submit --queue=debug -t 2h true 2>limit.err && + test_debug "cat limit.err" +' +test_expect_success 'error message includes expected details' ' + grep "duration (2h) exceeds.*limit of 1h for queue debug" limit.err ' test_expect_success 'a job with no limit is also rejected' ' - test_must_fail flux submit --queue=debug -t 0 true + test_must_fail flux submit --queue=debug -t 0 true 2>limit2.err && + test_debug "cat limit2.err" +' +test_expect_success 'error message includes expected details' ' + grep "duration (unlimited) exceeds.*limit of 1h for queue debug" limit2.err ' test_expect_success 'but is accepted by a queue with higher limit' ' flux submit \ diff --git a/t/t2222-job-manager-limit-job-size.t b/t/t2222-job-manager-limit-job-size.t index 8a6cd6ba7bb7..2da6c470fd48 100755 --- a/t/t2222-job-manager-limit-job-size.t +++ b/t/t2222-job-manager-limit-job-size.t @@ -82,7 +82,11 @@ test_expect_success 'a job is accepted if under general gpu limit' ' flux submit --queue=debug -n1 true ' test_expect_success 'a job is rejected if over gpu limit' ' - test_must_fail flux submit --queue=debug -n1 -g1 true + test_must_fail flux submit --queue=debug -n1 -g1 true 2>debug.err && + test_debug "cat debug.err" +' +test_expect_success 'error message contains queue name' ' + grep "for queue debug" debug.err ' test_expect_success 'same job is accepted with unlimited queue override' ' flux submit --queue=batch -n1 -g1 true diff --git a/t/t2290-job-update.t b/t/t2290-job-update.t index 1cfcf4fae3ab..ac3858edf687 100755 --- a/t/t2290-job-update.t +++ b/t/t2290-job-update.t @@ -149,7 +149,7 @@ test_expect_success 'reload update-duration plugin with owner-allow-any=0' ' test_expect_success 'update duration above policy limit now fails' ' test_expect_code 1 flux update $jobid duration=1h 2>limit.err && test_debug "cat limit.err" && - grep "requested duration exceeds policy limit" limit.err + grep "requested duration.*exceeds policy limit" limit.err ' test_expect_success 'update fails for running job' ' jobid=$(flux submit -t1m --wait-event=start sleep 60) && diff --git a/t/t2291-job-update-queue.t b/t/t2291-job-update-queue.t index 5cb7dd0db096..56844c295fdd 100755 --- a/t/t2291-job-update-queue.t +++ b/t/t2291-job-update-queue.t @@ -80,7 +80,7 @@ test_expect_success 'job runs on batch resources' ' test_expect_success 'update to queue with lower duration limit fails' ' jobid=$(flux submit -q batch --urgency=hold hostname) && test_must_fail flux update $jobid queue=debug 2>queue.err && - grep "duration exceeds policy limit" queue.err + grep "duration.*exceeds policy limit" queue.err ' test_expect_success 'update of duration allows queue update' ' flux update $jobid queue=debug duration=1m &&