Skip to content

Commit 93e43e5

Browse files
carlydfShivs11ShahabT
authored
Improve versioning error messages, return NotFound gRPC error for all not found errors (#8641)
## What changed? Improve versioning error messages, return NotFound gRPC error for all not found errors ## Why? So that users can see if they mispelled a deployment name or build id, and generally know which versions are involved when there is an error ## How did you test it? - [ ] built - [ ] run locally and tested manually - [x] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks Changing the error returned only affected replay if the type of error changes, not the string. I am keeping the type the same in the few places where I had to change the error string, so it won't affect workflow replay. Error messages are not compared when checking order of commands in workflow replay check in Go SDK here: https://github.com/temporalio/sdk-go/blob/ad990a3bfe17560c82cf84c80dfdf6c41eb43580/internal/internal_task_handlers.go#L1616 <!-- CURSOR_SUMMARY --> --- > [!NOTE] > Standardizes NotFound errors and makes worker deployment/version errors more informative (include deployment/build IDs), with corresponding workflow validations and test updates. > > - **Worker Deployment (server)**: > - **Error Semantics**: Use `NotFound` for missing deployments/versions (e.g., `ErrWorkerDeploymentNotFound`, `ErrWorkerDeploymentVersionNotFound`), replacing prior `FailedPrecondition` in those cases. > - **Richer Messages**: Parameterize errors with deployment/version info (`build_id`, deployment name); add formatted messages for draining/has-pollers/current-or-ramping; introduce suffix constants and string matching to map workflow failures to precise non-retryable errors. > - **APIs Affected**: `SetCurrentVersion`, `SetRampingVersion`, `DescribeVersion`, `DeleteVersionFromWorkerDeployment`, and update handling (`handleUpdateVersionFailures`). > - **Validation Updates (workflow)**: Return `FailedPreconditionf` with version details for missing task queues, manager identity mismatch, and current/ramping delete checks. > - **Tests**: > - Adjust expectations to new `NotFound` usage and formatted messages; update assertions to use `FailedPreconditionf` message strings. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 5bb0b22. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Co-authored-by: Shivam Saraf <[email protected]> Co-authored-by: ShahabT <[email protected]>
1 parent 2e7b2c7 commit 93e43e5

File tree

6 files changed

+43
-31
lines changed

6 files changed

+43
-31
lines changed

service/worker/workerdeployment/client.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"sort"
8+
"strings"
89
"time"
910

1011
"github.com/dgryski/go-farm"
@@ -339,9 +340,9 @@ func newResourceExhaustedError(message string) *serviceerror.ResourceExhausted {
339340
}
340341
}
341342

342-
func (d *ClientImpl) handleUpdateVersionFailures(outcome *updatepb.Outcome) error {
343+
func (d *ClientImpl) handleUpdateVersionFailures(outcome *updatepb.Outcome, deploymentName, buildID string) error {
343344
if failure := outcome.GetFailure(); failure.GetApplicationFailureInfo().GetType() == errVersionNotFound {
344-
return serviceerror.NewNotFound(errVersionNotFound)
345+
return serviceerror.NewNotFoundf(ErrWorkerDeploymentVersionNotFound, buildID, deploymentName)
345346
} else if failure.GetApplicationFailureInfo().GetType() == errFailedPrecondition {
346347
return serviceerror.NewFailedPrecondition(failure.Message)
347348
} else if failure != nil {
@@ -705,7 +706,7 @@ func (d *ClientImpl) SetCurrentVersion(
705706
if err != nil {
706707
var notFound *serviceerror.NotFound
707708
if errors.As(err, &notFound) {
708-
return nil, serviceerror.NewFailedPreconditionf(ErrWorkerDeploymentNotFound, deploymentName)
709+
return nil, serviceerror.NewNotFoundf(ErrWorkerDeploymentNotFound, deploymentName)
709710
}
710711
return nil, err
711712
}
@@ -720,7 +721,7 @@ func (d *ClientImpl) SetCurrentVersion(
720721
res.ConflictToken = details[0].GetData()
721722
}
722723
return &res, nil
723-
} else if updateErr := d.handleUpdateVersionFailures(outcome); updateErr != nil {
724+
} else if updateErr := d.handleUpdateVersionFailures(outcome, deploymentName, versionObj.GetBuildId()); updateErr != nil {
724725
return nil, updateErr
725726
} else if registerErr := d.handleRegisterVersionFailures(outcome); registerErr != nil {
726727
return nil, registerErr
@@ -819,7 +820,7 @@ func (d *ClientImpl) SetRampingVersion(
819820
if err != nil {
820821
var notFound *serviceerror.NotFound
821822
if errors.As(err, &notFound) {
822-
return nil, serviceerror.NewFailedPreconditionf(ErrWorkerDeploymentNotFound, deploymentName)
823+
return nil, serviceerror.NewNotFoundf(ErrWorkerDeploymentNotFound, deploymentName)
823824
}
824825
return nil, err
825826
}
@@ -837,7 +838,7 @@ func (d *ClientImpl) SetRampingVersion(
837838
}
838839

839840
return &res, nil
840-
} else if updateErr := d.handleUpdateVersionFailures(outcome); updateErr != nil {
841+
} else if updateErr := d.handleUpdateVersionFailures(outcome, deploymentName, versionObj.GetBuildId()); updateErr != nil {
841842
return nil, updateErr
842843
} else if registerErr := d.handleRegisterVersionFailures(outcome); registerErr != nil {
843844
return nil, registerErr
@@ -1154,10 +1155,10 @@ func (d *ClientImpl) DeleteVersionFromWorkerDeployment(
11541155
}
11551156

11561157
if failure := outcome.GetFailure(); failure != nil {
1157-
if failure.Message == ErrVersionIsDraining {
1158-
return temporal.NewNonRetryableApplicationError(ErrVersionIsDraining, errFailedPrecondition, nil) // non-retryable error to stop multiple activity attempts
1159-
} else if failure.Message == ErrVersionHasPollers {
1160-
return temporal.NewNonRetryableApplicationError(ErrVersionHasPollers, errFailedPrecondition, nil) // non-retryable error to stop multiple activity attempts
1158+
if strings.Contains(failure.Message, errVersionIsDrainingSuffix) {
1159+
return temporal.NewNonRetryableApplicationError(fmt.Sprintf(ErrVersionIsDraining, worker_versioning.WorkerDeploymentVersionToStringV32(versionObj)), errFailedPrecondition, nil) // non-retryable error to stop multiple activity attempts
1160+
} else if strings.Contains(failure.Message, errVersionHasPollersSuffix) {
1161+
return temporal.NewNonRetryableApplicationError(fmt.Sprintf(ErrVersionHasPollers, worker_versioning.WorkerDeploymentVersionToStringV32(versionObj)), errFailedPrecondition, nil) // non-retryable error to stop multiple activity attempts
11611162
}
11621163
return serviceerror.NewInternal(failure.Message)
11631164
}

service/worker/workerdeployment/util.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,17 @@ const (
7373
errConflictTokenMismatchType = "errConflictTokenMismatch"
7474
errFailedPrecondition = "FailedPrecondition"
7575

76-
ErrVersionIsDraining = "Version cannot be deleted since it is draining."
77-
ErrVersionHasPollers = "Version cannot be deleted since it has active pollers."
78-
ErrVersionIsCurrentOrRamping = "Version cannot be deleted since it is current or ramping."
79-
80-
ErrRampingVersionDoesNotHaveAllTaskQueues = "proposed ramping version is missing active task queues from the current version; these would become unversioned if it is set as the ramping version"
81-
ErrCurrentVersionDoesNotHaveAllTaskQueues = "proposed current version is missing active task queues from the current version; these would become unversioned if it is set as the current version"
76+
errVersionIsDrainingSuffix = "cannot be deleted since it is draining"
77+
ErrVersionIsDraining = "version '%s' " + errVersionIsDrainingSuffix
78+
errVersionHasPollersSuffix = "cannot be deleted since it has active pollers"
79+
ErrVersionHasPollers = "version '%s' " + errVersionHasPollersSuffix
80+
ErrVersionIsCurrentOrRamping = "version '%s' cannot be deleted since it is current or ramping"
81+
82+
ErrRampingVersionDoesNotHaveAllTaskQueues = "proposed ramping version '%s' is missing active task queues from the current version; these would become unversioned if it is set as the ramping version"
83+
ErrCurrentVersionDoesNotHaveAllTaskQueues = "proposed current version '%s' is missing active task queues from the current version; these would become unversioned if it is set as the current version"
8284
ErrManagerIdentityMismatch = "ManagerIdentity '%s' is set and does not match user identity '%s'; to proceed, set your own identity as the ManagerIdentity, remove the ManagerIdentity, or wait for the other client to do so"
83-
ErrWorkerDeploymentNotFound = "no Worker Deployment found with name %s; does your Worker Deployment have pollers?"
84-
ErrWorkerDeploymentVersionNotFound = "build ID %s not fount in Worker Deployment %s"
85+
ErrWorkerDeploymentNotFound = "no Worker Deployment found with name '%s'; does your Worker Deployment have pollers?"
86+
ErrWorkerDeploymentVersionNotFound = "build ID '%s' not found in Worker Deployment '%s'"
8587
)
8688

8789
var (

service/worker/workerdeployment/workflow.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,11 @@ func (d *WorkflowRunner) setRamp(
720720
return err
721721
}
722722
if isMissingTaskQueues {
723-
return serviceerror.NewFailedPrecondition(ErrRampingVersionDoesNotHaveAllTaskQueues)
723+
newRampingVersionObj, _ := worker_versioning.WorkerDeploymentVersionFromStringV31(newRampingVersion)
724+
return serviceerror.NewFailedPreconditionf(
725+
ErrRampingVersionDoesNotHaveAllTaskQueues,
726+
worker_versioning.WorkerDeploymentVersionToStringV32(newRampingVersionObj),
727+
)
724728
}
725729
}
726730

@@ -814,13 +818,14 @@ func (d *WorkflowRunner) validateDeleteVersion(args *deploymentspb.DeleteVersion
814818
// deployment workflow because that's the source of truth for routing config.
815819
//nolint:staticcheck // SA1019: worker versioning v0.31
816820
if d.State.RoutingConfig.CurrentVersion == args.Version || d.State.RoutingConfig.RampingVersion == args.Version {
821+
versionObj, _ := worker_versioning.WorkerDeploymentVersionFromStringV31(args.Version)
817822
// activity won't retry on this error since version not eligible for deletion
818-
return serviceerror.NewFailedPrecondition(ErrVersionIsCurrentOrRamping)
823+
return serviceerror.NewFailedPreconditionf(ErrVersionIsCurrentOrRamping, worker_versioning.WorkerDeploymentVersionToStringV32(versionObj))
819824
}
820825

821826
// Ignore the manager identity check if the delete operation is initiated by the server internally
822827
if !args.GetServerDelete() && d.State.ManagerIdentity != "" && d.State.ManagerIdentity != args.Identity {
823-
return serviceerror.NewFailedPrecondition(fmt.Sprintf(ErrManagerIdentityMismatch, d.State.ManagerIdentity, args.Identity))
828+
return serviceerror.NewFailedPreconditionf(ErrManagerIdentityMismatch, d.State.ManagerIdentity, args.Identity)
824829
}
825830
return nil
826831
}
@@ -1012,7 +1017,11 @@ func (d *WorkflowRunner) handleSetCurrent(ctx workflow.Context, args *deployment
10121017
return nil, err
10131018
}
10141019
if isMissingTaskQueues {
1015-
return nil, serviceerror.NewFailedPrecondition(ErrCurrentVersionDoesNotHaveAllTaskQueues)
1020+
newCurrentVersionObj, _ := worker_versioning.WorkerDeploymentVersionFromStringV31(newCurrentVersion)
1021+
return nil, serviceerror.NewFailedPreconditionf(
1022+
ErrCurrentVersionDoesNotHaveAllTaskQueues,
1023+
worker_versioning.WorkerDeploymentVersionToStringV32(newCurrentVersionObj),
1024+
)
10161025
}
10171026
}
10181027

service/worker/workerdeployment/workflow_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ func (s *WorkerDeploymentSuite) Test_DeleteVersion_FailsWhenCurrentOrRamping() {
953953
s.env.UpdateWorkflow(DeleteVersion, "", &testsuite.TestUpdateCallback{
954954
OnReject: func(err error) {
955955
// The validator should reject this update
956-
s.Require().ErrorContains(err, ErrVersionIsCurrentOrRamping)
956+
s.Require().ErrorContains(err, fmt.Sprintf(ErrVersionIsCurrentOrRamping, tv.DeploymentVersionStringV32()))
957957
},
958958
OnAccept: func() {
959959
s.Fail("delete version should have been rejected by validator")

tests/versioning_3_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2695,7 +2695,7 @@ func (s *Versioning3Suite) setCurrentDeployment(tv *testvars.TestVars) {
26952695
}
26962696
_, err := s.FrontendClient().SetWorkerDeploymentCurrentVersion(ctx, req)
26972697
var notFound *serviceerror.NotFound
2698-
if errors.As(err, &notFound) || (err != nil && strings.Contains(err.Error(), workerdeployment.ErrCurrentVersionDoesNotHaveAllTaskQueues)) {
2698+
if errors.As(err, &notFound) || (err != nil && strings.Contains(err.Error(), serviceerror.NewFailedPreconditionf(workerdeployment.ErrCurrentVersionDoesNotHaveAllTaskQueues, tv.DeploymentVersionStringV32()).Error())) {
26992699
return false
27002700
}
27012701
s.NoError(err)
@@ -2748,7 +2748,7 @@ func (s *Versioning3Suite) setRampingDeployment(
27482748
}
27492749
_, err := s.FrontendClient().SetWorkerDeploymentRampingVersion(ctx, req)
27502750
var notFound *serviceerror.NotFound
2751-
if errors.As(err, &notFound) || errors.Is(err, serviceerror.NewFailedPrecondition(workerdeployment.ErrRampingVersionDoesNotHaveAllTaskQueues)) {
2751+
if errors.As(err, &notFound) || (err != nil && strings.Contains(err.Error(), serviceerror.NewFailedPreconditionf(workerdeployment.ErrRampingVersionDoesNotHaveAllTaskQueues, tv.DeploymentVersionStringV32()).Error())) {
27522752
return false
27532753
}
27542754
s.NoError(err)

tests/worker_deployment_version_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_DeleteCurrentVersion() {
515515
s.Nil(err)
516516

517517
// Deleting this version should fail since the version is current
518-
s.tryDeleteVersion(ctx, tv1, workerdeployment.ErrVersionIsCurrentOrRamping, false)
518+
s.tryDeleteVersion(ctx, tv1, fmt.Sprintf(workerdeployment.ErrVersionIsCurrentOrRamping, tv1.DeploymentVersionStringV32()), false)
519519

520520
// Verifying workflow is not in a locked state after an invalid delete request such as the one above. If the workflow were in a locked
521521
// state, the passed context would have timed out making the following operation fail.
@@ -545,7 +545,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_DeleteRampedVersion() {
545545
s.Nil(err)
546546

547547
// Deleting this version should fail since the version is ramping
548-
s.tryDeleteVersion(ctx, tv1, workerdeployment.ErrVersionIsCurrentOrRamping, false)
548+
s.tryDeleteVersion(ctx, tv1, fmt.Sprintf(workerdeployment.ErrVersionIsCurrentOrRamping, tv1.DeploymentVersionStringV32()), false)
549549

550550
// Verifying workflow is not in a locked state after an invalid delete request such as the one above. If the workflow were in a locked
551551
// state, the passed context would have timed out making the following operation fail.
@@ -622,7 +622,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_DrainingVersion() {
622622
}, enumspb.WORKER_DEPLOYMENT_VERSION_STATUS_DRAINING, false, false)
623623

624624
// delete should fail
625-
s.tryDeleteVersion(ctx, tv1, workerdeployment.ErrVersionIsDraining, false)
625+
s.tryDeleteVersion(ctx, tv1, fmt.Sprintf(workerdeployment.ErrVersionIsDraining, tv1.DeploymentVersionStringV32()), false)
626626

627627
}
628628

@@ -650,7 +650,7 @@ func (s *DeploymentVersionSuite) TestDeleteVersion_Drained_But_Pollers_Exist() {
650650
s.signalAndWaitForDrained(ctx, tv1)
651651

652652
// Version will bypass "drained" check but delete should still fail since we have active pollers.
653-
s.tryDeleteVersion(ctx, tv1, workerdeployment.ErrVersionHasPollers, false)
653+
s.tryDeleteVersion(ctx, tv1, fmt.Sprintf(workerdeployment.ErrVersionHasPollers, tv1.DeploymentVersionStringV32()), false)
654654
}
655655

656656
func (s *DeploymentVersionSuite) signalAndWaitForDrained(ctx context.Context, tv *testvars.TestVars) {
@@ -936,7 +936,7 @@ func (s *DeploymentVersionSuite) TestVersionMissingTaskQueues_InvalidSetCurrentV
936936

937937
// SetCurrent should fail since task_queue_1 does not have a current version than the deployment's existing current version
938938
// and it either has a backlog of tasks being present or an add rate > 0.
939-
s.EqualError(err, workerdeployment.ErrCurrentVersionDoesNotHaveAllTaskQueues)
939+
s.EqualError(err, fmt.Sprintf(workerdeployment.ErrCurrentVersionDoesNotHaveAllTaskQueues, tv2.DeploymentVersionStringV32()))
940940
}
941941

942942
func (s *DeploymentVersionSuite) TestVersionMissingTaskQueues_ValidSetCurrentVersion() {
@@ -994,7 +994,7 @@ func (s *DeploymentVersionSuite) TestVersionMissingTaskQueues_InvalidSetRampingV
994994

995995
// SetRampingVersion should fail since task_queue_1 does not have a current version than the deployment's existing current version
996996
// and it either has a backlog of tasks being present or an add rate > 0.
997-
s.EqualError(err, workerdeployment.ErrRampingVersionDoesNotHaveAllTaskQueues)
997+
s.EqualError(err, fmt.Sprintf(workerdeployment.ErrRampingVersionDoesNotHaveAllTaskQueues, tv2.DeploymentVersionStringV32()))
998998
}
999999

10001000
func (s *DeploymentVersionSuite) TestVersionMissingTaskQueues_ValidSetRampingVersion() {

0 commit comments

Comments
 (0)