@@ -472,6 +472,65 @@ type (
472472 // ParentClosePolicy - Optional policy to decide what to do for the child.
473473 // Default is Terminate (if onboarded to this feature)
474474 ParentClosePolicy ParentClosePolicy
475+
476+ // Bugports allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
477+ // emulating old behavior until a fix is deployed.
478+ //
479+ // Bugports are always deprecated and may be removed in future versions.
480+ // Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
481+ // allow cleaning up the additional code complexity that they cause.
482+ //
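483+ // Deprecated: only set this to temporarily emulate old behavior, and migrate off of any bugport as soon as possible.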
484+ Bugports Bugports
485+ }
486+
487+ // Bugports allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
488+ // emulating old behavior until a fix is deployed.
489+ // By default, bugs (especially rarely-occurring ones) are fixed and all users are opted into the new behavior.
490+ // Back-ported buggy behavior *may* be available via these flags.
491+ //
492+ // Fields in here are NOT guaranteed to be stable. They will almost certainly be removed in the next major
493+ // release, and might be removed earlier if a need arises, e.g. if the historical behavior causes too much of an
494+ // increase in code complexity.
495+ //
496+ // See each individual field for details.
497+ //
498+ // Bugports are always deprecated and may be removed in future versions.
499+ // Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
500+ // allow cleaning up the additional code complexity that they cause.
501+ //
502+ // deprecated
503+ Bugports struct {
504+ // StartChildWorkflowsOnCanceledContext allows emulating older, buggy behavior that existed prior to v0.18.4.
505+ //
506+ // Prior to the fix, child workflows would be started and keep running when their context was canceled in two
507+ // situations:
508+ // 1) when the context was canceled before ExecuteChildWorkflow is called, and
509+ // 2) when the context was canceled after ExecuteChildWorkflow but before the child workflow was started.
510+ //
511+ // 1 is unfortunately easy to trigger, though many workflows will encounter an error earlier and not reach the
512+ // child-workflow-executing code. 2 is expected to be very rare in practice.
513+ //
514+ // To permanently emulate old behavior, use a disconnected context when starting child workflows, and
515+ // cancel it only after `childfuture.GetWorkflowExecution().Get(...)` returns. This can be used when this flag
516+ // is removed in the future.
517+ //
518+ // If you have currently-broken workflows and need to repair them, there are two primary options:
519+ //
520+ // 1: Check the BinaryChecksum value of your new deploy and/or of the decision that is currently failing
521+ // workflows. Then set this flag when replaying history on those not-fixed checksums. Concretely, this means
522+ // checking both `workflow.GetInfo(ctx).BinaryChecksum` (note that sufficiently old clients may not have
523+ // recorded a value, and it may be nil) and `workflow.IsReplaying(ctx)`.
524+ //
525+ // 2: Reset broken workflows back to either before the buggy behavior was recorded, or before the fixed behavior
526+ // was deployed. A "bad binary" reset type can do the latter in bulk, see the CLI's
527+ // `cadence workflow reset-batch --reset_type BadBinary --help` for details. For the former, check the failing
528+ // histories, identify the point at which the bug occurred, and reset to prior to that decision task.
529+ //
530+ // Added in 0.18.4, this may be removed in or after v0.19.0, so please migrate off of it ASAP.
531+ //
532+ // deprecated
533+ StartChildWorkflowsOnCanceledContext bool
475534 }
476535)
477536
@@ -896,15 +955,23 @@ func (wc *workflowEnvironmentInterceptor) ExecuteChildWorkflow(ctx Context, chil
896955 decodeFutureImpl : mainFuture .(* decodeFutureImpl ),
897956 executionFuture : executionFuture .(* futureImpl ),
898957 }
958+ // clients prior to v0.18.4 would incorrectly start child workflows that were started with cancelled contexts,
959+ // and did not react to cancellation between requested and started.
960+ correctChildCancellation := true
961+ workflowOptionsFromCtx := getWorkflowEnvOptions (ctx )
899962
900963 // Starting with a canceled context should immediately fail, no need to even try.
901964 if ctx .Err () != nil {
902- mainSettable .SetError (ctx .Err ())
903- executionSettable .SetError (ctx .Err ())
904- return result
965+ if workflowOptionsFromCtx .bugports .StartChildWorkflowsOnCanceledContext {
966+ // backport the bug
967+ correctChildCancellation = false
968+ } else {
969+ mainSettable .SetError (ctx .Err ())
970+ executionSettable .SetError (ctx .Err ())
971+ return result
972+ }
905973 }
906974
907- workflowOptionsFromCtx := getWorkflowEnvOptions (ctx )
908975 dc := workflowOptionsFromCtx .dataConverter
909976 env := getWorkflowEnvironment (ctx )
910977 wfType , input , err := getValidatedWorkflowFunction (childWorkflowType , args , dc , env .GetRegistry ())
@@ -951,7 +1018,11 @@ func (wc *workflowEnvironmentInterceptor) ExecuteChildWorkflow(ctx Context, chil
9511018
9521019 // forward the delayed cancellation if necessary
9531020 if shouldCancelAsync && e == nil && ! mainFuture .IsReady () {
954- getWorkflowEnvironment (ctx ).RequestCancelChildWorkflow (* options .domain , childWorkflowExecution .ID )
1021+ if workflowOptionsFromCtx .bugports .StartChildWorkflowsOnCanceledContext {
1022+ // do nothing: buggy behavior did not forward the cancellation
1023+ } else {
1024+ getWorkflowEnvironment (ctx ).RequestCancelChildWorkflow (* options .domain , childWorkflowExecution .ID )
1025+ }
9551026 }
9561027 })
9571028
@@ -967,7 +1038,7 @@ func (wc *workflowEnvironmentInterceptor) ExecuteChildWorkflow(ctx Context, chil
9671038 if childWorkflowExecution != nil && ! mainFuture .IsReady () {
9681039 // child workflow started, and ctx cancelled. forward cancel to the child.
9691040 getWorkflowEnvironment (ctx ).RequestCancelChildWorkflow (* options .domain , childWorkflowExecution .ID )
970- } else if childWorkflowExecution == nil {
1041+ } else if childWorkflowExecution == nil && correctChildCancellation {
9711042 // decision to start the child has been made, but it has not yet started.
9721043
9731044 // TODO: ideal, but not strictly necessary for correctness:
@@ -1294,6 +1365,7 @@ func WithChildWorkflowOptions(ctx Context, cwo ChildWorkflowOptions) Context {
12941365 wfOptions .memo = cwo .Memo
12951366 wfOptions .searchAttributes = cwo .SearchAttributes
12961367 wfOptions .parentClosePolicy = cwo .ParentClosePolicy
1368+ wfOptions .bugports = cwo .Bugports
12971369
12981370 return ctx1
12991371}
0 commit comments