Skip to content

Commit 779a4fa

Browse files
author
Shivam Mukhade
committed
Reconciler: Retry reporting provider status if failed
reconciler will retry to report provider status if failed now with a backoff time. It will try 3 times. if it still fails for reporting the final status then it updates the `state` to `failed`. so that those pipelineruns can be queried. Signed-off-by: Shivam Mukhade <[email protected]>
1 parent 1d39fbd commit 779a4fa

File tree

5 files changed

+67
-8
lines changed

5 files changed

+67
-8
lines changed

pkg/kubeinteraction/labels.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const (
1616
StateStarted = "started"
1717
StateQueued = "queued"
1818
StateCompleted = "completed"
19+
StateFailed = "failed"
1920
)
2021

2122
func AddLabelsAndAnnotations(event *info.Event, pipelineRun *tektonv1beta1.PipelineRun, repo *apipac.Repository, providerinfo *info.ProviderConfig) {

pkg/reconciler/reconciler.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ var (
4444
func (r *Reconciler) ReconcileKind(ctx context.Context, pr *v1beta1.PipelineRun) pkgreconciler.Event {
4545
logger := logging.FromContext(ctx)
4646

47-
// if pipelineRun is in completed state then return
47+
// if pipelineRun is in completed or failed state then return
4848
state, exist := pr.GetLabels()[keys.State]
49-
if exist && state == kubeinteraction.StateCompleted {
49+
if exist && (state == kubeinteraction.StateCompleted || state == kubeinteraction.StateFailed) {
5050
return nil
5151
}
5252

@@ -113,16 +113,18 @@ func (r *Reconciler) reportFinalStatus(ctx context.Context, logger *zap.SugaredL
113113
return repo, fmt.Errorf("cannot clean prs: %w", err)
114114
}
115115

116+
finalState := kubeinteraction.StateCompleted
116117
newPr, err := r.postFinalStatus(ctx, logger, provider, event, pr)
117118
if err != nil {
118-
return repo, fmt.Errorf("cannot post final status: %w", err)
119+
logger.Errorf("failed to post final status, moving on: %v", err)
120+
finalState = kubeinteraction.StateFailed
119121
}
120122

121123
if err := r.updateRepoRunStatus(ctx, logger, newPr, repo, event); err != nil {
122124
return repo, fmt.Errorf("cannot update run status: %w", err)
123125
}
124126

125-
if _, err := r.updatePipelineRunState(ctx, logger, pr, kubeinteraction.StateCompleted); err != nil {
127+
if _, err := r.updatePipelineRunState(ctx, logger, pr, finalState); err != nil {
126128
return repo, fmt.Errorf("cannot update state: %w", err)
127129
}
128130

@@ -186,8 +188,11 @@ func (r *Reconciler) updatePipelineRunToInProgress(ctx context.Context, logger *
186188
OriginalPipelineRunName: pr.GetLabels()[keys.OriginalPRName],
187189
}
188190

189-
if err := p.CreateStatus(ctx, r.run.Clients.Tekton, event, r.run.Info.Pac, status); err != nil {
190-
return fmt.Errorf("cannot create a in_progress status on the provider platform: %w", err)
191+
if err := createStatusWithRetry(ctx, logger, r.run.Clients.Tekton, p, event, r.run.Info.Pac, status); err != nil {
192+
// if failed to report status for running state, let the pipelineRun continue,
193+
// pipelineRun is already started so we will try again once it completes
194+
logger.Errorf("failed to report status to running on provider continuing! error: %v", err)
195+
return nil
191196
}
192197

193198
logger.Info("updated in_progress status on provider platform for pipelineRun ", pr.GetName())

pkg/reconciler/status.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"strings"
7+
"time"
78

89
"github.com/google/go-github/v48/github"
910
apipac "github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/keys"
@@ -16,6 +17,7 @@ import (
1617
"github.com/openshift-pipelines/pipelines-as-code/pkg/secrets"
1718
"github.com/openshift-pipelines/pipelines-as-code/pkg/sort"
1819
tektonv1beta1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
20+
"github.com/tektoncd/pipeline/pkg/client/clientset/versioned"
1921
"go.uber.org/zap"
2022
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2123
)
@@ -26,6 +28,12 @@ const (
2628
failureReasonText = "%s<br><h4>Failure reason</h4><br>%s"
2729
)
2830

31+
var backoffSchedule = []time.Duration{
32+
1 * time.Second,
33+
3 * time.Second,
34+
5 * time.Second,
35+
}
36+
2937
func (r *Reconciler) updateRepoRunStatus(ctx context.Context, logger *zap.SugaredLogger, pr *tektonv1beta1.PipelineRun, repo *pacv1a1.Repository, event *info.Event) error {
3038
refsanitized := formatting.SanitizeBranch(event.BaseBranch)
3139
repoStatus := pacv1a1.RepositoryRunStatus{
@@ -121,7 +129,21 @@ func (r *Reconciler) postFinalStatus(ctx context.Context, logger *zap.SugaredLog
121129
OriginalPipelineRunName: pr.GetLabels()[apipac.OriginalPRName],
122130
}
123131

124-
err = vcx.CreateStatus(ctx, r.run.Clients.Tekton, event, r.run.Info.Pac, status)
132+
err = createStatusWithRetry(ctx, logger, r.run.Clients.Tekton, vcx, event, r.run.Info.Pac, status)
125133
logger.Infof("pipelinerun %s has a status of '%s'", pr.Name, status.Conclusion)
126134
return pr, err
127135
}
136+
137+
func createStatusWithRetry(ctx context.Context, logger *zap.SugaredLogger, tekton versioned.Interface, vcx provider.Interface, event *info.Event, opts *info.PacOpts, status provider.StatusOpts) error {
138+
var finalError error
139+
for _, backoff := range backoffSchedule {
140+
err := vcx.CreateStatus(ctx, tekton, event, opts, status)
141+
if err == nil {
142+
return nil
143+
}
144+
logger.Infof("failed to create status, error: %v, retrying in %v", err, backoff)
145+
time.Sleep(backoff)
146+
finalError = err
147+
}
148+
return fmt.Errorf("failed to report status: %w", finalError)
149+
}

pkg/reconciler/status_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package reconciler
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
provider2 "github.com/openshift-pipelines/pipelines-as-code/pkg/provider"
8+
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/provider"
9+
"go.uber.org/zap"
10+
zapobserver "go.uber.org/zap/zaptest/observer"
11+
"gotest.tools/v3/assert"
12+
)
13+
14+
func TestCreateStatusWithRetry(t *testing.T) {
15+
observer, _ := zapobserver.New(zap.InfoLevel)
16+
fakelogger := zap.New(observer).Sugar()
17+
vcx := provider.TestProviderImp{}
18+
19+
err := createStatusWithRetry(context.TODO(), fakelogger, nil, &vcx, nil, nil, provider2.StatusOpts{})
20+
assert.NilError(t, err)
21+
}
22+
23+
func TestCreateStatusWithRetry_ErrorCase(t *testing.T) {
24+
observer, _ := zapobserver.New(zap.InfoLevel)
25+
fakelogger := zap.New(observer).Sugar()
26+
vcx := provider.TestProviderImp{}
27+
vcx.CreateStatusErorring = true
28+
29+
err := createStatusWithRetry(context.TODO(), fakelogger, nil, &vcx, nil, nil, provider2.StatusOpts{})
30+
assert.Error(t, err, "failed to report status: some provider error occurred while reporting status")
31+
}

pkg/test/provider/testwebvcs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func (v *TestProviderImp) GetTaskURI(ctx context.Context, params *params.Run, ev
6363

6464
func (v *TestProviderImp) CreateStatus(ctx context.Context, _ versioned.Interface, event *info.Event, opts *info.PacOpts, statusOpts provider.StatusOpts) error {
6565
if v.CreateStatusErorring {
66-
return fmt.Errorf("you want me to error I error for you")
66+
return fmt.Errorf("some provider error occurred while reporting status")
6767
}
6868
return nil
6969
}

0 commit comments

Comments
 (0)