diff --git a/config/overlays/odh-kueue/params.env b/config/overlays/odh-kueue/params.env index 012ff3041..f487e0d12 100644 --- a/config/overlays/odh-kueue/params.env +++ b/config/overlays/odh-kueue/params.env @@ -1,5 +1,5 @@ trustyaiServiceImage=quay.io/trustyai/trustyai-service:latest -trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest +trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest oauthProxyImage=quay.io/openshift/origin-oauth-proxy:4.14.0 kServeServerless=enabled lmes-driver-image=quay.io/trustyai/ta-lmes-driver:latest diff --git a/config/overlays/odh/params.env b/config/overlays/odh/params.env index 5a2b285ed..134e2ac3a 100644 --- a/config/overlays/odh/params.env +++ b/config/overlays/odh/params.env @@ -1,5 +1,5 @@ trustyaiServiceImage=quay.io/trustyai/trustyai-service:latest -trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest +trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest oauthProxyImage=quay.io/openshift/origin-oauth-proxy:4.14.0 kServeServerless=enabled lmes-driver-image=quay.io/trustyai/ta-lmes-driver:latest diff --git a/config/overlays/rhoai/params.env b/config/overlays/rhoai/params.env index cf38b630b..6488d0696 100644 --- a/config/overlays/rhoai/params.env +++ b/config/overlays/rhoai/params.env @@ -1,5 +1,5 @@ trustyaiServiceImage=quay.io/trustyai/trustyai-service:latest -trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest +trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest oauthProxyImage=registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4f8d66597feeb32bb18699326029f9a71a5aca4a57679d636b876377c2e95695 kServeServerless=enabled lmes-driver-image=quay.io/trustyai/ta-lmes-driver:latest diff --git a/controllers/gorch/guardrailsorchestrator_controller.go b/controllers/gorch/guardrailsorchestrator_controller.go index 5e3b947d0..09ede98c4 100644 --- a/controllers/gorch/guardrailsorchestrator_controller.go +++
b/controllers/gorch/guardrailsorchestrator_controller.go @@ -19,6 +19,7 @@ package gorch import ( "context" + "fmt" "time" routev1 "github.com/openshift/api/route/v1" gorchv1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/gorch/v1alpha1" @@ -91,10 +92,10 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct return ctrl.Result{}, err } - // Start reconcilation + // Start reconciliation with enhanced status message if orchestrator.Status.Conditions == nil { reason := ReconcileInit - message := "Initializing GuardrailsOrchestrator resource" + message := ReconcileInitMessage orchestrator, err = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { SetProgressingCondition(&saved.Status.Conditions, reason, message) saved.Status.Phase = PhaseProgressing @@ -103,6 +104,7 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct log.Error(err, "Failed to update GuardrailsOrchestrator status during initialization") return ctrl.Result{}, err } + r.Recorder.Event(orchestrator, "Normal", "Initializing", message) } if !controllerutil.ContainsFinalizer(orchestrator, finalizerName) { @@ -117,8 +119,7 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct } } - // Check if the GuardrailsOrchestrator is marked to be deleted, which is - // indicated by the deletion timestamp being set.
+ // Check if the GuardrailsOrchestrator is marked to be deleted isMarkedToBeDeleted := orchestrator.GetDeletionTimestamp() != nil if isMarkedToBeDeleted { if controllerutil.ContainsFinalizer(orchestrator, finalizerName) { @@ -135,10 +136,16 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct log.Error(err, "Failed to remove finalizer for GuardrailsOrchestrator") return ctrl.Result{}, err } + r.Recorder.Event(orchestrator, "Normal", "Deleted", "GuardrailsOrchestrator resource deleted successfully") } return ctrl.Result{}, nil } + // Update status to show we're creating ServiceAccount + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetProgressingCondition(&saved.Status.Conditions, ReconcileProgressing, "Creating ServiceAccount") + }) + existingServiceAccount := &corev1.ServiceAccount{} err = r.Get(ctx, types.NamespacedName{Name: orchestrator.Name + "-serviceaccount", Namespace: orchestrator.Namespace}, existingServiceAccount) if err != nil && errors.IsNotFound(err) { @@ -147,22 +154,43 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct err = r.Create(ctx, serviceAccount) if err != nil { log.Error(err, "Failed to create new ServiceAccount", "ServiceAccount.Namespace", serviceAccount.Namespace, "ServiceAccount.Name", serviceAccount.Name) + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "ServiceAccountCreationFailed", + fmt.Sprintf("Failed to create ServiceAccount: %v", err)) + }) return ctrl.Result{}, err } + r.Recorder.Event(orchestrator, "Normal", "ServiceAccountCreated", ServiceAccountCreatedMessage) } else if err != nil { log.Error(err, "Failed to get ServiceAccount") return ctrl.Result{}, err } + // Check ConfigMap existence with enhanced status existingConfigMap := &corev1.ConfigMap{} err = r.Get(ctx, types.NamespacedName{Name: 
*orchestrator.Spec.OrchestratorConfig, Namespace: orchestrator.Namespace}, existingConfigMap) if err != nil { if client.IgnoreNotFound(err) != nil { + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "ConfigMapError", + fmt.Sprintf("Error checking ConfigMap '%s': %v", *orchestrator.Spec.OrchestratorConfig, err)) + }) return ctrl.Result{}, err } + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "ConfigMapNotFound", + fmt.Sprintf("%s '%s'", ConfigMapNotFoundMessage, *orchestrator.Spec.OrchestratorConfig)) + }) + r.Recorder.Event(orchestrator, "Warning", "ConfigMapNotFound", + fmt.Sprintf("Required ConfigMap '%s' not found in namespace '%s'", *orchestrator.Spec.OrchestratorConfig, orchestrator.Namespace)) return ctrl.Result{}, nil } + // Update status to show we're creating Deployment + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetProgressingCondition(&saved.Status.Conditions, ReconcileProgressing, "Creating Deployment") + }) + existingDeployment := &appsv1.Deployment{} err = r.Get(ctx, types.NamespacedName{Name: orchestrator.Name, Namespace: orchestrator.Namespace}, existingDeployment) if err != nil && errors.IsNotFound(err) { @@ -172,13 +200,23 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct err = r.Create(ctx, deployment) if err != nil { log.Error(err, "Failed to create new Deployment", "Deployment.Namespace", deployment.Namespace, "Deployment.Name", deployment.Name) + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "DeploymentCreationFailed", + fmt.Sprintf("Failed to create Deployment: %v", err)) + }) return ctrl.Result{}, err } + 
r.Recorder.Event(orchestrator, "Normal", "DeploymentCreated", "Deployment created successfully") } else if err != nil { log.Error(err, "Failed to get Deployment") return ctrl.Result{}, err } + // Update status to show we're creating Service + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetProgressingCondition(&saved.Status.Conditions, ReconcileProgressing, "Creating Service") + }) + existingService := &corev1.Service{} err = r.Get(ctx, types.NamespacedName{Name: orchestrator.Name + "-service", Namespace: orchestrator.Namespace}, existingService) if err != nil && errors.IsNotFound(err) { @@ -188,13 +226,23 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct err = r.Create(ctx, service) if err != nil { log.Error(err, "Failed to create new Service", "Service.Namespace", service.Namespace, "Service.Name", service.Name) + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "ServiceCreationFailed", + fmt.Sprintf("Failed to create Service: %v", err)) + }) return ctrl.Result{}, err } + r.Recorder.Event(orchestrator, "Normal", "ServiceCreated", ServiceCreatedMessage) } else if err != nil { log.Error(err, "Failed to get Service") return ctrl.Result{}, err } + // Update status to show we're creating Routes + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetProgressingCondition(&saved.Status.Conditions, ReconcileProgressing, "Creating Routes") + }) + existingRoute := &routev1.Route{} err = r.Get(ctx, types.NamespacedName{Name: orchestrator.Name, Namespace: orchestrator.Namespace}, existingRoute) if err != nil && errors.IsNotFound(err) { @@ -204,6 +252,14 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct err = r.Create(ctx, httpRoute) if err != nil { log.Error(err, "Failed to create new 
Route", "Route.Namespace", httpRoute.Namespace, "Route.Name", httpRoute.Name) + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "HTTPRouteCreationFailed", + fmt.Sprintf("Failed to create HTTP Route: %v", err)) + }) + r.Recorder.Event(orchestrator, "Warning", "HTTPRouteCreationFailed", + fmt.Sprintf("Failed to create HTTP route: %v", err)) + } else { + r.Recorder.Event(orchestrator, "Normal", "HTTPRouteCreated", "HTTP route created successfully") } } else if err != nil { log.Error(err, "Failed to get Route") @@ -218,17 +274,33 @@ func (r *GuardrailsOrchestratorReconciler) Reconcile(ctx context.Context, req ct err = r.Create(ctx, healthRoute) if err != nil { log.Error(err, "Failed to create new Route", "Route.Namespace", healthRoute.Namespace, "Route.Name", healthRoute.Name) + orchestrator, _ = r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { + SetDegradedCondition(&saved.Status.Conditions, true, "HealthRouteCreationFailed", + fmt.Sprintf("Failed to create Health Route: %v", err)) + }) + r.Recorder.Event(orchestrator, "Warning", "HealthRouteCreationFailed", + fmt.Sprintf("Failed to create health route: %v", err)) + } else { + r.Recorder.Event(orchestrator, "Normal", "HealthRouteCreated", "Health route created successfully") } } else if err != nil { log.Error(err, "Failed to get Route") return ctrl.Result{}, err } - // Finalize reconcilation + // Finalize reconciliation with comprehensive status update _, updateErr := r.reconcileStatuses(ctx, orchestrator) if updateErr != nil { return ctrl.Result{}, updateErr } + + // Emit appropriate event based on final status + if orchestrator.Status.Phase == PhaseReady { + r.Recorder.Event(orchestrator, "Normal", "Ready", ReconcileCompletedMessage) + } else if orchestrator.Status.Phase == PhaseFailed { + r.Recorder.Event(orchestrator, "Warning", "Failed", ReconcileFailedMessage) + } +
return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil } diff --git a/controllers/gorch/inferenceservices.go b/controllers/gorch/inferenceservices.go index 9d4b68808..7f75b8621 100644 --- a/controllers/gorch/inferenceservices.go +++ b/controllers/gorch/inferenceservices.go @@ -2,16 +2,37 @@ package gorch import ( "context" + "fmt" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "sigs.k8s.io/controller-runtime/pkg/client" ) +type NoInferenceServicesError struct { + Namespace string +} + +func (e *NoInferenceServicesError) Error() string { + return fmt.Sprintf("no InferenceServices found in namespace %s", e.Namespace) +} + func (r *GuardrailsOrchestratorReconciler) checkGeneratorPresent(ctx context.Context, namespace string) (bool, error) { isvcList := &kservev1beta1.InferenceServiceList{} if err := r.List(ctx, isvcList, client.InNamespace(namespace)); err != nil { - return false, err + return false, fmt.Errorf("failed to list InferenceServices: %w", err) } - return len(isvcList.Items) > 0, nil + if len(isvcList.Items) == 0 { + return false, &NoInferenceServicesError{Namespace: namespace} + } + + for _, isvc := range isvcList.Items { + for _, condition := range isvc.Status.Conditions { + if condition.Type == "Ready" && condition.Status == "True" { + return true, nil + } + } + } + + return false, nil } diff --git a/controllers/gorch/status.go b/controllers/gorch/status.go index 1a3571be2..e7f7d56d0 100644 --- a/controllers/gorch/status.go +++ b/controllers/gorch/status.go @@ -2,7 +2,9 @@ package gorch import ( "context" + "errors" + "fmt" "time" gorchv1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/gorch/v1alpha1" corev1 "k8s.io/api/core/v1" @@ -22,19 +24,46 @@ var ( const ( ConditionReconcileComplete gorchv1alpha1.ConditionType = "ReconcileComplete" ConditionProgessing gorchv1alpha1.ConditionType = "Progressing" + ConditionAvailable gorchv1alpha1.ConditionType = "Available" +
ConditionDegraded gorchv1alpha1.ConditionType = "Degraded" ) const ( PhaseProgressing = "Progressing" PhaseReady = "Ready" + PhaseFailed = "Failed" + PhaseUnknown = "Unknown" ) const ( + // Reconcile reasons ReconcileFailed = "ReconcileFailed" ReconcileInit = "ReconcileInit" ReconcileCompleted = "ReconcileCompleted" - ReconcileCompletedMessage = "Reconcile completed successfully" - ReconcileFailedMessage = "Reconcile failed" + ReconcileProgressing = "ReconcileProgressing" + + // Reconcile messages + ReconcileInitMessage = "Initializing GuardrailsOrchestrator resources" + ReconcileCompletedMessage = "All GuardrailsOrchestrator components are ready and operational" + ReconcileFailedMessage = "Failed to reconcile GuardrailsOrchestrator resources" + ReconcileProgressingMessage = "GuardrailsOrchestrator reconciliation is in progress" + + // Component-specific messages + DeploymentReadyMessage = "GuardrailsOrchestrator deployment is running with all replicas available" + DeploymentNotReadyMessage = "Waiting for GuardrailsOrchestrator deployment to become ready" + DeploymentFailedMessage = "GuardrailsOrchestrator deployment failed to start" + + InferenceServiceReadyMessage = "InferenceService is available and ready to serve requests" + InferenceServiceNotReadyMessage = "Waiting for InferenceService to become available" + InferenceServiceNotFoundMessage = "No InferenceService found in namespace" + + RouteReadyMessage = "Routes are configured and accessible" + RouteNotReadyMessage = "Waiting for routes to be admitted by the router" + RouteFailedMessage = "Failed to configure routes" + + ConfigMapNotFoundMessage = "Required ConfigMap not found" + ServiceAccountCreatedMessage = "ServiceAccount created successfully" + ServiceCreatedMessage = "Service created and configured" ) func SetStatusCondition(conditions *[]gorchv1alpha1.Condition, newCondition gorchv1alpha1.Condition) bool { @@ -72,7 +101,6 @@ func updateCondition(existingCondition *gorchv1alpha1.Condition, newCondition 
go if existingCondition.Reason != newCondition.Reason { changed = true existingCondition.Reason = newCondition.Reason - } if existingCondition.Message != newCondition.Message { changed = true @@ -88,7 +116,32 @@ func SetProgressingCondition(conditions *[]gorchv1alpha1.Condition, reason strin Reason: reason, Message: message, }) +} +func SetAvailableCondition(conditions *[]gorchv1alpha1.Condition, available bool, reason string, message string) { + status := corev1.ConditionFalse + if available { + status = corev1.ConditionTrue + } + SetStatusCondition(conditions, gorchv1alpha1.Condition{ + Type: ConditionAvailable, + Status: status, + Reason: reason, + Message: message, + }) +} + +func SetDegradedCondition(conditions *[]gorchv1alpha1.Condition, degraded bool, reason string, message string) { + status := corev1.ConditionFalse + if degraded { + status = corev1.ConditionTrue + } + SetStatusCondition(conditions, gorchv1alpha1.Condition{ + Type: ConditionDegraded, + Status: status, + Reason: reason, + Message: message, + }) } func SetResourceCondition(conditions *[]gorchv1alpha1.Condition, component string, reason string, message string, status corev1.ConditionStatus) { @@ -125,48 +178,196 @@ func (r *GuardrailsOrchestratorReconciler) updateStatus(ctx context.Context, ori return saved, err } +// Updated reconcileStatuses function in status.go + func (r *GuardrailsOrchestratorReconciler) reconcileStatuses(ctx context.Context, orchestrator *gorchv1alpha1.GuardrailsOrchestrator) (ctrl.Result, error) { - generatorReady, _ = r.checkGeneratorPresent(ctx, orchestrator.Namespace) - deploymentReady, _ = r.checkDeploymentReady(ctx, orchestrator) - httpRouteReady, _ := r.checkRouteReady(ctx, orchestrator, "-http") - healthRouteReady, _ := r.checkRouteReady(ctx, orchestrator, "-health") + logger := log.FromContext(ctx) + + // Check component statuses + generatorReady, generatorErr := r.checkGeneratorPresent(ctx, orchestrator.Namespace) + deploymentReady, deploymentErr := 
r.checkDeploymentReady(ctx, orchestrator) + httpRouteReady, httpRouteErr := r.checkRouteReady(ctx, orchestrator, "") + healthRouteReady, healthRouteErr := r.checkRouteReady(ctx, orchestrator, "-health") routeReady = httpRouteReady && healthRouteReady - if generatorReady && deploymentReady && routeReady { - _, updateErr := r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { - SetResourceCondition(&saved.Status.Conditions, "InferenceService", "InferenceServiceReady", "Inference service is ready", corev1.ConditionTrue) - SetResourceCondition(&saved.Status.Conditions, "Deployment", "DeploymentReady", "Deployment is ready", corev1.ConditionTrue) - SetResourceCondition(&saved.Status.Conditions, "Route", "RouteReady", "Route is ready", corev1.ConditionTrue) - SetCompleteCondition(&saved.Status.Conditions, corev1.ConditionTrue, ReconcileCompleted, ReconcileCompletedMessage) - saved.Status.Phase = PhaseReady - }) - if updateErr != nil { - log.FromContext(ctx).Error(updateErr, "Failed to update status") - return ctrl.Result{}, updateErr + + // Check if we have a NoInferenceServicesError + var noInferenceServicesErr *NoInferenceServicesError + hasNoInferenceServices := generatorErr != nil && errors.As(generatorErr, &noInferenceServicesErr) + + // Calculate overall status + // InferenceService is considered a critical dependency - system cannot be ready without it + allReady := generatorReady && deploymentReady && routeReady + + // System is failed if critical components have errors + anyFailed := hasNoInferenceServices || + (generatorErr != nil && !errors.As(generatorErr, &noInferenceServicesErr)) || + (deploymentErr != nil && deploymentErr.Error() != "not ready") || + (httpRouteErr != nil && httpRouteErr.Error() != "not ready") || + (healthRouteErr != nil && healthRouteErr.Error() != "not ready") + + // Update status based on component states + _, updateErr := r.updateStatus(ctx, orchestrator, func(saved 
*gorchv1alpha1.GuardrailsOrchestrator) { + // Update InferenceService status with proper error handling + if hasNoInferenceServices { + logger.Info("No InferenceServices found in namespace", "namespace", orchestrator.Namespace) + SetResourceCondition(&saved.Status.Conditions, "InferenceService", + "InferenceServiceNotFound", + fmt.Sprintf("No InferenceServices found in namespace '%s'. Please create an InferenceService before deploying GuardrailsOrchestrator", orchestrator.Namespace), + corev1.ConditionFalse) + } else if generatorErr != nil { + logger.Error(generatorErr, "InferenceService check failed") + SetResourceCondition(&saved.Status.Conditions, "InferenceService", + "InferenceServiceCheckFailed", + fmt.Sprintf("Failed to check InferenceService status: %v", generatorErr), + corev1.ConditionFalse) + } else if generatorReady { + SetResourceCondition(&saved.Status.Conditions, "InferenceService", + "InferenceServiceReady", InferenceServiceReadyMessage, corev1.ConditionTrue) + } else { + SetResourceCondition(&saved.Status.Conditions, "InferenceService", + "InferenceServiceNotReady", + "InferenceServices exist but none are ready. 
Waiting for at least one InferenceService to become ready", + corev1.ConditionFalse) } - } else { - _, updateErr := r.updateStatus(ctx, orchestrator, func(saved *gorchv1alpha1.GuardrailsOrchestrator) { - if generatorReady { - SetResourceCondition(&saved.Status.Conditions, "InferenceService", "InferenceServiceReady", "Inference service is ready", corev1.ConditionTrue) - } else { - SetResourceCondition(&saved.Status.Conditions, "InferenceService", "InferenceServiceNotReady", "Inference service is not ready", corev1.ConditionFalse) + + // Update Deployment status with more detailed messages + if deploymentErr != nil && deploymentErr.Error() != "not ready" { + logger.Error(deploymentErr, "Deployment check failed") + SetResourceCondition(&saved.Status.Conditions, "Deployment", + "DeploymentFailed", fmt.Sprintf("%s: %v", DeploymentFailedMessage, deploymentErr), corev1.ConditionFalse) + } else if deploymentReady { + SetResourceCondition(&saved.Status.Conditions, "Deployment", + "DeploymentReady", DeploymentReadyMessage, corev1.ConditionTrue) + } else { + SetResourceCondition(&saved.Status.Conditions, "Deployment", + "DeploymentNotReady", DeploymentNotReadyMessage, corev1.ConditionFalse) + } + + // Update Route status with combined message for both routes + if (httpRouteErr != nil && httpRouteErr.Error() != "not ready") || + (healthRouteErr != nil && healthRouteErr.Error() != "not ready") { + var errMsg string + if httpRouteErr != nil && httpRouteErr.Error() != "not ready" { + errMsg = fmt.Sprintf("HTTP route error: %v", httpRouteErr) } - if deploymentReady { - SetResourceCondition(&saved.Status.Conditions, "Deployment", "DeploymentReady", "Deployment is ready", corev1.ConditionTrue) - } else { - SetResourceCondition(&saved.Status.Conditions, "Deployment", "DeploymentNotReady", "Deployment is not ready", corev1.ConditionFalse) + if healthRouteErr != nil && healthRouteErr.Error() != "not ready" { + if errMsg != "" { + errMsg += "; " + } + errMsg += fmt.Sprintf("Health route 
error: %v", healthRouteErr) } - if routeReady { - SetResourceCondition(&saved.Status.Conditions, "Route", "RouteReady", "Route is ready", corev1.ConditionTrue) - } else { - SetResourceCondition(&saved.Status.Conditions, "Route", "RouteNotReady", "Route is not ready", corev1.ConditionFalse) + logger.Error(errors.New(errMsg), "Route check failed") + SetResourceCondition(&saved.Status.Conditions, "Route", + "RouteFailed", fmt.Sprintf("%s: %s", RouteFailedMessage, errMsg), corev1.ConditionFalse) + } else if routeReady { + SetResourceCondition(&saved.Status.Conditions, "Route", + "RouteReady", RouteReadyMessage, corev1.ConditionTrue) + } else { + routeMsg := RouteNotReadyMessage + if !httpRouteReady && !healthRouteReady { + routeMsg = "Waiting for both HTTP and health routes to be admitted" + } else if !httpRouteReady { + routeMsg = "Waiting for HTTP route to be admitted" + } else if !healthRouteReady { + routeMsg = "Waiting for health route to be admitted" } + SetResourceCondition(&saved.Status.Conditions, "Route", + "RouteNotReady", routeMsg, corev1.ConditionFalse) + } - SetCompleteCondition(&saved.Status.Conditions, corev1.ConditionFalse, ReconcileFailed, ReconcileFailedMessage) - }) - if updateErr != nil { - log.FromContext(ctx).Error(updateErr, "Failed to update status") - return ctrl.Result{}, updateErr + // Set overall status conditions + if allReady { + SetCompleteCondition(&saved.Status.Conditions, corev1.ConditionTrue, + ReconcileCompleted, ReconcileCompletedMessage) + SetAvailableCondition(&saved.Status.Conditions, true, + "AllComponentsReady", "All GuardrailsOrchestrator components are operational") + SetDegradedCondition(&saved.Status.Conditions, false, + "NoIssues", "No degradation detected") + SetProgressingCondition(&saved.Status.Conditions, + ReconcileCompleted, "Reconciliation complete") + saved.Status.Phase = PhaseReady + } else if hasNoInferenceServices { + // Special handling for missing InferenceServices - this is a blocking error +
SetCompleteCondition(&saved.Status.Conditions, corev1.ConditionFalse, + "MissingDependency", + fmt.Sprintf("Cannot complete reconciliation: No InferenceServices found in namespace '%s'", orchestrator.Namespace)) + SetAvailableCondition(&saved.Status.Conditions, false, + "InferenceServiceMissing", + "GuardrailsOrchestrator requires at least one InferenceService to be present") + SetDegradedCondition(&saved.Status.Conditions, true, + "MissingCriticalComponent", + "System is degraded due to missing InferenceService") + SetProgressingCondition(&saved.Status.Conditions, + "WaitingForDependency", + fmt.Sprintf("Waiting for InferenceService to be created in namespace '%s'", orchestrator.Namespace)) + saved.Status.Phase = PhaseFailed + } else if anyFailed { + SetCompleteCondition(&saved.Status.Conditions, corev1.ConditionFalse, + ReconcileFailed, ReconcileFailedMessage) + SetAvailableCondition(&saved.Status.Conditions, false, + "ComponentsFailed", "One or more components have failed") + SetDegradedCondition(&saved.Status.Conditions, true, + "ComponentFailure", "System is degraded due to component failures") + SetProgressingCondition(&saved.Status.Conditions, + ReconcileFailed, "Reconciliation failed with errors") + saved.Status.Phase = PhaseFailed + } else { + // Components are still coming up + SetCompleteCondition(&saved.Status.Conditions, corev1.ConditionFalse, + ReconcileProgressing, ReconcileProgressingMessage) + SetAvailableCondition(&saved.Status.Conditions, false, + "ComponentsNotReady", "Waiting for all components to become ready") + SetDegradedCondition(&saved.Status.Conditions, false, + "Initializing", "Components are initializing") + SetProgressingCondition(&saved.Status.Conditions, + ReconcileProgressing, getProgressMessage(generatorReady, deploymentReady, routeReady)) + saved.Status.Phase = PhaseProgressing } + }) + + if updateErr != nil { + logger.Error(updateErr, "Failed to update GuardrailsOrchestrator status") + return ctrl.Result{}, updateErr + } + + // 
If InferenceServices are missing, requeue more frequently to detect when they're created + if hasNoInferenceServices { + return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } + return ctrl.Result{}, nil } + +// getProgressMessage reports which of the three components are ready and which are still pending +func getProgressMessage(generatorReady, deploymentReady, routeReady bool) string { + var readyComponents []string + var waitingComponents []string + + if generatorReady { + readyComponents = append(readyComponents, "InferenceService") + } else { + waitingComponents = append(waitingComponents, "InferenceService") + } + + if deploymentReady { + readyComponents = append(readyComponents, "Deployment") + } else { + waitingComponents = append(waitingComponents, "Deployment") + } + + if routeReady { + readyComponents = append(readyComponents, "Routes") + } else { + waitingComponents = append(waitingComponents, "Routes") + } + + message := "GuardrailsOrchestrator reconciliation in progress" + if len(readyComponents) > 0 { + message += fmt.Sprintf(". Ready: %v", readyComponents) + } + if len(waitingComponents) > 0 { + message += fmt.Sprintf(". Waiting for: %v", waitingComponents) + } + + return message +}