diff --git a/api/v1alpha/instance_types.go b/api/v1alpha/instance_types.go index cb1698b3..457537a4 100644 --- a/api/v1alpha/instance_types.go +++ b/api/v1alpha/instance_types.go @@ -404,8 +404,10 @@ const ( // InstanceReady indicates that the instance is ready InstanceReady = "Ready" - // InstanceRunning indicates that the instance is running - InstanceRunning = "Running" + // InstanceAvailable indicates that the instance is available. It is True + // when the instance is serving and does not assert that a process is + // actively running at this instant. + InstanceAvailable = "Available" // InstanceProgrammed indicates that the instance has been programmed InstanceProgrammed = "Programmed" @@ -458,20 +460,42 @@ const ( // InstanceReadyReasonSchedulingGatesPresent indicates that the instance is not ready because scheduling gates are present. InstanceReadyReasonSchedulingGatesPresent = "SchedulingGatesPresent" - // InstanceReadyReasonRunning indicates that the instance is running - InstanceReadyReasonRunning = "Running" + // InstanceReadyReasonAvailable indicates that the instance is available + InstanceReadyReasonAvailable = "Available" - // InstanceRunningReasonStopped indicates that the instance is stopped - InstanceRunningReasonStopped = "Stopped" + // InstanceReadyReasonImageUnavailable indicates the provider could not pull + // the instance image (bad name, missing credentials, registry unreachable). + // This matches the reason written by translateWaitingReason in the unikraft + // provider when the container enters an image-pull waiting state. + InstanceReadyReasonImageUnavailable = "ImageUnavailable" - // InstanceRunningReasonStarting indicates that the instance is starting - InstanceRunningReasonStarting = "Starting" + // InstanceReadyReasonInstanceCrashing indicates the instance process started + // but is repeatedly exiting and being restarted (CrashLoopBackOff in the + // underlying runtime). This is user-actionable: the application itself is + // failing, not the platform. + InstanceReadyReasonInstanceCrashing = "InstanceCrashing" - // InstanceRunningReasonStopping indicates that the instance is stopping - InstanceRunningReasonStopping = "Stopping" + // InstanceReadyReasonConfigurationError indicates the runtime rejected the + // instance configuration before the process could start (e.g. invalid env + // variable injection, missing device). User must correct the workload spec. + InstanceReadyReasonConfigurationError = "ConfigurationError" - // InstanceRunningReasonRunning indicates that the instance is running - InstanceRunningReasonRunning = "Running" + // InstanceReadyReasonProvisioning indicates the instance runtime is still + // setting up the execution environment (container being created, image being + // unpacked). This is a transient, non-actionable state. + InstanceReadyReasonProvisioning = "Provisioning" + + // InstanceAvailableReasonStopped indicates that the instance is stopped + InstanceAvailableReasonStopped = "Stopped" + + // InstanceAvailableReasonStarting indicates that the instance is starting + InstanceAvailableReasonStarting = "Starting" + + // InstanceAvailableReasonStopping indicates that the instance is stopping + InstanceAvailableReasonStopping = "Stopping" + + // InstanceAvailableReasonAvailable indicates that the instance is available + InstanceAvailableReasonAvailable = "Available" // InstanceProgrammedReasonPendingProgramming indicates that the instance has not been programmed InstanceProgrammedReasonPendingProgramming = "PendingProgramming" @@ -515,7 +539,7 @@ type Instance struct { // Status defines the current state of an Instance. // - // +kubebuilder:default={conditions:{{type:"Programmed",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Running",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Ready",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"QuotaGranted",status:"Unknown",reason:"PendingEvaluation",message:"Waiting for quota evaluation",lastTransitionTime:"1970-01-01T00:00:00Z"}}} + // +kubebuilder:default={conditions:{{type:"Programmed",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Available",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Ready",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"QuotaGranted",status:"Unknown",reason:"PendingEvaluation",message:"Waiting for quota evaluation",lastTransitionTime:"1970-01-01T00:00:00Z"}}} Status InstanceStatus `json:"status,omitempty"` } diff --git a/api/v1alpha/workload_types.go b/api/v1alpha/workload_types.go index 617172d4..e3e9b04e 100644 --- a/api/v1alpha/workload_types.go +++ b/api/v1alpha/workload_types.go @@ -58,15 +58,27 @@ type WorkloadStatus struct { // The number of instances that currently exist Replicas int32 `json:"replicas"` - // The number of instances which have the latest workload settings applied. + // The number of instances which have the latest workload settings applied + // and are programmed (a subset of UpdatedReplicas that are ready to serve). CurrentReplicas int32 `json:"currentReplicas"` + // The number of instances updated to the latest template revision (their + // observed template hash matches the desired template), regardless of + // readiness. Lags Replicas during a rolling update or restart, then catches + // back up — making an in-progress roll observable. + UpdatedReplicas int32 `json:"updatedReplicas"` + // The desired number of instances DesiredReplicas int32 `json:"desiredReplicas"` // The number of instances which are ready. ReadyReplicas int32 `json:"readyReplicas"` + // The most recent generation observed by the workload controller. + // + // +kubebuilder:validation:Optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // The current status of placemetns in a workload. Placements []WorkloadPlacementStatus `json:"placements,omitempty"` @@ -99,7 +111,7 @@ type WorkloadGatewayStatus struct { // +kubebuilder:printcolumn:name="Replicas",type=string,JSONPath=`.status.replicas` // +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.readyReplicas` // +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.status.desiredReplicas` -// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.currentReplicas` +// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.updatedReplicas` type Workload struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` @@ -146,9 +158,14 @@ type WorkloadPlacementStatus struct { // The number of instances that currently exist Replicas int32 `json:"replicas"` - // The number of instances which have the latest workload settings applied. + // The number of instances which have the latest workload settings applied + // and are programmed (a subset of UpdatedReplicas that are ready to serve). CurrentReplicas int32 `json:"currentReplicas"` + // The number of instances updated to the latest template revision, regardless + // of readiness. Lags Replicas during a rolling update or restart. + UpdatedReplicas int32 `json:"updatedReplicas"` + // The desired number of instances DesiredReplicas int32 `json:"desiredReplicas"` diff --git a/api/v1alpha/workloaddeployment_types.go b/api/v1alpha/workloaddeployment_types.go index 7da27c89..7da6bf45 100644 --- a/api/v1alpha/workloaddeployment_types.go +++ b/api/v1alpha/workloaddeployment_types.go @@ -49,14 +49,28 @@ type WorkloadDeploymentStatus struct { // The number of instances created Replicas int32 `json:"replicas"` - // The number of instances which have the latest workload settings applied. + // The number of instances which have the latest workload settings applied + // and are programmed (a subset of UpdatedReplicas that are ready to serve). CurrentReplicas int32 `json:"currentReplicas"` + // The number of instances updated to the latest template revision, i.e. + // whose observed template hash matches the desired template, regardless of + // readiness. Lags Replicas during a rolling update or restart, then catches + // back up — making an in-progress roll observable. + UpdatedReplicas int32 `json:"updatedReplicas"` + // The desired number of instances DesiredReplicas int32 `json:"desiredReplicas"` // The number of instances which are ready. ReadyReplicas int32 `json:"readyReplicas"` + + // The most recent generation observed by the deployment controller. When + // this matches metadata.generation, the controller has reconciled the + // latest spec (e.g. a restart request). + // + // +kubebuilder:validation:Optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` } const ( @@ -79,7 +93,7 @@ const ( // +kubebuilder:printcolumn:name="Replicas",type=string,JSONPath=`.status.replicas` // +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.readyReplicas` // +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.status.desiredReplicas` -// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.currentReplicas` +// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.updatedReplicas` // +kubebuilder:printcolumn:name="Location Namespace",type=string,JSONPath=`.status.location.namespace`,priority=1 // +kubebuilder:printcolumn:name="Location Name",type=string,JSONPath=`.status.location.name`,priority=1 type WorkloadDeployment struct { diff --git a/cmd/main.go b/cmd/main.go index 01d3eddd..1d1241be 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -467,7 +467,27 @@ func ignoreCanceled(err error) error { // InstanceProjector). Called only when management controllers are enabled and // a federation REST config is available. func setupManagementControllers(mgr mcmanager.Manager, federationClient client.Client) ([]manager.Runnable, error) { - federator := &controller.WorkloadDeploymentFederator{FederationClient: federationClient} + // The federation manager provides a cached, watchable handle to the Karmada + // federation control plane. It backs the InstanceProjector's Instance watch + // and the WorkloadDeploymentFederator's downstream WorkloadDeployment status + // watch. A manager.Manager embeds a cluster.Cluster, so it can be passed + // directly anywhere a watchable federation cluster source is required. + federationMgr, err := manager.New(federationRestConfig, manager.Options{ + Scheme: scheme, + Metrics: metricsserver.Options{BindAddress: "0"}, + }) + if err != nil { + return nil, fmt.Errorf("federation manager: %w", err) + } + + // The federator watches both the project WD (via the multicluster manager) + // and the downstream Karmada WD (via the federation cluster) so that status + // aggregated downstream by Karmada is mirrored back to the project WD + // immediately instead of on the next informer resync. + federator := &controller.WorkloadDeploymentFederator{ + FederationClient: federationClient, + FederationCluster: federationMgr, + } if err := federator.SetupWithManager(mgr); err != nil { return nil, fmt.Errorf("WorkloadDeploymentFederator: %w", err) } @@ -475,13 +495,6 @@ func setupManagementControllers(mgr mcmanager.Manager, federationClient client.C // InstanceProjector runs in the management plane, watches Instances written // back by POP-cell operators to the Karmada federation control plane, and // projects them into the corresponding project namespaces via the multicluster manager. - federationMgr, err := manager.New(federationRestConfig, manager.Options{ - Scheme: scheme, - Metrics: metricsserver.Options{BindAddress: "0"}, - }) - if err != nil { - return nil, fmt.Errorf("federation manager for InstanceProjector: %w", err) - } if err = (&controller.InstanceProjector{ FederationClient: federationClient, MCManager: mgr, diff --git a/config/base/crd/bases/compute.datumapis.com_instances.yaml b/config/base/crd/bases/compute.datumapis.com_instances.yaml index c9301561..a007c0d7 100644 --- a/config/base/crd/bases/compute.datumapis.com_instances.yaml +++ b/config/base/crd/bases/compute.datumapis.com_instances.yaml @@ -887,7 +887,7 @@ spec: message: Waiting for controller reason: Pending status: Unknown - type: Running + type: Available - lastTransitionTime: "1970-01-01T00:00:00Z" message: Waiting for controller reason: Pending diff --git a/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml b/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml index 48a2501d..e584af9f 100644 --- a/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml +++ b/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml @@ -34,7 +34,7 @@ spec: - jsonPath: .status.desiredReplicas name: Desired type: string - - jsonPath: .status.currentReplicas + - jsonPath: .status.updatedReplicas name: Up-to-date type: string - jsonPath: .status.location.namespace @@ -1087,8 +1087,9 @@ spec: type: object type: array currentReplicas: - description: The number of instances which have the latest workload - settings applied. + description: |- + The number of instances which have the latest workload settings applied + and are programmed (a subset of UpdatedReplicas that are ready to serve). format: int32 type: integer desiredReplicas: @@ -1109,6 +1110,13 @@ spec: - name - namespace type: object + observedGeneration: + description: |- + The most recent generation observed by the deployment controller. When + this matches metadata.generation, the controller has reconciled the + latest spec (e.g. a restart request). + format: int64 + type: integer readyReplicas: description: The number of instances which are ready. format: int32 @@ -1117,11 +1125,20 @@ spec: description: The number of instances created format: int32 type: integer + updatedReplicas: + description: |- + The number of instances updated to the latest template revision, i.e. + whose observed template hash matches the desired template, regardless of + readiness. Lags Replicas during a rolling update or restart, then catches + back up — making an in-progress roll observable. + format: int32 + type: integer required: - currentReplicas - desiredReplicas - readyReplicas - replicas + - updatedReplicas type: object type: object served: true diff --git a/config/base/crd/bases/compute.datumapis.com_workloads.yaml b/config/base/crd/bases/compute.datumapis.com_workloads.yaml index c452910f..c1c8efd9 100644 --- a/config/base/crd/bases/compute.datumapis.com_workloads.yaml +++ b/config/base/crd/bases/compute.datumapis.com_workloads.yaml @@ -37,7 +37,7 @@ spec: - jsonPath: .status.desiredReplicas name: Desired type: string - - jsonPath: .status.currentReplicas + - jsonPath: .status.updatedReplicas name: Up-to-date type: string name: v1alpha @@ -1081,8 +1081,9 @@ spec: type: object type: array currentReplicas: - description: The number of instances which have the latest workload - settings applied. + description: |- + The number of instances which have the latest workload settings applied + and are programmed (a subset of UpdatedReplicas that are ready to serve). format: int32 type: integer deployments: @@ -1367,6 +1368,10 @@ spec: - name x-kubernetes-list-type: map type: object + observedGeneration: + description: The most recent generation observed by the workload controller. + format: int64 + type: integer placements: description: The current status of placemetns in a workload. items: @@ -1432,8 +1437,9 @@ spec: type: object type: array currentReplicas: - description: The number of instances which have the latest workload - settings applied. + description: |- + The number of instances which have the latest workload settings applied + and are programmed (a subset of UpdatedReplicas that are ready to serve). format: int32 type: integer desiredReplicas: @@ -1451,12 +1457,19 @@ spec: description: The number of instances that currently exist format: int32 type: integer + updatedReplicas: + description: |- + The number of instances updated to the latest template revision, regardless + of readiness. Lags Replicas during a rolling update or restart. + format: int32 + type: integer required: - currentReplicas - desiredReplicas - name - readyReplicas - replicas + - updatedReplicas type: object type: array readyReplicas: @@ -1467,12 +1480,21 @@ spec: description: The number of instances that currently exist format: int32 type: integer + updatedReplicas: + description: |- + The number of instances updated to the latest template revision (their + observed template hash matches the desired template), regardless of + readiness. Lags Replicas during a rolling update or restart, then catches + back up — making an in-progress roll observable. + format: int32 + type: integer required: - currentReplicas - deployments - desiredReplicas - readyReplicas - replicas + - updatedReplicas type: object required: - spec diff --git a/config/components/controller_rbac/role.yaml b/config/components/controller_rbac/role.yaml index e8721899..a634f512 100644 --- a/config/components/controller_rbac/role.yaml +++ b/config/components/controller_rbac/role.yaml @@ -4,6 +4,13 @@ kind: ClusterRole metadata: name: compute rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch - apiGroups: - "" resources: diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index f11520a7..86ab5572 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -7,6 +7,7 @@ import ( "fmt" "maps" "strings" + "time" corev1 "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" @@ -57,6 +58,19 @@ const ( // the same project control planes. instanceQuotaClaimSourceLabel = "compute.datumapis.com/source-cluster" + // instanceQuotaClaimNamespaceLabel records the source Instance's namespace on + // the ResourceClaim. The claim lives in the project's quota namespace (not the + // Instance's namespace), so the claim watch reads this label to map a grant + // back to the owning Instance. + instanceQuotaClaimNamespaceLabel = "compute.datumapis.com/instance-namespace" + + // instanceQuotaClaimNamePrefix namespaces an Instance's ResourceClaim name by + // resource type. Claims for different resource kinds share the project quota + // namespace, so the Instance name alone (unique among Instances, but not + // across kinds) could collide with another kind's claim — the prefix prevents + // that. The claim watch strips it to recover the Instance name. + instanceQuotaClaimNamePrefix = "instance-" + // quotaResourceTypeInstances is the quota resource type for Instance count. quotaResourceTypeInstances = "compute.datumapis.com/instances" @@ -75,13 +89,63 @@ const ( // msgInstanceProgrammed is the human-readable message for the programmed state. msgInstanceProgrammed = "Instance has been programmed" - // msgInstanceRunning is the human-readable message for the running state. - msgInstanceRunning = "Instance is running" + // msgInstanceAvailable is the human-readable message for the available state. + msgInstanceAvailable = "Instance is available" // reasonNetworkFailedToCreate is the reason code for network creation failure. reasonNetworkFailedToCreate = "NetworkFailedToCreate" ) +// instanceTypeD1Standard2 is the platform instance type name for the +// 1 vCPU / 2 GiB size used as the catalog baseline for quota accounting. +const instanceTypeD1Standard2 = "datumcloud/d1-standard-2" + +// instanceTypeResources holds the vCPU and memory for a named instance type. +type instanceTypeResources struct { + // CPUMillicores is the number of CPU millicores (1000 = 1 vCPU). + CPUMillicores int64 + // MemoryMiB is the amount of RAM in mebibytes. + MemoryMiB int64 +} + +// instanceTypeCatalog maps platform instance type names to their resource +// dimensions used for quota accounting when the instance spec carries only an +// instanceType and no explicit container Limits or instance-level Requests. +// +// These are the platform-declared quota sizes for the instance type, not a +// derivation of any infra provider's machine type. (infra-provider-gcp separately +// maps datumcloud/d1-standard-2 to the GCP n2-standard-2 machine type for VM +// provisioning; that mapping does not define the quota size here.) When new +// instance types are added, add them here with their vCPU/memory values. +var instanceTypeCatalog = map[string]instanceTypeResources{ + instanceTypeD1Standard2: { + CPUMillicores: 1000, // 1 vCPU + MemoryMiB: 2048, // 2 GiB + }, +} + +// Quota-pending requeue backoff. The instance controller is normally re-queued by +// the ResourceClaim watch when a claim is granted, but that grant event lives on +// the project control plane and can be missed (informer engagement races, watch +// relist gaps), wedging the instance at QuotaGranted!=True indefinitely. While +// quota is pending we requeue on a backing-off schedule as a safety net so a +// missed grant self-heals. The interval lengthens the longer the instance waits: +// +// elapsed < 60s : every 1s (catch a grant landing almost immediately) +// 60s – 5m : every 15s +// 5m – 10m : every 60s +// >= 10m : every 300s +const ( + quotaPendingRequeueFast = 1 * time.Second + quotaPendingRequeueMedium = 15 * time.Second + quotaPendingRequeueSlow = 60 * time.Second + quotaPendingRequeueIdle = 300 * time.Second + + quotaPendingFastWindow = 60 * time.Second + quotaPendingMediumWindow = 5 * time.Minute + quotaPendingSlowWindow = 10 * time.Minute +) + // clusterGetter is the subset of mcmanager.Manager used by InstanceReconciler. // Keeping it narrow allows unit tests to substitute a minimal fake. type clusterGetter interface { @@ -153,6 +217,7 @@ type InstanceReconciler struct { // +kubebuilder:rbac:groups=compute.datumapis.com,resources=instances/finalizers,verbs=update // +kubebuilder:rbac:groups=quota.miloapis.com,resources=resourceclaims,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups="",resources=namespaces,verbs=get +// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Request) (_ ctrl.Result, err error) { logger := log.FromContext(ctx) @@ -201,6 +266,18 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ statusChanged, quotaErr := r.reconcileQuotaCondition(ctx, req.ClusterName, &instance) + // Safety-net requeue while quota is not yet granted, computed up front so + // every return path below honors it. A conflict during the pending window + // must not drop the instance onto controller-runtime's exponential + // error-backoff (which can stretch to minutes), which would defeat recovery + // from a missed ResourceClaim grant event. Logged so the requeue is + // observable: a re-firing requeue prints this every pass while pending. + quotaReq := quotaPendingRequeueAfter(&instance, time.Now()) + if quotaReq > 0 { + logger.Info("quota pending; scheduling safety-net requeue", + "after", quotaReq.String(), "cluster", req.ClusterName.String(), "instance", instance.Name) + } + // Even when reconcileQuotaCondition returns a transient error, persist any // condition change first so the failure reason is visible on the Instance. // We return the error afterwards so controller-runtime requeues with backoff. @@ -211,6 +288,11 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ if statusChanged || readyChanged { if err := cl.GetClient().Status().Update(ctx, &instance); err != nil { + if quotaReq > 0 && apierrors.IsConflict(err) { + logger.Info("status update conflicted while quota pending; requeuing instead of error-backoff", + "after", quotaReq.String(), "instance", instance.Name) + return ctrl.Result{RequeueAfter: quotaReq}, nil + } return ctrl.Result{}, err } // Return with the quota error (nil or transient) so controller-runtime @@ -235,10 +317,20 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ } if err := r.writeBackToUpstream(ctx, req.ClusterName, &instance); err != nil { + if quotaReq > 0 && apierrors.IsConflict(err) { + logger.Info("upstream writeback conflicted while quota pending; requeuing instead of error-backoff", + "after", quotaReq.String(), "instance", instance.Name) + return ctrl.Result{RequeueAfter: quotaReq}, nil + } return ctrl.Result{}, err } - return ctrl.Result{}, nil + if quotaReq > 0 { + logger.Info("requeuing instance", "after", quotaReq.String(), + "cluster", req.ClusterName.String(), "instance", instance.Name) + } + + return ctrl.Result{RequeueAfter: quotaReq}, nil } // reconcileDeletion handles quota-claim cleanup when an Instance is being @@ -275,7 +367,7 @@ func (r *InstanceReconciler) reconcileDeletion(ctx context.Context, cl client.Cl if err != nil { return fmt.Errorf("resolving project namespace during deletion: %w", err) } - claimName := fmt.Sprintf("%s--%s", instance.Namespace, instance.Name) + claimName := quotaClaimName(instance) var claim quotav1alpha1.ResourceClaim if err := projectClient.Get(ctx, client.ObjectKey{Namespace: claimNamespace, Name: claimName}, &claim); err != nil { if !apierrors.IsNotFound(err) { @@ -296,6 +388,45 @@ func (r *InstanceReconciler) reconcileDeletion(ctx context.Context, cl client.Cl return nil } +// quotaClaimName returns the name of the ResourceClaim backing an Instance's +// quota: the Instance name (unique among Instances within the project control +// plane) prefixed by instanceQuotaClaimNamePrefix to avoid colliding with other +// resource kinds' claims in the shared quota namespace. The owning Instance's +// namespace is preserved on the claim via instanceQuotaClaimNamespaceLabel so +// the claim watch can map a grant back to the Instance. +func quotaClaimName(instance *computev1alpha.Instance) string { + return instanceQuotaClaimNamePrefix + instance.Name +} + +// quotaPendingRequeueAfter returns a safety-net requeue interval while the +// instance's quota is not yet granted, backing off the longer it has waited (see +// the quotaPendingRequeue* constants). It returns 0 when quota is already granted +// (QuotaGranted=True) or the condition is absent, so a granted/normal instance is +// not needlessly requeued. +// +// Elapsed time is anchored on the instance's creation timestamp, NOT the +// QuotaGranted condition's LastTransitionTime: while quota is pending the +// condition stays Unknown (PendingEvaluation and NoBudget are both Unknown), so +// SetStatusCondition never bumps LastTransitionTime off its 1970-01-01 CRD +// default — which would peg every pending instance to the slowest tier. +func quotaPendingRequeueAfter(instance *computev1alpha.Instance, now time.Time) time.Duration { + cond := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceQuotaGranted) + if cond == nil || cond.Status == metav1.ConditionTrue { + return 0 + } + elapsed := now.Sub(instance.CreationTimestamp.Time) + switch { + case elapsed < quotaPendingFastWindow: + return quotaPendingRequeueFast + case elapsed < quotaPendingMediumWindow: + return quotaPendingRequeueMedium + case elapsed < quotaPendingSlowWindow: + return quotaPendingRequeueSlow + default: + return quotaPendingRequeueIdle + } +} + // reconcileQuotaCondition reconciles the ResourceClaim and updates the // InstanceQuotaGranted status condition. It returns (changed, err) where // changed=true means a status update is required, and err non-nil means the @@ -627,7 +758,7 @@ func (r *InstanceReconciler) reconcileQuotaClaim(ctx context.Context, clusterNam return nil, nil } - claimName := fmt.Sprintf("%s--%s", instance.Namespace, instance.Name) + claimName := quotaClaimName(instance) requests := []quotav1alpha1.ResourceRequest{ { @@ -657,7 +788,8 @@ func (r *InstanceReconciler) reconcileQuotaClaim(ctx context.Context, clusterNam Name: claimName, Namespace: claimNamespace, Labels: map[string]string{ - instanceQuotaClaimSourceLabel: r.edgeClusterName, + instanceQuotaClaimSourceLabel: r.edgeClusterName, + instanceQuotaClaimNamespaceLabel: instance.Namespace, }, }, Spec: quotav1alpha1.ResourceClaimSpec{ @@ -748,8 +880,24 @@ func (r *InstanceReconciler) classifyCreateError( }, fmt.Errorf("failed creating resource claim: %w", err) } +// resolveInstanceResources determines the vCPU and memory amounts to claim +// for an instance. Explicit sizing always takes precedence over the instance +// type catalog, so a workload that overrides container limits is accounted at +// its actual resource footprint rather than the catalog baseline. +// +// Precedence order: +// 1. Sandbox container Limits (sum across all containers) — all containers +// must have both cpu and memory Limits for this path to succeed. +// 2. Instance-level Resources.Requests — both cpu and memory must be present. +// 3. instanceTypeCatalog lookup by instanceType — used for the common case +// where a workload is sized only by instanceType with no explicit limits. +// +// Returns (0, 0, false) when none of the above yield a complete sizing, so +// the caller falls back to claiming only the instance count. func resolveInstanceResources(instance *computev1alpha.Instance) (cpuMillicores int64, memMiB int64, resolved bool) { rt := instance.Spec.Runtime + + // Path 1: explicit per-container Limits — most specific, wins if fully set. if rt.Sandbox != nil { var totalCPU resource.Quantity var totalMem resource.Quantity @@ -768,18 +916,59 @@ func resolveInstanceResources(instance *computev1alpha.Instance) (cpuMillicores totalCPU.Add(cpu) totalMem.Add(mem) } - if !allSet || len(rt.Sandbox.Containers) == 0 { - return 0, 0, false + if allSet && len(rt.Sandbox.Containers) > 0 { + return totalCPU.MilliValue(), totalMem.Value() / (1024 * 1024), true } - return totalCPU.MilliValue(), totalMem.Value() / (1024 * 1024), true + // Containers exist but limits are incomplete — fall through to catalog + // rather than returning false, because instanceType is still set. } + // Path 2: instance-level resource requests. cpu, hasCPU := rt.Resources.Requests[corev1.ResourceCPU] mem, hasMem := rt.Resources.Requests[corev1.ResourceMemory] - if !hasCPU || !hasMem { - return 0, 0, false + if hasCPU && hasMem { + return cpu.MilliValue(), mem.Value() / (1024 * 1024), true + } + + // Path 3: instanceType catalog — handles the typical production case where + // instanceType is the only sizing signal and no explicit limits are set. + if rt.Resources.InstanceType != "" { + if spec, ok := instanceTypeCatalog[rt.Resources.InstanceType]; ok { + return spec.CPUMillicores, spec.MemoryMiB, true + } + } + + return 0, 0, false +} + +// instanceBlockingReasonPriority ranks Instance blocking reasons so the most +// specific, user-actionable cause wins when several conditions are unsatisfied. +// Higher numbers are more specific. Reasons absent from the table rank 0. +// +// 0 - unknown/default +// 1 - Provisioning (transient runtime startup) +// 3 - PendingQuota (operator action may be needed) +// 5 - ImageUnavailable / InstanceCrashing / ConfigurationError +// (hard runtime error, user-actionable) +// 7 - NetworkFailedToCreate (hard infra error) +func instanceBlockingReasonPriority(reason string) int { + switch reason { + case computev1alpha.InstanceReadyReasonProvisioning: + return 1 + case computev1alpha.InstanceProgrammedReasonPendingQuota: + return 3 + case computev1alpha.InstanceReadyReasonImageUnavailable, + computev1alpha.InstanceReadyReasonInstanceCrashing, + computev1alpha.InstanceReadyReasonConfigurationError: + // Hard runtime errors are user-actionable (wrong image, crashing app, bad + // config) and rank highest among non-infra reasons so they are not buried + // under transient startup/quota reasons. + return 5 + case reasonNetworkFailedToCreate: + return 7 + default: + return 0 } - return cpu.MilliValue(), mem.Value() / (1024 * 1024), true } // networkFailureChecker is a function that checks if a network creation failure @@ -806,7 +995,7 @@ func (r *InstanceReconciler) reconcileInstanceReadyCondition( ObservedGeneration: instance.Generation, }) changed = apimeta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionFalse, Reason: computev1alpha.InstanceProgrammedReasonPendingQuota, Message: msg, @@ -863,42 +1052,119 @@ func (r *InstanceReconciler) reconcileInstanceReadyCondition( if programmedCondition == nil || programmedCondition.Status != metav1.ConditionTrue { logger.Info("instance is not programmed", "instance", instance.Name) - readyCondition.Status = metav1.ConditionFalse - readyCondition.Reason = computev1alpha.InstanceProgrammedReasonPendingProgramming - if programmedCondition != nil && programmedCondition.Reason != pendingReason { - readyCondition.Reason = programmedCondition.Reason + // Surface the most specific provider sub-condition rather than a generic + // "Instance has not been programmed". A provider reason like + // ImageUnavailable (set on the Available condition while Programmed is + // still Unknown) must surface on Ready with its actionable message. + // + // Two tiers are tracked: + // - bestKnown: the best candidate from the priority table (ranked 1-7). + // - fallback: the Programmed condition's own reason/message when it has + // one but it is not in the priority table (e.g. a provider + // writes a custom Programmed reason otherwise unknown to + // this controller). Preserves Programmed.Reason → Ready.Reason + // pass-through behavior. + type candidate struct { + status metav1.ConditionStatus + reason string + message string + priority int } - readyCondition.Message = msgNotProgrammed - if programmedCondition != nil && programmedCondition.Status != metav1.ConditionUnknown { - readyCondition.Message = programmedCondition.Message + // Generic default — used only when nothing better is found. + fallbackCandidate := candidate{ + status: metav1.ConditionFalse, + reason: computev1alpha.InstanceProgrammedReasonPendingProgramming, + message: msgNotProgrammed, + priority: -1, + } + // Promote the Programmed condition's own reason as a fallback when it is + // more specific than PendingProgramming/Pending but not in the priority + // table. Preserves pass-through for provider-written Programmed reasons. + if programmedCondition != nil && programmedCondition.Reason != pendingReason && + programmedCondition.Reason != computev1alpha.InstanceProgrammedReasonPendingProgramming { + fallbackCandidate = candidate{ + status: programmedCondition.Status, + reason: programmedCondition.Reason, + message: programmedCondition.Message, + priority: 0, + } } + best := fallbackCandidate + consider := func(status metav1.ConditionStatus, reason, message string) { + // A generic "Pending" reason carries no actionable signal; skip it so + // it cannot displace an already-set specific reason from the provider. + if reason == pendingReason { + return + } + p := instanceBlockingReasonPriority(reason) + if p > best.priority { + best = candidate{status: status, reason: reason, message: message, priority: p} + } + } + + // Sub-conditions set by the provider (e.g. Available=Unknown/ImageUnavailable) + // may be more specific than the Programmed condition. Consult each one so + // the highest-priority reason wins, regardless of which condition carries it. + for _, cond := range instance.Status.Conditions { + if cond.Status == metav1.ConditionTrue { + // Satisfied conditions are not blocking; skip them. + continue + } + switch cond.Type { + case computev1alpha.InstanceProgrammed, + computev1alpha.InstanceReady, + computev1alpha.InstanceQuotaGranted: + // InstanceProgrammed is handled below; InstanceReady is being set + // now. InstanceQuotaGranted is a gate-level signal evaluated before + // this branch is reached — including it here would let a transient + // PendingEvaluation reason displace the generic not-programmed + // fallback when no provider sub-condition is set yet. + continue + } + consider(cond.Status, cond.Reason, cond.Message) + } + // Also let the Programmed condition itself compete through the priority table + // in case it carries a known reason (e.g. PendingQuota). + if programmedCondition != nil { + consider(programmedCondition.Status, programmedCondition.Reason, programmedCondition.Message) + } + + readyCondition.Status = best.status + readyCondition.Reason = best.reason + readyCondition.Message = best.message + return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil } logger.Info("instance is programmed", "instance", instance.Name) - runningCondition := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceRunning) - if runningCondition == nil || runningCondition.Status != metav1.ConditionTrue { - logger.Info("instance is not running", "instance", instance.Name) - - readyCondition.Status = metav1.ConditionFalse - readyCondition.Reason = pendingReason - if runningCondition != nil && runningCondition.Reason != pendingReason { - readyCondition.Reason = runningCondition.Reason + availableCondition := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceAvailable) + if availableCondition == nil || availableCondition.Status != metav1.ConditionTrue { + logger.Info("instance is not available", "instance", instance.Name) + + // Propagate the Available condition's reason and message directly — + // including when the status is Unknown — so provider-set reasons like + // ImageUnavailable surface on Ready rather than a generic message. + readyStatus := metav1.ConditionFalse + readyReason := pendingReason + readyMessage := "Instance is not available" + if availableCondition != nil && availableCondition.Reason != pendingReason { + readyStatus = availableCondition.Status + readyReason = availableCondition.Reason + readyMessage = availableCondition.Message } - readyCondition.Message = "Instance is not running" - if runningCondition != nil && runningCondition.Status != metav1.ConditionUnknown { - readyCondition.Message = runningCondition.Message - } + readyCondition.Status = readyStatus + readyCondition.Reason = readyReason + readyCondition.Message = readyMessage return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil } readyCondition.Status = metav1.ConditionTrue - readyCondition.Reason = computev1alpha.InstanceReadyReasonRunning + readyCondition.Reason = computev1alpha.InstanceReadyReasonAvailable readyCondition.Message = msgInstanceReady return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil @@ -1033,15 +1299,20 @@ func (r *InstanceReconciler) SetupWithManager( return handler.TypedEnqueueRequestsFromMapFunc( func(ctx context.Context, obj client.Object) []mcreconcile.Request { claim := obj.(*quotav1alpha1.ResourceClaim) - if claim.Spec.ResourceRef.Name == "" { + // Map the claim back to its owning Instance. The Instance + // namespace is carried on a label (the claim itself lives in + // the project's quota namespace) and the Instance name is the + // claim name with the resource-kind prefix stripped. + instanceNamespace := claim.GetLabels()[instanceQuotaClaimNamespaceLabel] + if instanceNamespace == "" { return nil } return []mcreconcile.Request{ { Request: reconcile.Request{ NamespacedName: types.NamespacedName{ - Namespace: claim.Spec.ResourceRef.Namespace, - Name: claim.Spec.ResourceRef.Name, + Namespace: instanceNamespace, + Name: strings.TrimPrefix(claim.Name, instanceQuotaClaimNamePrefix), }, }, ClusterName: r.resolveClusterNameForProject(claim.Spec.ConsumerRef.Name), diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index 31636c3f..0d55dfd5 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -4,11 +4,14 @@ import ( "context" "fmt" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -175,7 +178,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { }, }, { - name: "instance programmed but not running should wait for running", + name: "instance programmed but not available should wait for available", instance: &computev1alpha.Instance{ ObjectMeta: metav1.ObjectMeta{ Name: testInstanceName, @@ -191,7 +194,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { Message: msgInstanceProgrammed, }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionFalse, Reason: testReasonString, Message: testMessageString, @@ -225,10 +228,10 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { Message: msgInstanceProgrammed, }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, }, }, }, @@ -237,7 +240,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { expectedCondition: &metav1.Condition{ Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, ObservedGeneration: 1, }, @@ -255,7 +258,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { { Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, ObservedGeneration: 1, LastTransitionTime: metav1.Now(), @@ -267,10 +270,10 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { Message: msgInstanceProgrammed, }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, }, }, }, @@ -279,7 +282,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { expectedCondition: &metav1.Condition{ Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, ObservedGeneration: 1, }, @@ -352,10 +355,10 @@ func TestReconcileInstanceReadyConditionWithQuota(t *testing.T) { LastTransitionTime: metav1.Now(), }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, LastTransitionTime: metav1.Now(), }, }, @@ -394,10 +397,10 @@ func TestReconcileInstanceReadyConditionWithQuota(t *testing.T) { LastTransitionTime: metav1.Now(), }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, LastTransitionTime: metav1.Now(), }, }, @@ -407,7 +410,7 @@ func TestReconcileInstanceReadyConditionWithQuota(t *testing.T) { expectedCondition: &metav1.Condition{ Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, }, }, @@ -479,7 +482,7 @@ func TestReconcileQuota(t *testing.T) { instanceName = "my-instance" ) - claimName := namespace + "--" + instanceName + claimName := instanceQuotaClaimNamePrefix + instanceName const deploymentName = "my-deployment" @@ -647,7 +650,7 @@ func TestReconcileQuota(t *testing.T) { assert.False(t, hasQuotaGate, "QuotaSchedulingGate must be removed in the same reconcile pass as the status update") }) - t.Run("quota exceeded flow: conditions cascade to block Programmed/Running/Ready", func(t *testing.T) { + t.Run("quota exceeded flow: conditions cascade to block Programmed/Available/Ready", func(t *testing.T) { s := newTestScheme(t) instance := makeInstance(s, computev1alpha.SchedulingGate{Name: instancecontrol.NetworkSchedulingGate.String()}, @@ -673,10 +676,10 @@ func TestReconcileQuota(t *testing.T) { assert.Equal(t, metav1.ConditionFalse, programmedCond.Status) assert.Equal(t, computev1alpha.InstanceProgrammedReasonPendingQuota, programmedCond.Reason) - runningCond := apimeta.FindStatusCondition(updated.Status.Conditions, computev1alpha.InstanceRunning) - require.NotNil(t, runningCond) - assert.Equal(t, metav1.ConditionFalse, runningCond.Status) - assert.Equal(t, computev1alpha.InstanceProgrammedReasonPendingQuota, runningCond.Reason) + availableCond := apimeta.FindStatusCondition(updated.Status.Conditions, computev1alpha.InstanceAvailable) + require.NotNil(t, availableCond) + assert.Equal(t, metav1.ConditionFalse, availableCond.Status) + assert.Equal(t, computev1alpha.InstanceProgrammedReasonPendingQuota, availableCond.Reason) readyCond := apimeta.FindStatusCondition(updated.Status.Conditions, computev1alpha.InstanceReady) require.NotNil(t, readyCond) @@ -812,7 +815,7 @@ func TestQuotaGateRemovedInSingleReconcile(t *testing.T) { deploymentName = "my-deployment" ) - claimName := namespace + "--" + instanceName + claimName := instanceQuotaClaimNamePrefix + instanceName tests := []struct { name string @@ -994,9 +997,9 @@ func TestReconcileQuotaSingleMode(t *testing.T) { deploymentName = "my-deployment" ) - // Claim name uses the edge namespace prefix (stable identifier for the claim) - // but the claim object itself lives in projectNS. - claimName := edgeNS + "--" + instanceName + // Claim name is the instance-prefixed Instance name; the claim object itself + // lives in projectNS (the instance's edge namespace is carried on a label). + claimName := instanceQuotaClaimNamePrefix + instanceName s := newTestScheme(t) @@ -1346,7 +1349,7 @@ func TestReconcileQuotaFailureModes(t *testing.T) { s := newTestScheme(t) fakeRecorder := record.NewFakeRecorder(10) - claimName := testNS + "--" + testInstance + claimName := instanceQuotaClaimNamePrefix + testInstance pendingClaim := "av1alpha1.ResourceClaim{ ObjectMeta: metav1.ObjectMeta{Name: claimName, Namespace: testNS}, Spec: quotav1alpha1.ResourceClaimSpec{ @@ -1498,7 +1501,7 @@ func TestReconcileQuotaFailureModes(t *testing.T) { WithStatusSubresource(&computev1alpha.Instance{}). Build() - claimName := testNS + "--" + testInstance + claimName := instanceQuotaClaimNamePrefix + testInstance grantedClaim := "av1alpha1.ResourceClaim{ ObjectMeta: metav1.ObjectMeta{Name: claimName, Namespace: testNS}, Spec: quotav1alpha1.ResourceClaimSpec{ @@ -1570,3 +1573,657 @@ func TestReconcileQuotaFailureModes(t *testing.T) { assert.Equal(t, int64(2), cond.ObservedGeneration, "condition must reflect current generation") }) } + +// TestQuotaPendingRequeueAfter verifies the backing-off safety-net requeue used +// while an instance's quota claim is still pending: 1s for the first minute, then +// 15s, then 60s after 5m, then 300s after 10m; and no requeue once granted. +func TestQuotaPendingRequeueAfter(t *testing.T) { + base := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + + // created is the instance creation time; quota elapsed is measured from it + // (NOT the condition's LastTransitionTime, which stays at the 1970 default + // while quota is pending). The condition LastTransitionTime here is + // deliberately left at the 1970 zero value to mirror that production reality. + withQuota := func(s metav1.ConditionStatus, created time.Time) *computev1alpha.Instance { + return &computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.NewTime(created), + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{{ + Type: computev1alpha.InstanceQuotaGranted, + Status: s, + Reason: "PendingEvaluation", + }}, + }, + } + } + + tests := []struct { + name string + inst *computev1alpha.Instance + now time.Time + want time.Duration + }{ + {"granted -> no requeue", withQuota(metav1.ConditionTrue, base), base.Add(time.Hour), 0}, + {"no quota condition -> no requeue", &computev1alpha.Instance{}, base, 0}, + {"just pending -> 1s", withQuota(metav1.ConditionUnknown, base), base.Add(5 * time.Second), quotaPendingRequeueFast}, + {"59s -> 1s", withQuota(metav1.ConditionUnknown, base), base.Add(59 * time.Second), quotaPendingRequeueFast}, + {"60s boundary -> 15s", withQuota(metav1.ConditionUnknown, base), base.Add(60 * time.Second), quotaPendingRequeueMedium}, + {"3m -> 15s", withQuota(metav1.ConditionUnknown, base), base.Add(3 * time.Minute), quotaPendingRequeueMedium}, + {"5m boundary -> 60s", withQuota(metav1.ConditionUnknown, base), base.Add(5 * time.Minute), quotaPendingRequeueSlow}, + {"8m -> 60s", withQuota(metav1.ConditionUnknown, base), base.Add(8 * time.Minute), quotaPendingRequeueSlow}, + {"10m boundary -> 300s", withQuota(metav1.ConditionUnknown, base), base.Add(10 * time.Minute), quotaPendingRequeueIdle}, + {"1h -> 300s", withQuota(metav1.ConditionUnknown, base), base.Add(time.Hour), quotaPendingRequeueIdle}, + {"denied(False) still polls", withQuota(metav1.ConditionFalse, base), base.Add(2 * time.Minute), quotaPendingRequeueMedium}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, quotaPendingRequeueAfter(tc.inst, tc.now)) + }) + } +} + +// Shared literals for the instance-sizing / blocking-reason tests below +// (extracted to satisfy goconst). +const ( + testContainerName = "app" + testContainerImage = "test/image:latest" +) + +// TestReconcileInstanceReadyCondition_ProviderSubConditionSurfacing verifies +// that provider-set sub-condition reasons (e.g. ImageUnavailable written by the +// unikraft provider onto the Running condition) surface on Ready with both the +// reason AND the message preserved — even when the sub-condition status is +// Unknown (the normal state for a retriable image-pull failure). +// +// This is the primary regression-prevention test for the "generic message +// discards actionable reason" bug described in the status-blocking-reason RFC. +func TestReconcileInstanceReadyCondition_ProviderSubConditionSurfacing(t *testing.T) { + // These messages mirror the exact strings that translateWaitingReason in the + // unikraft provider writes. Both the reason AND the message must reach Ready. + const ( + msgImageUnavailable = "The instance image could not be pulled" + msgInstanceCrashing = "The instance is repeatedly failing to start" + msgConfigError = "The instance could not be started due to a configuration error" + msgProvisioning = "Instance is provisioning" + msgProgrammingInProgress = "Instance is being programmed" + ) + + noGates := func(inst *computev1alpha.Instance) *computev1alpha.Instance { return inst } + withQuotaGranted := func(inst *computev1alpha.Instance) *computev1alpha.Instance { + inst.Status.Conditions = append(inst.Status.Conditions, metav1.Condition{ + Type: computev1alpha.InstanceQuotaGranted, + Status: metav1.ConditionTrue, + Reason: computev1alpha.InstanceQuotaGrantedReasonQuotaAvailable, + Message: "Quota allocated", + }) + return inst + } + + tests := []struct { + name string + instance *computev1alpha.Instance + wantStatus metav1.ConditionStatus + wantReason string + wantMessage string + }{ + { + // The key scenario from the design: provider writes Running=Unknown/ + // ImageUnavailable while Programmed is still Unknown/ProgrammingInProgress. + // Ready must carry ImageUnavailable + the actionable message, NOT the + // generic "Instance has not been programmed". + name: "image_pull_failure_surfaces_on_ready", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + // Provider sets Running=Unknown/ImageUnavailable when the + // container enters an image-pull waiting state. + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + { + // Demonstrate the OLD (broken) behavior: if we used the pre-fix logic, + // the generic message would be emitted instead. This case would have + // FAILED before the fix, proving the test catches the regression. + // + // The old code: "if programmedCondition.Status != Unknown { copy message }" + // — since Programmed IS Unknown, message was locked to msgNotProgrammed. + // The test now asserts the NEW, correct output. + name: "old_behavior_generic_message_would_fail_this_assertion", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + // OLD code would produce: wantReason="PendingProgramming", wantMessage=msgNotProgrammed. + // The correct new behavior surfaces the actionable reason+message instead. + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + { + // When both a transient Provisioning and ImageUnavailable are present, + // ImageUnavailable (priority 5) must win over Provisioning (priority 1). + name: "image_unavailable_beats_transient_provisioning", + instance: noGates(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonProvisioning, + Message: msgProvisioning, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + { + // When no specific provider sub-condition exists but Programmed carries + // a specific reason (ProgrammingInProgress), that reason should + // pass-through to Ready. The generic msgNotProgrammed fallback is only + // used when Programmed is absent or carries only a generic "Pending" reason. + name: "programmed_in_progress_passes_through_when_no_provider_sub_condition", + instance: noGates(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + }, + }, + }), + // ProgrammingInProgress is more specific than PendingProgramming and + // passes through from Programmed → Ready. + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + wantMessage: msgProgrammingInProgress, + }, + { + // True generic fallback: no Programmed condition at all. The default + // PendingProgramming/msgNotProgrammed must be emitted. + name: "generic_fallback_when_programmed_condition_absent", + instance: noGates(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + }, + }), + wantStatus: metav1.ConditionFalse, + wantReason: computev1alpha.InstanceProgrammedReasonPendingProgramming, + wantMessage: msgNotProgrammed, + }, + { + // InstanceCrashing: terminal-ish (not retried indefinitely by the user, + // they must fix the app). Status=Unknown from provider → Ready=Unknown. + name: "instance_crashing_surfaces_on_ready", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonInstanceCrashing, + Message: msgInstanceCrashing, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonInstanceCrashing, + wantMessage: msgInstanceCrashing, + }, + { + // ConfigurationError: provider could not start the container due to a + // spec/config issue. User must correct the workload. + name: "configuration_error_surfaces_on_ready", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonConfigurationError, + Message: msgConfigError, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonConfigurationError, + wantMessage: msgConfigError, + }, + { + // When Programmed=True but Running=Unknown/ImageUnavailable, the + // running-not-true branch must also propagate the provider reason+message. + name: "image_unavailable_on_running_condition_programmed_true", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionTrue, + Reason: computev1alpha.InstanceProgrammedReasonProgrammed, + Message: msgInstanceProgrammed, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + } + + noNetworkFailure := func(_ context.Context, _ client.Client, _ *computev1alpha.Instance) (bool, string, error) { + return false, "", nil + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := &InstanceReconciler{} + _, err := r.reconcileInstanceReadyCondition(context.Background(), nil, tt.instance, noNetworkFailure) + require.NoError(t, err) + + ready := apimeta.FindStatusCondition(tt.instance.Status.Conditions, computev1alpha.InstanceReady) + require.NotNil(t, ready, "Ready condition must be set") + assert.Equal(t, tt.wantStatus, ready.Status, "Ready.Status mismatch") + assert.Equal(t, tt.wantReason, ready.Reason, "Ready.Reason mismatch") + assert.Equal(t, tt.wantMessage, ready.Message, "Ready.Message mismatch") + }) + } +} + +// TestResolveInstanceResources verifies the three-tier sizing precedence: +// explicit container Limits > instance-level Requests > instanceType catalog. +func TestResolveInstanceResources(t *testing.T) { + // d1Standard2 is the canonical catalog entry for datumcloud/d1-standard-2 + // (1 vCPU = 1000 millicores, 2 GiB = 2048 MiB) — the platform-declared quota + // size for the instance type. + const ( + d1CPUMillicores = int64(1000) + d1MemMiB = int64(2048) + ) + + cpu500m := resource.MustParse("500m") + cpu1 := resource.MustParse("1") + mem256Mi := resource.MustParse("256Mi") + mem512Mi := resource.MustParse("512Mi") + + makeContainerResources := func(cpu, mem resource.Quantity) *computev1alpha.ContainerResourceRequirements { + return &computev1alpha.ContainerResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: cpu, + corev1.ResourceMemory: mem, + }, + } + } + + tests := []struct { + name string + instance *computev1alpha.Instance + wantCPU int64 + wantMem int64 + wantResolved bool + }{ + { + // Common production case: instanceType only, no explicit limits. + // resolveInstanceResources must consult the catalog and return the + // d1-standard-2 values so vcpus + memory are included in the claim. + name: "instanceType only: d1-standard-2 resolves from catalog", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: instanceTypeD1Standard2, + }, + }, + }, + }, + wantCPU: d1CPUMillicores, + wantMem: d1MemMiB, + wantResolved: true, + }, + { + // Explicit container Limits take precedence over the catalog so that + // a workload with custom sizing is accounted at its actual footprint. + name: "explicit container limits override catalog", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: instanceTypeD1Standard2, + }, + Sandbox: &computev1alpha.SandboxRuntime{ + Containers: []computev1alpha.SandboxContainer{ + { + Name: testContainerName, + Image: testContainerImage, + Resources: makeContainerResources(cpu500m, mem256Mi), + }, + { + Name: "sidecar", + Image: "test/sidecar:latest", + Resources: makeContainerResources(cpu500m, mem256Mi), + }, + }, + }, + }, + }, + }, + // Two containers each contributing 500m CPU + 256 MiB → 1000m + 512 MiB. + wantCPU: 1000, + wantMem: 512, + wantResolved: true, + }, + { + // A single container with full cpu+memory Limits; no instanceType needed. + name: "single container limits, no instanceType", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Sandbox: &computev1alpha.SandboxRuntime{ + Containers: []computev1alpha.SandboxContainer{ + { + Name: testContainerName, + Image: testContainerImage, + Resources: makeContainerResources(cpu1, mem512Mi), + }, + }, + }, + }, + }, + }, + wantCPU: 1000, + wantMem: 512, + wantResolved: true, + }, + { + // Instance-level Requests (no sandbox, no instanceType) use path 2. + name: "instance-level resources.requests resolve correctly", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: cpu1, + corev1.ResourceMemory: mem512Mi, + }, + }, + }, + }, + }, + wantCPU: 1000, + wantMem: 512, + wantResolved: true, + }, + { + // An unknown instanceType with no explicit sizing must not fabricate + // values; the caller falls back to claiming instance count only. + name: "unknown instanceType, no explicit limits: unresolved", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: "datumcloud/unknown-type-99", + }, + }, + }, + }, + wantCPU: 0, + wantMem: 0, + wantResolved: false, + }, + { + // Empty instanceType and no explicit sizing: unresolved. + name: "empty instanceType, nothing explicit: unresolved", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{}, + }, + }, + }, + wantCPU: 0, + wantMem: 0, + wantResolved: false, + }, + { + // Sandbox containers without any Limits fall through to the catalog + // when an instanceType is set — partial container specs must not block + // catalog resolution. + name: "sandbox containers without limits fall through to catalog", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: instanceTypeD1Standard2, + }, + Sandbox: &computev1alpha.SandboxRuntime{ + Containers: []computev1alpha.SandboxContainer{ + { + Name: testContainerName, + Image: testContainerImage, + // No Resources.Limits set — common for UKC workloads. + }, + }, + }, + }, + }, + }, + wantCPU: d1CPUMillicores, + wantMem: d1MemMiB, + wantResolved: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cpu, mem, resolved := resolveInstanceResources(tt.instance) + assert.Equal(t, tt.wantResolved, resolved, "resolved mismatch") + assert.Equal(t, tt.wantCPU, cpu, "cpuMillicores mismatch") + assert.Equal(t, tt.wantMem, mem, "memMiB mismatch") + }) + } +} + +// TestReconcileQuotaClaim_RequestsIncludeVCPUsAndMemory confirms that when an +// instance is sized by instanceType alone (the typical production shape), the +// ResourceClaim created by reconcileQuotaClaim includes vcpus and memory +// requests in addition to the instance count, so the AllowanceBuckets are fed. +func TestReconcileQuotaClaim_RequestsIncludeVCPUsAndMemory(t *testing.T) { + const ( + clusterName = "test-project" + namespace = "default" + instanceName = "claim-resources-test" + ) + + claimName := instanceQuotaClaimNamePrefix + instanceName + + s := newTestScheme(t) + + // Instance sized by instanceType only — no container limits, no explicit + // instance-level requests. This is the common production workload shape. + instance := &computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: instanceName, + Namespace: namespace, + Finalizers: []string{instanceQuotaFinalizer, instanceControllerFinalizer}, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: testComputeAPIVersion, + Kind: kindWorkloadDeploymentTest, + Name: "owner-deployment", + UID: testUIDString, + Controller: func() *bool { b := true; return &b }(), + }, + }, + }, + Spec: computev1alpha.InstanceSpec{ + Controller: &computev1alpha.InstanceController{ + SchedulingGates: []computev1alpha.SchedulingGate{ + {Name: instancecontrol.QuotaSchedulingGate.String()}, + }, + }, + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + // No Requests, no container Limits — catalog must supply the values. + InstanceType: instanceTypeD1Standard2, + }, + }, + NetworkInterfaces: []computev1alpha.InstanceNetworkInterface{}, + }, + } + + deployment := &computev1alpha.WorkloadDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "owner-deployment", + Namespace: namespace, + UID: testUIDString, + }, + } + + projectClient := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(instance, deployment). + WithStatusSubresource(&computev1alpha.Instance{}). + Build() + + quotaClient := fake.NewClientBuilder(). + WithScheme(s). + WithStatusSubresource("av1alpha1.ResourceClaim{}). + Build() + + qm := quota.New(nil) + qm.StoreClient(clusterName, quotaClient) + + r := &InstanceReconciler{ + mgr: &fakeMCManager{clusters: map[string]cluster.Cluster{clusterName: newFakeCluster(projectClient)}}, + scheme: s, + quotaClientManager: qm, + edgeClusterName: testEdgeClusterName, + projectIDForInstance: func(_ context.Context, cn multicluster.ClusterName, _ *computev1alpha.Instance) (string, error) { + return string(cn), nil + }, + recorder: &record.FakeRecorder{}, + } + r.finalizers = finalizer.NewFinalizers() + require.NoError(t, r.finalizers.Register(instanceControllerFinalizer, r)) + + _, err := r.Reconcile(context.Background(), mcreconcile.Request{ + Request: reconcile.Request{NamespacedName: types.NamespacedName{Namespace: namespace, Name: instanceName}}, + ClusterName: clusterName, + }) + require.NoError(t, err) + + // Verify the created ResourceClaim carries vcpus and memory requests. + var createdClaim quotav1alpha1.ResourceClaim + require.NoError(t, quotaClient.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: claimName}, &createdClaim)) + + byType := make(map[string]int64, len(createdClaim.Spec.Requests)) + for _, req := range createdClaim.Spec.Requests { + byType[req.ResourceType] = req.Amount + } + + assert.Equal(t, int64(1), byType[quotaResourceTypeInstances], "instance count must be 1") + assert.Equal(t, int64(1000), byType["compute.datumapis.com/vcpus"], + "d1-standard-2 must claim 1000 millicores (1 vCPU)") + assert.Equal(t, int64(2048), byType["compute.datumapis.com/memory"], + "d1-standard-2 must claim 2048 MiB (2 GiB)") +} diff --git a/internal/controller/instance_writeback_test.go b/internal/controller/instance_writeback_test.go index 17c522f1..0112a630 100644 --- a/internal/controller/instance_writeback_test.go +++ b/internal/controller/instance_writeback_test.go @@ -92,7 +92,7 @@ func wbTestCellInstance() *computev1alpha.Instance { { Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: "Instance is ready", LastTransitionTime: metav1.Now(), }, diff --git a/internal/controller/instancecontrol/stateful/stateful_control.go b/internal/controller/instancecontrol/stateful/stateful_control.go index 2d2e3073..6dd8934d 100644 --- a/internal/controller/instancecontrol/stateful/stateful_control.go +++ b/internal/controller/instancecontrol/stateful/stateful_control.go @@ -53,8 +53,10 @@ func (c *statefulControl) GetActions( var createActions []instancecontrol.Action var waitActions []instancecontrol.Action - // highest -> lowest - var updateActions []instancecontrol.Action + // highest -> lowest. Instances whose template hash has drifted from the + // desired template are deleted and recreated (not updated in place) so the + // change actually rolls the backing pod — see the recreate branch below. + var recreateActions []instancecontrol.Action // highest -> lowest var deleteActions []instancecontrol.Action @@ -129,14 +131,19 @@ func (c *statefulControl) GetActions( if !apimeta.IsStatusConditionTrue(instance.Status.Conditions, v1alpha.InstanceReady) { waitActions = append(waitActions, instancecontrol.NewWaitAction(instance)) } else if needsUpdate(instance, instanceTemplateHash) { - updatedInstance := instance.DeepCopy() - updatedInstance.Annotations = deployment.Spec.Template.Annotations - updatedInstance.Labels = deployment.Spec.Template.Labels - - addInstanceControllerLabels(updatedInstance, getInstanceOrdinal(updatedInstance.Name), deployment) - - updatedInstance.Spec = deployment.Spec.Template.Spec - updateActions = append(updateActions, instancecontrol.NewUpdateAction(updatedInstance)) + // The instance's template hash no longer matches the desired + // template — e.g. an image change, or a restart requested via the + // RestartedAtAnnotation, which is part of the template hash. The + // unikraft provider bakes the pod's runtime, rootfs, and file + // mounts at pod-creation time and never reconciles an existing + // pod's spec, so an in-place Instance update would silently fail to + // roll the running workload. Delete the instance instead; the next + // reconcile recreates it from the current template via the create + // path above, and the provider tears down the old pod + // (finalizer-gated) and boots a fresh one. Ordered, one-at-a-time + // pacing is preserved by the descending-ordinal sort, the + // skip-all-but-first logic, and the DeletionTimestamp WaitAction. + recreateActions = append(recreateActions, instancecontrol.NewDeleteAction(instance)) } } } @@ -168,10 +175,10 @@ func (c *statefulControl) GetActions( } } - slices.SortFunc(updateActions, descendingOrdinal) + slices.SortFunc(recreateActions, descendingOrdinal) slices.SortFunc(deleteActions, descendingOrdinal) - actions := make([]instancecontrol.Action, 0, len(createActions)+len(waitActions)+len(updateActions)+len(deleteActions)+len(patchLabelActions)) + actions := make([]instancecontrol.Action, 0, len(createActions)+len(waitActions)+len(recreateActions)+len(deleteActions)+len(patchLabelActions)) switch deployment.Spec.ScaleSettings.InstanceManagementPolicy { case v1alpha.OrderedReadyInstanceManagementPolicyType: @@ -186,7 +193,7 @@ func (c *statefulControl) GetActions( slices.SortFunc(actions, ascendingOrdinal) - actions = append(actions, updateActions...) + actions = append(actions, recreateActions...) actions = append(actions, deleteActions...) // Skip all actions except the first one. diff --git a/internal/controller/instancecontrol/stateful/stateful_control_test.go b/internal/controller/instancecontrol/stateful/stateful_control_test.go index ffc04272..985c78b2 100644 --- a/internal/controller/instancecontrol/stateful/stateful_control_test.go +++ b/internal/controller/instancecontrol/stateful/stateful_control_test.go @@ -49,6 +49,11 @@ func TestFreshDeployment(t *testing.T) { assert.True(t, actions[1].IsSkipped()) } +// TestUpdateWithAllReadyInstances verifies that a template change on Ready +// instances rolls them by delete+recreate (not an in-place update), ordered +// highest-ordinal-first with only the first action active. An in-place update +// would never roll the backing pod, since the unikraft provider bakes the pod +// at creation time and ignores spec changes on an existing pod. func TestUpdateWithAllReadyInstances(t *testing.T) { ctx := context.Background() control := New() @@ -67,11 +72,11 @@ func TestUpdateWithAllReadyInstances(t *testing.T) { assert.Len(t, actions, 2) assert.Equal(t, "test-deploy-1", actions[0].Object.GetName()) - assert.Equal(t, instancecontrol.ActionTypeUpdate, actions[0].ActionType()) + assert.Equal(t, instancecontrol.ActionTypeDelete, actions[0].ActionType()) assert.False(t, actions[0].IsSkipped()) assert.Equal(t, "test-deploy-0", actions[1].Object.GetName()) - assert.Equal(t, instancecontrol.ActionTypeUpdate, actions[1].ActionType()) + assert.Equal(t, instancecontrol.ActionTypeDelete, actions[1].ActionType()) assert.True(t, actions[1].IsSkipped()) } @@ -244,38 +249,48 @@ func TestInstanceLabels_FourNewLabelsStamped(t *testing.T) { "PlacementNameLabel must equal deployment.Spec.PlacementName") } -// TestInstanceLabels_PropagatedOnUpdate verifies that when an existing instance -// is updated (rolling update path), the four new labels are refreshed from the -// deployment so they remain accurate after spec changes. -func TestInstanceLabels_PropagatedOnUpdate(t *testing.T) { +// TestInstanceLabels_RefreshedOnRecreate verifies that when a template change +// rolls an instance, the recreated instance carries the four self-describing +// labels sourced from the WorkloadDeployment. A template change no longer +// updates the instance in place; it deletes the drifted instance and recreates +// it via the create path on the following reconcile, which stamps the labels. +func TestInstanceLabels_RefreshedOnRecreate(t *testing.T) { ctx := context.Background() control := New() deployment := getWorkloadDeployment("test-labels-update", 1) - // Build a ready existing instance. + // A ready existing instance on the old template hash. currentInstances := []v1alpha.Instance{*getInstanceForDeployment(deployment, 0)} - // Trigger a rolling update by changing the image. + // Trigger a roll by changing the image. deployment.Spec.Template.Spec.Runtime.Sandbox.Containers[0].Image = "updated-image" + // First reconcile: the drifted instance is deleted (recreate), not updated. actions, err := control.GetActions(ctx, scheme, deployment, currentInstances) + assert.NoError(t, err) + assert.Len(t, actions, 1) + assert.Equal(t, instancecontrol.ActionTypeDelete, actions[0].ActionType()) + assert.Equal(t, "test-labels-update-0", actions[0].Object.GetName()) + // Next reconcile, after the old instance has been fully deleted and is gone: + // the empty slot is refilled by the create path, which stamps the labels. + actions, err = control.GetActions(ctx, scheme, deployment, nil) assert.NoError(t, err) assert.Len(t, actions, 1) - assert.Equal(t, instancecontrol.ActionTypeUpdate, actions[0].ActionType()) + assert.Equal(t, instancecontrol.ActionTypeCreate, actions[0].ActionType()) instance, ok := actions[0].Object.(*v1alpha.Instance) assert.True(t, ok) assert.Equal(t, deployment.GetName(), instance.Labels[v1alpha.WorkloadDeploymentNameLabel], - "WorkloadDeploymentNameLabel must be refreshed on update") + "WorkloadDeploymentNameLabel must be set on the recreated instance") assert.Equal(t, deployment.Spec.CityCode, instance.Labels[v1alpha.CityCodeLabel], - "CityCodeLabel must be refreshed on update") + "CityCodeLabel must be set on the recreated instance") assert.Equal(t, deployment.Spec.WorkloadRef.Name, instance.Labels[v1alpha.WorkloadNameLabel], - "WorkloadNameLabel must be refreshed on update") + "WorkloadNameLabel must be set on the recreated instance") assert.Equal(t, deployment.Spec.PlacementName, instance.Labels[v1alpha.PlacementNameLabel], - "PlacementNameLabel must be refreshed on update") + "PlacementNameLabel must be set on the recreated instance") } // TestInstanceLocation_SetWhenDeploymentStatusLocationPresent verifies that when @@ -331,7 +346,7 @@ func TestInstanceLocation_NilWhenDeploymentStatusLocationAbsent(t *testing.T) { // TestLabelBackfill_NotReadyMatchingHash verifies that a not-Ready instance // with an unchanged template hash receives a PatchLabels action when it is -// missing controller-managed labels. The action must not be a rollout Update, +// missing controller-managed labels. The action must not be a rollout recreate, // must not alter spec/template, and must not block subsequent instances. func TestLabelBackfill_NotReadyMatchingHash(t *testing.T) { ctx := context.Background() @@ -361,15 +376,15 @@ func TestLabelBackfill_NotReadyMatchingHash(t *testing.T) { assert.NoError(t, err) // Collect actions by type. - var waitActions, createActions, updateActions, patchActions []instancecontrol.Action + var waitActions, createActions, recreateActions, patchActions []instancecontrol.Action for _, a := range actions { switch a.ActionType() { case instancecontrol.ActionTypeWait: waitActions = append(waitActions, a) case instancecontrol.ActionTypeCreate: createActions = append(createActions, a) - case instancecontrol.ActionTypeUpdate: - updateActions = append(updateActions, a) + case instancecontrol.ActionTypeDelete: + recreateActions = append(recreateActions, a) case instancecontrol.ActionTypePatchLabels: patchActions = append(patchActions, a) } @@ -383,8 +398,8 @@ func TestLabelBackfill_NotReadyMatchingHash(t *testing.T) { assert.Len(t, createActions, 1, "instance-1 create action must be present") assert.True(t, createActions[0].IsSkipped(), "create for instance-1 must be skipped while instance-0 is waiting") - // No template Update actions must be produced. - assert.Empty(t, updateActions, "no template Update must be produced for a matching-hash instance") + // No rollout recreate actions must be produced. + assert.Empty(t, recreateActions, "no rollout recreate must be produced for a matching-hash instance") // A PatchLabels action must be produced for instance-0. assert.Len(t, patchActions, 1, "exactly one PatchLabels action for the label-drifted instance") @@ -439,7 +454,7 @@ func TestLabelBackfill_Idempotent(t *testing.T) { // TestLabelBackfill_ReadyInstanceCorrected verifies that a Ready instance with // correct template hash but drifted labels receives a PatchLabels action -// without triggering a template rollout Update. +// without triggering a rollout recreate. func TestLabelBackfill_ReadyInstanceCorrected(t *testing.T) { ctx := context.Background() control := New() @@ -456,18 +471,18 @@ func TestLabelBackfill_ReadyInstanceCorrected(t *testing.T) { assert.NoError(t, err) - var updateActions, patchActions []instancecontrol.Action + var recreateActions, patchActions []instancecontrol.Action for _, a := range actions { switch a.ActionType() { - case instancecontrol.ActionTypeUpdate: - updateActions = append(updateActions, a) + case instancecontrol.ActionTypeDelete: + recreateActions = append(recreateActions, a) case instancecontrol.ActionTypePatchLabels: patchActions = append(patchActions, a) } } - // No template Update must be produced — template hash matches. - assert.Empty(t, updateActions, "no template Update must be produced for a matching-hash ready instance") + // No rollout recreate must be produced — template hash matches. + assert.Empty(t, recreateActions, "no rollout recreate must be produced for a matching-hash ready instance") // A PatchLabels action must be produced. assert.Len(t, patchActions, 1, "PatchLabels action must be produced for the label-drifted ready instance") @@ -478,8 +493,9 @@ func TestLabelBackfill_ReadyInstanceCorrected(t *testing.T) { } // TestLabelBackfill_DoesNotAffectRollingUpdate verifies that a genuine template -// change on a Ready instance still produces a normal ordered Update action and -// that the PatchLabels path does not interfere with or duplicate it. +// change on a Ready instance still produces the normal ordered roll (a recreate +// Delete per instance) and that the PatchLabels path does not interfere with or +// duplicate it. func TestLabelBackfill_DoesNotAffectRollingUpdate(t *testing.T) { ctx := context.Background() control := New() @@ -516,23 +532,23 @@ func TestLabelBackfill_DoesNotAffectRollingUpdate(t *testing.T) { assert.NoError(t, err) - var updateActions, patchActions []instancecontrol.Action + var recreateActions, patchActions []instancecontrol.Action for _, a := range actions { switch a.ActionType() { - case instancecontrol.ActionTypeUpdate: - updateActions = append(updateActions, a) + case instancecontrol.ActionTypeDelete: + recreateActions = append(recreateActions, a) case instancecontrol.ActionTypePatchLabels: patchActions = append(patchActions, a) } } - // Two Update actions expected (one per instance), ordered highest-to-lowest. - assert.Len(t, updateActions, 2, "both instances must produce Update actions on template change") - assert.Equal(t, "test-backfill-rolling-1", updateActions[0].Object.GetName(), - "Update actions must be ordered highest ordinal first") - assert.Equal(t, "test-backfill-rolling-0", updateActions[1].Object.GetName()) - assert.False(t, updateActions[0].IsSkipped(), "first Update must be active") - assert.True(t, updateActions[1].IsSkipped(), "second Update must be skipped (ordered rollout)") + // Two recreate (Delete) actions expected (one per instance), ordered highest-to-lowest. + assert.Len(t, recreateActions, 2, "both instances must produce recreate actions on template change") + assert.Equal(t, "test-backfill-rolling-1", recreateActions[0].Object.GetName(), + "recreate actions must be ordered highest ordinal first") + assert.Equal(t, "test-backfill-rolling-0", recreateActions[1].Object.GetName()) + assert.False(t, recreateActions[0].IsSkipped(), "first recreate must be active") + assert.True(t, recreateActions[1].IsSkipped(), "second recreate must be skipped (ordered rollout)") // No PatchLabels — all labels are already correct. assert.Empty(t, patchActions, "no PatchLabels when all labels are already correct") diff --git a/internal/controller/workload_controller.go b/internal/controller/workload_controller.go index 6ca92e03..34f55def 100644 --- a/internal/controller/workload_controller.go +++ b/internal/controller/workload_controller.go @@ -220,6 +220,7 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( newWorkloadStatus := workload.Status.DeepCopy() totalReplicas := int32(0) totalCurrentReplicas := int32(0) + totalUpdatedReplicas := int32(0) totalDesiredReplicas := int32(0) totalReadyReplicas := int32(0) totalDeployments := int32(0) @@ -251,12 +252,14 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( foundAvailableDeployment := false replicas := int32(0) currentReplicas := int32(0) + updatedReplicas := int32(0) desiredReplicas := int32(0) readyReplicas := int32(0) totalDeployments += int32(len(placementDeployments)) for _, deployment := range placementDeployments { replicas += deployment.Status.Replicas currentReplicas += deployment.Status.CurrentReplicas + updatedReplicas += deployment.Status.UpdatedReplicas desiredReplicas += deployment.Status.DesiredReplicas readyReplicas += deployment.Status.ReadyReplicas @@ -266,11 +269,13 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( } totalReplicas += replicas totalCurrentReplicas += currentReplicas + totalUpdatedReplicas += updatedReplicas totalDesiredReplicas += desiredReplicas totalReadyReplicas += readyReplicas placementStatus.Replicas = replicas placementStatus.CurrentReplicas = currentReplicas + placementStatus.UpdatedReplicas = updatedReplicas placementStatus.DesiredReplicas = desiredReplicas placementStatus.ReadyReplicas = readyReplicas @@ -304,8 +309,10 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( newWorkloadStatus.Deployments = totalDeployments newWorkloadStatus.Replicas = totalReplicas newWorkloadStatus.CurrentReplicas = totalCurrentReplicas + newWorkloadStatus.UpdatedReplicas = totalUpdatedReplicas newWorkloadStatus.DesiredReplicas = totalDesiredReplicas newWorkloadStatus.ReadyReplicas = totalReadyReplicas + newWorkloadStatus.ObservedGeneration = workload.Generation if equality.Semantic.DeepEqual(workload.Status, newWorkloadStatus) { return nil diff --git a/internal/controller/workloaddeployment_controller.go b/internal/controller/workloaddeployment_controller.go index 9b17266e..d4fd9919 100644 --- a/internal/controller/workloaddeployment_controller.go +++ b/internal/controller/workloaddeployment_controller.go @@ -171,15 +171,17 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco desiredReplicas = 0 } - currentReplicas, readyReplicas, quotaBlockedReplicas, err := r.reconcileInstanceGates(ctx, cl.GetClient(), &deployment, instances.Items, networkReady) + currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, err := r.reconcileInstanceGates(ctx, cl.GetClient(), &deployment, instances.Items, networkReady) if err != nil { return ctrl.Result{}, err } deployment.Status.Replicas = int32(replicas) deployment.Status.CurrentReplicas = int32(currentReplicas) + deployment.Status.UpdatedReplicas = int32(updatedReplicas) deployment.Status.DesiredReplicas = desiredReplicas deployment.Status.ReadyReplicas = int32(readyReplicas) + deployment.Status.ObservedGeneration = deployment.Generation if quotaBlockedReplicas > 0 { apimeta.SetStatusCondition(&deployment.Status.Conditions, metav1.Condition{ @@ -239,7 +241,7 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates( deployment *computev1alpha.WorkloadDeployment, instances []computev1alpha.Instance, networkReady bool, -) (currentReplicas, readyReplicas, quotaBlockedReplicas int, err error) { +) (currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas int, err error) { templateHash := instancecontrol.ComputeHash(deployment.Spec.Template) for _, instance := range instances { if apimeta.IsStatusConditionPresentAndEqual(instance.Status.Conditions, computev1alpha.InstanceQuotaGranted, metav1.ConditionFalse) { @@ -255,22 +257,34 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates( instance.Spec.Controller.SchedulingGates = newGates return nil }); patchErr != nil { - return 0, 0, 0, fmt.Errorf("failed updating instance: %w", patchErr) + return 0, 0, 0, 0, fmt.Errorf("failed updating instance: %w", patchErr) } } } - if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) { - if instance.Status.Controller.ObservedTemplateHash == templateHash { - currentReplicas++ - } + // An instance is "updated" once it has observed the desired template + // revision, regardless of readiness. Counting these (even before they are + // Programmed) makes a rolling update / restart observable: UpdatedReplicas + // dips below Replicas while the recreated instance comes up, then recovers. + // Status.Controller is a pointer the infra provider may not have populated + // yet; guard the deref to avoid a panic that would abort the reconcile. + onLatestRevision := instance.Status.Controller != nil && + instance.Status.Controller.ObservedTemplateHash == templateHash + if onLatestRevision { + updatedReplicas++ + } + + // CurrentReplicas is the Programmed subset of UpdatedReplicas — updated + // instances that are ready to serve. + if onLatestRevision && apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) { + currentReplicas++ } if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceReady) { readyReplicas++ } } - return currentReplicas, readyReplicas, quotaBlockedReplicas, nil + return currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, nil } // writeStatusToKarmada copies the WorkloadDeployment status to the matching diff --git a/internal/controller/workloaddeployment_federator.go b/internal/controller/workloaddeployment_federator.go index 9c736cf0..651db2e6 100644 --- a/internal/controller/workloaddeployment_federator.go +++ b/internal/controller/workloaddeployment_federator.go @@ -14,17 +14,22 @@ import ( "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/cluster" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/finalizer" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" mccontext "sigs.k8s.io/multicluster-runtime/pkg/context" + mchandler "sigs.k8s.io/multicluster-runtime/pkg/handler" mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" + "sigs.k8s.io/multicluster-runtime/pkg/multicluster" mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" karmadapolicyv1alpha1 "github.com/karmada-io/api/policy/v1alpha1" computev1alpha "go.datum.net/compute/api/v1alpha" "go.miloapis.com/milo/pkg/downstreamclient" + milosource "go.miloapis.com/milo/pkg/multicluster-runtime/source" ) const ( @@ -69,7 +74,15 @@ type WorkloadDeploymentFederator struct { // plane (the federation hub that the management controllers read and write // through). The caller (cmd/main.go) constructs it from --federation-kubeconfig. FederationClient client.Client - finalizers finalizer.Finalizers + // FederationCluster is a watchable cluster handle for the same Karmada + // federation control plane that FederationClient talks to. It is used to set + // up an informer-backed watch on the downstream WorkloadDeployment objects so + // that status aggregated by Karmada onto the downstream WD is mirrored back to + // the project-namespace WD immediately, rather than waiting for the next + // informer resync. When nil (e.g. in unit tests), the downstream watch is + // skipped and the controller falls back to watching only the VCP WD. + FederationCluster cluster.Cluster + finalizers finalizer.Finalizers } // +kubebuilder:rbac:groups=compute.datumapis.com,resources=workloaddeployments,verbs=get;list;watch;update;patch @@ -84,6 +97,16 @@ func (r *WorkloadDeploymentFederator) Reconcile(ctx context.Context, req mcrecon logger := log.FromContext(ctx) + // An empty cluster name resolves to the local host management cluster, which + // has no compute CRDs — any Get would fail with "no matches for kind" and + // requeue in a hot loop. The For watch (EngageWithLocalCluster=false) and the + // preservation-wrapped downstream watch both set a real project cluster name, + // so an empty name here is never legitimate. Drop it without erroring. + if req.ClusterName == "" { + logger.V(1).Info("dropping reconcile with empty cluster name") + return ctrl.Result{}, nil + } + cl, err := r.mgr.GetCluster(ctx, req.ClusterName) if err != nil { return ctrl.Result{}, err @@ -383,16 +406,150 @@ func (r *WorkloadDeploymentFederator) cleanupPropagationPolicyIfUnused( // SetupWithManager registers the controller with the multicluster manager. // It must only be called when FederationClient is non-nil. +// +// The controller watches two control planes: +// +// - The VCP/project WorkloadDeployment (via For), so spec changes in the +// project namespace trigger federation to the downstream control plane. +// - The downstream Karmada WorkloadDeployment (via WatchesRawSource against +// FederationCluster), so when Karmada aggregates new status onto the +// downstream WD the corresponding project WD is reconciled immediately and +// the status is mirrored back. Without this second watch the federator only +// caught up on the next informer resync (~10h), causing status lag. func (r *WorkloadDeploymentFederator) SetupWithManager(mgr mcmanager.Manager) error { r.mgr = mgr r.finalizers = finalizer.NewFinalizers() if err := r.finalizers.Register(federatorFinalizer, r); err != nil { return fmt.Errorf("failed to register federator finalizer: %w", err) } - return mcbuilder.ControllerManagedBy(mgr). + + b := mcbuilder.ControllerManagedBy(mgr). For(&computev1alpha.WorkloadDeployment{}, mcbuilder.WithEngageWithLocalCluster(false)). - Named("workload-deployment-federator"). - Complete(r) + Named("workload-deployment-federator") + + // Watch the downstream Karmada WorkloadDeployment whose status we mirror. + // FederationCluster is a watchable handle for the federation control plane; + // it is nil in unit tests, where only the For watch is exercised. + // + // The handler MUST preserve the ClusterName that mapDownstreamDeploymentToRequest + // sets. milosource binds the raw source to the empty cluster name, and the + // default TypedEnqueueRequestsFromMapFunc wraps the map in TypedInjectCluster, + // which overwrites each request's ClusterName with that bound empty name — so + // every request would resolve to the local host cluster (no compute CRDs) and + // fail with "no matches for kind WorkloadDeployment". The preservation variant + // skips that injection so our project-cluster ClusterName survives to Reconcile. + if r.FederationCluster != nil { + preserveClusterName := func(_ multicluster.ClusterName, _ cluster.Cluster) handler.TypedEventHandler[*computev1alpha.WorkloadDeployment, mcreconcile.Request] { + return mchandler.TypedEnqueueRequestsFromMapFuncWithClusterPreservation(r.mapDownstreamDeploymentToRequest) + } + b = b.WatchesRawSource(milosource.MustNewClusterSource( + r.FederationCluster, + &computev1alpha.WorkloadDeployment{}, + preserveClusterName, + )) + } + + return b.Complete(r) +} + +// mapDownstreamDeploymentToRequest maps an event on a downstream Karmada +// WorkloadDeployment to a reconcile request for the corresponding +// project-namespace WorkloadDeployment. +// +// Correlation mirrors the identity the federator establishes when it mirrors the +// object downstream (see upsertDownstreamDeployment / ensureDownstreamNamespace): +// +// - The WD name is stable across all planes, so the request name equals the +// downstream WD name. +// - upsertDownstreamDeployment stamps the downstream WD with +// UpstreamOwnerNamespaceLabel = the project namespace, which becomes the +// request namespace. +// - The project cluster name is not on the WD itself; ensureDownstreamNamespace +// stamps it as UpstreamOwnerClusterNameLabel on the downstream namespace +// (encoded "cluster-" with "/" -> "_"). We read the namespace from the +// federation plane to recover and decode it. +// +// Events lacking the required correlation labels (e.g. WorkloadDeployments not +// created by this federator) are dropped. +func (r *WorkloadDeploymentFederator) mapDownstreamDeploymentToRequest( + ctx context.Context, + downstream *computev1alpha.WorkloadDeployment, +) []mcreconcile.Request { + logger := log.FromContext(ctx) + + projectNamespace := downstream.Labels[downstreamclient.UpstreamOwnerNamespaceLabel] + if projectNamespace == "" { + // Not federated by us (no upstream-namespace label) — nothing to enqueue. + return nil + } + + // Recover the project cluster name from the downstream namespace label. + var ns corev1.Namespace + if err := r.FederationCluster.GetClient().Get(ctx, types.NamespacedName{Name: downstream.Namespace}, &ns); err != nil { + logger.V(1).Info("unable to resolve downstream namespace for status mapping; dropping event", + "downstreamNamespace", downstream.Namespace, "error", err) + return nil + } + encodedClusterName := ns.Labels[downstreamclient.UpstreamOwnerClusterNameLabel] + if encodedClusterName == "" { + logger.V(1).Info("downstream namespace missing upstream-cluster-name label; dropping event", + "downstreamNamespace", downstream.Namespace) + return nil + } + clusterName := projectClusterNameFromLabel(encodedClusterName) + if clusterName == "" { + logger.V(1).Info("undecodable upstream-cluster-name label; dropping event", + "downstreamNamespace", downstream.Namespace, "encoded", encodedClusterName) + return nil + } + + // Verify the project cluster is engaged before enqueuing. The Milo + // multicluster provider keys clusters by bare project name, and GetCluster + // returns an error for an unknown name. Without this guard, an unresolvable + // name — or the empty string, which mcmanager routes to the local host + // cluster that has no compute CRDs — would make Reconcile fail with + // "no matches for kind WorkloadDeployment" in a hot loop. Dropping the event + // is safe: once the provider engages the project cluster, the For watch + // reconciles it and the next downstream status event maps cleanly. + if _, err := r.mgr.GetCluster(ctx, multicluster.ClusterName(clusterName)); err != nil { + logger.V(1).Info("project cluster not engaged for downstream status mapping; dropping event", + "clusterName", clusterName, "downstreamNamespace", downstream.Namespace, "error", err) + return nil + } + + return []mcreconcile.Request{ + { + ClusterName: multicluster.ClusterName(clusterName), + Request: ctrl.Request{ + NamespacedName: types.NamespacedName{ + Namespace: projectNamespace, + Name: downstream.Name, + }, + }, + }, + } +} + +// projectClusterNameFromLabel extracts the project cluster name that the Milo +// multicluster provider uses as its cluster key from a downstream namespace's +// UpstreamOwnerClusterNameLabel value. +// +// MappedNamespaceResourceStrategy encodes the label as "cluster-_" +// (with "/" replaced by "_"), e.g. "cluster-datum-cloud" (no org) or +// "cluster-_test-project-abc" (empty org). The provider, however, keys clusters +// by bare project name only (multicluster provider: key = project.Name), so we +// strip the "cluster-" prefix, decode "_" back to "/", and return the final path +// segment — the project name. Examples: +// +// "cluster-datum-cloud" -> "datum-cloud" +// "cluster-_test-project-abc" -> "test-project-abc" +func projectClusterNameFromLabel(encoded string) string { + name := strings.TrimPrefix(encoded, "cluster-") + name = strings.ReplaceAll(name, "_", "/") + if i := strings.LastIndex(name, "/"); i >= 0 { + name = name[i+1:] + } + return name } // propagationPolicyNameFor returns the PropagationPolicy name for a given city diff --git a/internal/controller/workloaddeployment_federator_test.go b/internal/controller/workloaddeployment_federator_test.go index 2bd2169f..84a6f76d 100644 --- a/internal/controller/workloaddeployment_federator_test.go +++ b/internal/controller/workloaddeployment_federator_test.go @@ -4,6 +4,7 @@ package controller import ( "context" + "strings" "testing" "time" @@ -21,6 +22,7 @@ import ( mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" computev1alpha "go.datum.net/compute/api/v1alpha" + "go.miloapis.com/milo/pkg/downstreamclient" ) // ─── Shared test constants ──────────────────────────────────────────────────── @@ -118,6 +120,146 @@ func reconcileRequest() mcreconcile.Request { // ─── Unit tests ─────────────────────────────────────────────────────────────── +// TestMapDownstreamDeploymentToRequest verifies the downstream-WD → project-WD +// mapping used by the cross-plane status watch: the request name equals the +// downstream WD name, the namespace comes from the WD's upstream-namespace label, +// and the cluster name is decoded from the downstream namespace's +// upstream-cluster-name label. Events lacking correlation metadata are dropped. +func TestMapDownstreamDeploymentToRequest(t *testing.T) { + t.Parallel() + + // The encoded cluster name on the downstream namespace decodes to testCluster. + encodedCluster := "cluster-" + strings.ReplaceAll(testCluster, "/", "_") + + downstreamNS := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testKarmadaNSStr, + Labels: map[string]string{ + downstreamclient.UpstreamOwnerClusterNameLabel: encodedCluster, + }, + }, + } + + // A downstream namespace whose cluster label decodes to a project cluster the + // manager has not engaged — used to verify the not-engaged drop path. + unknownClusterNS := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testKarmadaNSStr, + Labels: map[string]string{ + downstreamclient.UpstreamOwnerClusterNameLabel: "cluster-unregistered-project", + }, + }, + } + + newDownstreamWD := func(labels map[string]string) *computev1alpha.WorkloadDeployment { + return &computev1alpha.WorkloadDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: testWDName, + Namespace: testKarmadaNSStr, + Labels: labels, + }, + } + } + + tests := []struct { + name string + karmadaObjs []client.Object + downstreamWD *computev1alpha.WorkloadDeployment + want []mcreconcile.Request + }{ + { + name: "maps to project WD request", + karmadaObjs: []client.Object{downstreamNS}, + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: []mcreconcile.Request{ + { + ClusterName: testCluster, + Request: ctrl.Request{ + NamespacedName: types.NamespacedName{ + Namespace: testProjNS, + Name: testWDName, + }, + }, + }, + }, + }, + { + name: "missing upstream-namespace label is dropped", + karmadaObjs: []client.Object{downstreamNS}, + downstreamWD: newDownstreamWD(nil), + want: nil, + }, + { + name: "missing downstream namespace is dropped", + karmadaObjs: nil, // namespace not present in federation cluster + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: nil, + }, + { + name: "namespace without cluster label is dropped", + karmadaObjs: []client.Object{&corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: testKarmadaNSStr}, + }}, + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: nil, + }, + { + name: "project cluster not engaged is dropped", + karmadaObjs: []client.Object{unknownClusterNS}, + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + karmadaClient := newKarmadaFakeClient(tt.karmadaObjs...) + r := &WorkloadDeploymentFederator{ + // Only testCluster is engaged; the not-engaged case decodes to a + // different project name and must be dropped by the GetCluster guard. + mgr: newFakeMCManager(testCluster, newFakeCluster(karmadaClient)), + FederationClient: karmadaClient, + FederationCluster: newFakeCluster(karmadaClient), + } + + got := r.mapDownstreamDeploymentToRequest(context.Background(), tt.downstreamWD) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestProjectClusterNameFromLabel(t *testing.T) { + t.Parallel() + + tests := []struct { + encoded string + want string + }{ + {"cluster-datum-cloud", "datum-cloud"}, + // Org-scoped encodings decode to org/project; the provider keys on the + // bare project name, so only the final path segment is returned. + {"cluster-org_project", "project"}, + {"cluster-_test-project-abc", "test-project-abc"}, + {"cluster-test-project-cluster", "test-project-cluster"}, + } + for _, tt := range tests { + t.Run(tt.encoded, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, projectClusterNameFromLabel(tt.encoded)) + }) + } +} + func TestPropagationPolicyNameFor(t *testing.T) { t.Parallel() @@ -155,6 +297,28 @@ func TestWorkloadDeploymentFederator_NoFederationClient(t *testing.T) { assert.Equal(t, ctrl.Result{}, result) } +// TestWorkloadDeploymentFederator_EmptyClusterNameDropped verifies that a +// reconcile request carrying an empty cluster name is dropped without error +// (and without touching GetCluster), so it can never fall back to the local +// host cluster and spin in a "no matches for kind" requeue loop. +func TestWorkloadDeploymentFederator_EmptyClusterNameDropped(t *testing.T) { + t.Parallel() + + projectClient := newProjectFakeClient(testProjectNamespace(), testWorkloadDeployment()) + karmadaClient := newKarmadaFakeClient() + r := newTestFederator(projectClient, karmadaClient) + + req := mcreconcile.Request{ + ClusterName: "", + Request: ctrl.Request{ + NamespacedName: types.NamespacedName{Name: testWDName, Namespace: testProjNS}, + }, + } + result, err := r.Reconcile(context.Background(), req) + require.NoError(t, err) + assert.Equal(t, ctrl.Result{}, result) +} + // TestWorkloadDeploymentFederator_AddsFinalizerOnFirstSeen verifies that the // first reconcile of a brand-new WorkloadDeployment adds the finalizer and // returns without federating (the finalizer update triggers a re-queue).