diff --git a/cmd/kubectl-datadog/autoscaling/cluster/cluster.go b/cmd/kubectl-datadog/autoscaling/cluster/cluster.go
index b66e041d97..de74bc0b06 100644
--- a/cmd/kubectl-datadog/autoscaling/cluster/cluster.go
+++ b/cmd/kubectl-datadog/autoscaling/cluster/cluster.go
@@ -6,6 +6,7 @@ import (
 	"github.com/spf13/cobra"
 	"k8s.io/cli-runtime/pkg/genericclioptions"
 
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/evict"
 	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/install"
 	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/uninstall"
 	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/update"
@@ -35,6 +36,7 @@ func New(streams genericclioptions.IOStreams) *cobra.Command {
 	cmd.AddCommand(install.New(streams))
 	cmd.AddCommand(uninstall.New(streams))
 	cmd.AddCommand(update.New(streams))
+	cmd.AddCommand(evict.New(streams))
 
 	o := newOptions(streams)
 	o.configFlags.AddFlags(cmd.Flags())
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/common/aws/node.go b/cmd/kubectl-datadog/autoscaling/cluster/common/aws/node.go
new file mode 100644
index 0000000000..050e07905d
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/common/aws/node.go
@@ -0,0 +1,36 @@
+package aws
+
+import (
+	"regexp"
+
+	corev1 "k8s.io/api/core/v1"
+)
+
+// awsProviderIDRegexp matches the AWS provider ID for EC2-backed nodes.
+// Format: aws:///<az>/i-<hex> (e.g. aws:///us-east-1a/i-0abc123def456789).
+// Fargate nodes use a different shape (aws:///<az>/fargate-ip-...) and must
+// therefore be classified by label before reaching this regex.
+var awsProviderIDRegexp = regexp.MustCompile(`^aws:///[^/]+/(i-[0-9a-f]+)$`)
+
+// LabelEKSNodegroup is the label EKS stamps on every node that belongs to a
+// managed node group. The label value is the node group name. Exposed as a
+// constant so every consumer (classifier, evict-legacy-nodes, future code)
+// references the same string.
+const LabelEKSNodegroup = "eks.amazonaws.com/nodegroup"
+
+// ExtractEC2InstanceID returns the EC2 instance ID (i-...) from a Node's
+// providerID, or false when the providerID is not an EC2 instance (Fargate
+// uses `aws:///<az>/fargate-ip-...`, GCP/Azure use entirely different shapes,
+// etc.). Lives here in `common/aws` so both `common/clusterinfo` (which
+// imports `common/karpenter`) and `common/karpenter` (which classifies its
+// own nodes) can use it without creating an import cycle.
+func ExtractEC2InstanceID(node *corev1.Node) (string, bool) {
+	if node == nil {
+		return "", false
+	}
+	m := awsProviderIDRegexp.FindStringSubmatch(node.Spec.ProviderID)
+	if len(m) != 2 {
+		return "", false
+	}
+	return m[1], true
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/common/aws/node_test.go b/cmd/kubectl-datadog/autoscaling/cluster/common/aws/node_test.go
new file mode 100644
index 0000000000..c13e9a0ad0
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/common/aws/node_test.go
@@ -0,0 +1,38 @@
+package aws
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+)
+
+func TestExtractEC2InstanceID(t *testing.T) {
+	for _, tc := range []struct {
+		name     string
+		provider string
+		wantID   string
+		wantOK   bool
+	}{
+		{name: "ec2", provider: "aws:///eu-west-3a/i-0123456789abcdef0", wantID: "i-0123456789abcdef0", wantOK: true},
+		{name: "ec2 short id", provider: "aws:///us-east-1b/i-abc123", wantID: "i-abc123", wantOK: true},
+		{name: "fargate", provider: "aws:///eu-west-3a/fargate-ip-10-0-1-2", wantOK: false},
+		{name: "gcp", provider: "gce://project/zone/instance", wantOK: false},
+		{name: "empty", provider: "", wantOK: false},
+		{name: "missing prefix", provider: "i-0123456789abcdef0", wantOK: false},
+		{name: "missing AZ", provider: "aws:////i-0123456789abcdef0", wantOK: false},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			node := &corev1.Node{Spec: corev1.NodeSpec{ProviderID: tc.provider}}
+			id, ok := ExtractEC2InstanceID(node)
+			assert.Equal(t, tc.wantOK, ok)
+			if tc.wantOK {
+				assert.Equal(t, tc.wantID, id)
+			}
+		})
+	}
+	t.Run("nil node", func(t *testing.T) {
+		_, ok := ExtractEC2InstanceID(nil)
+		assert.False(t, ok)
+	})
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo/classify.go b/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo/classify.go
index 0403b5ae63..79d1528d7a 100644
--- a/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo/classify.go
+++ b/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo/classify.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"log"
 	"maps"
-	"regexp"
 	"strings"
 	"time"
 
@@ -36,11 +35,6 @@ import (
 // ValidationError at the API.
 const describeASGInstancesMaxIDs = 50
 
-// awsProviderIDRegexp matches the AWS provider ID for EC2-backed nodes.
-// Format: aws:///<az>/i-<hex>. Fargate nodes use a different shape and
-// must therefore be classified by label before reaching this regex.
-var awsProviderIDRegexp = regexp.MustCompile(`^aws:///[^/]+/(i-[0-9a-f]+)$`)
-
 // nodePoolDatadogCreatedLabel is the label set by every Datadog autoscaling
 // product (kubectl-datadog AND the cluster agent) on the NodePools they
 // manage. Broader than the AND-pair `app.kubernetes.io/managed-by:
@@ -173,10 +167,9 @@ func classifyByLabels(ctx context.Context, k8sClient kubernetes.Interface, farga
 			return nil
 		}
 
-		matches := awsProviderIDRegexp.FindStringSubmatch(node.Spec.ProviderID)
-		if len(matches) == 2 {
+		if id, ok := commonaws.ExtractEC2InstanceID(node); ok {
 			candidates = append(candidates, asgCandidate{
-				instanceID: matches[1],
+				instanceID: id,
 				nodeName:   node.Name,
 			})
 		} else {
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/common/karpenter/fromnodes.go b/cmd/kubectl-datadog/autoscaling/cluster/common/karpenter/fromnodes.go
index 504f37850b..d3783fd70e 100644
--- a/cmd/kubectl-datadog/autoscaling/cluster/common/karpenter/fromnodes.go
+++ b/cmd/kubectl-datadog/autoscaling/cluster/common/karpenter/fromnodes.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"log"
 	"maps"
-	"regexp"
 	"slices"
 	"strings"
 
@@ -17,11 +16,9 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/pager"
-)
 
-// awsProviderIDRegexp matches the AWS provider ID format for EC2 instances.
-// Format: aws:///ZONE/INSTANCE_ID (e.g., aws:///us-east-1a/i-0abc123def456789)
-var awsProviderIDRegexp = regexp.MustCompile(`^aws:///[^/]+/(i-[0-9a-f]+)$`)
+	commonaws "github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/aws"
+)
 
 // ec2DescribeBatchSize bounds the number of instance IDs we hand to a single
 // ec2:DescribeInstances / ec2:DescribeImages call. The K8s pager streams
@@ -62,12 +59,12 @@ func GetNodesProperties(ctx context.Context, clientset *kubernetes.Clientset, ec
 		if _, isKarpenter := node.Labels["karpenter.k8s.aws/ec2nodeclass"]; isKarpenter {
 			return nil
 		}
-		matches := awsProviderIDRegexp.FindStringSubmatch(node.Spec.ProviderID)
-		if len(matches) != 2 {
+		id, ok := commonaws.ExtractEC2InstanceID(node)
+		if !ok {
 			log.Printf("Skipping node %s with unexpected provider ID: %s", node.Name, node.Spec.ProviderID)
 			return nil
 		}
-		pending[matches[1]] = pendingNode{labels: node.Labels, taints: node.Spec.Taints}
+		pending[id] = pendingNode{labels: node.Labels, taints: node.Spec.Taints}
 		if len(pending) >= ec2DescribeBatchSize {
 			return flush()
 		}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/asg.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/asg.go
new file mode 100644
index 0000000000..cdef49388e
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/asg.go
@@ -0,0 +1,169 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+
+	awssdk "github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
+	"k8s.io/client-go/kubernetes"
+
+	commonaws "github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/aws"
+)
+
+// AutoscalingAPI is the subset of *autoscaling.Client used by evictASG.
+// Defined as an interface so unit tests can stub the AWS SDK out cheaply.
+type AutoscalingAPI interface {
+	UpdateAutoScalingGroup(ctx context.Context, in *autoscaling.UpdateAutoScalingGroupInput, opts ...func(*autoscaling.Options)) (*autoscaling.UpdateAutoScalingGroupOutput, error)
+	SuspendProcesses(ctx context.Context, in *autoscaling.SuspendProcessesInput, opts ...func(*autoscaling.Options)) (*autoscaling.SuspendProcessesOutput, error)
+	TerminateInstanceInAutoScalingGroup(ctx context.Context, in *autoscaling.TerminateInstanceInAutoScalingGroupInput, opts ...func(*autoscaling.Options)) (*autoscaling.TerminateInstanceInAutoScalingGroupOutput, error)
+}
+
+// evictASG cordons every node in the ASG up front, then drains them,
+// terminating each node's instance — and decrementing the ASG's desired
+// capacity — as soon as that node has drained cleanly, so its capacity is
+// freed without waiting for the rest of the group. Cordoning the whole group
+// before any drain (as EKS does for a managed node group) keeps a pod evicted
+// from one node from landing on a sibling node that is itself about to be
+// drained. Once every node has drained, the ASG is locked at min=max=desired=0
+// so nothing is ever relaunched.
+//
+// Safety rules:
+//
+//  1. An instance is only terminated once its node has drained cleanly. If a
+//     drain fails, that instance is left running (its pods are still on it)
+//     and the ASG is left at MinSize=0 with AZRebalance suspended but is NOT
+//     locked at MaxSize=0, so a re-run can pick up where this one stopped.
+//  2. Per-instance termination requires two precautions, applied up front by
+//     prepareASGForTermination: (a) MinSize=0, because AWS rejects
+//     TerminateInstanceInAutoScalingGroup with ShouldDecrementDesiredCapacity
+//     while MinSize == DesiredCapacity; and (b) suspending the AZRebalance
+//     process, because decrementing desired capacity one instance at a time
+//     can otherwise trigger AZ rebalancing that terminates a not-yet-drained
+//     instance in another AZ.
+//  3. Crash window: because instances are terminated before the final
+//     MaxSize=0 lock, a crash (or lost AWS connectivity) after the last
+//     termination but before the lock leaves the ASG at DesiredCapacity=0
+//     (so it will not relaunch on its own) but with its original MaxSize.
+//     A re-run cannot rediscover it — target discovery is driven by surviving
+//     Kubernetes nodes, which are now gone — so the operator must re-lock
+//     MaxSize manually if an external scaler might raise the desired capacity.
+//     In practice the command also scales cluster-autoscaler to zero, so
+//     nothing routinely raises it.
+//
+// We never delete the ASG: it may be managed by Terraform/CloudFormation/Helm,
+// and only the original owner should remove it.
+func evictASG(ctx context.Context, clientset kubernetes.Interface, asg AutoscalingAPI, asgName string, nodes []string, drainOpts nodeDrainOptions) error {
+	if drainOpts.DryRun {
+		log.Printf("[dry-run] would suspend AZRebalance and set MinSize=0 on ASG %s", asgName)
+	} else if err := prepareASGForTermination(ctx, asg, asgName); err != nil {
+		return fmt.Errorf("prepare ASG %s for termination: %w", asgName, err)
+	}
+
+	// Cordon every node up front so a pod evicted from one node is never
+	// rescheduled onto another node of the same ASG that is itself about to be
+	// drained. A node that fails to cordon is left undrained; treat that as a
+	// drain failure so the ASG keeps its original MaxSize for a re-run instead
+	// of being locked away with workloads still on it.
+	cordoned, errs := cordonNodes(ctx, clientset, nodes, drainOpts.DryRun)
+
+	for _, node := range cordoned {
+		nodeName := node.Name
+		id, hasInstanceID := commonaws.ExtractEC2InstanceID(node)
+		if !hasInstanceID {
+			log.Printf("Warning: node %s has unexpected providerID %q; its instance will be terminated by the final scale-to-zero instead", nodeName, node.Spec.ProviderID)
+		}
+		if err := drainNode(ctx, clientset, nodeName, drainOpts); err != nil {
+			errs = append(errs, fmt.Errorf("drain node %s: %w", nodeName, err))
+			continue // do NOT terminate this instance: workloads are still on it
+		}
+		// The node drained cleanly: terminate its instance now, decrementing
+		// the ASG's desired capacity so it is not relaunched. Nodes with an
+		// unexpected providerID are left for the final scale-to-zero.
+		if !hasInstanceID {
+			continue
+		}
+		if drainOpts.DryRun {
+			log.Printf("[dry-run] would terminate instance %s in ASG %s (decrementing desired capacity)", id, asgName)
+			continue
+		}
+		if err := terminateASGInstance(ctx, asg, id); err != nil {
+			errs = append(errs, fmt.Errorf("terminate instance %s in ASG %s: %w", id, asgName, err))
+		}
+	}
+
+	if len(errs) > 0 {
+		log.Printf("ASG %s: at least one node failed to cordon, drain, or terminate; leaving the ASG at MinSize=0 without locking MaxSize. Re-run after addressing the errors above.", asgName)
+		return errors.Join(errs...)
+	}
+
+	// Every node drained and its instance was terminated. Lock the ASG at
+	// min=max=desired=0 so nothing is ever relaunched, and to clean up any
+	// instance that couldn't be terminated per-node (unexpected providerID).
+	if drainOpts.DryRun {
+		log.Printf("[dry-run] would scale ASG %s to min=max=desired=0", asgName)
+		return nil
+	}
+	// All instances are now terminated; only the MaxSize=0 lock remains. A
+	// crash here would leave the ASG at desired=0 but unlocked and no longer
+	// rediscoverable by a re-run (see the crash-window note on evictASG).
+	log.Printf("ASG %s: all nodes drained; locking the ASG at min=max=desired=0.", asgName)
+	if err := scaleASGToZero(ctx, asg, asgName); err != nil {
+		errs = append(errs, fmt.Errorf("scale ASG %s to 0: %w", asgName, err))
+	}
+	return errors.Join(errs...)
+}
+
+// prepareASGForTermination makes the ASG safe for the per-instance termination
+// performed during the drain loop:
+//
+//   - AZRebalance is suspended so that decrementing desired capacity one
+//     instance at a time cannot trigger AZ rebalancing — which would terminate
+//     a not-yet-drained instance in another availability zone.
+//   - MinSize is set to 0 so that TerminateInstanceInAutoScalingGroup may
+//     decrement DesiredCapacity (AWS rejects the decrement while
+//     MinSize == DesiredCapacity).
+func prepareASGForTermination(ctx context.Context, asg AutoscalingAPI, asgName string) error {
+	if _, err := asg.SuspendProcesses(ctx, &autoscaling.SuspendProcessesInput{
+		AutoScalingGroupName: awssdk.String(asgName),
+		ScalingProcesses:     []string{"AZRebalance"},
+	}); err != nil {
+		return fmt.Errorf("suspend AZRebalance: %w", err)
+	}
+	if _, err := asg.UpdateAutoScalingGroup(ctx, &autoscaling.UpdateAutoScalingGroupInput{
+		AutoScalingGroupName: awssdk.String(asgName),
+		MinSize:              awssdk.Int32(0),
+	}); err != nil {
+		return fmt.Errorf("set MinSize=0: %w", err)
+	}
+	log.Printf("Prepared ASG %s for termination (AZRebalance suspended, MinSize=0).", asgName)
+	return nil
+}
+
+// terminateASGInstance terminates a single (already drained) instance and
+// decrements the ASG's desired capacity so it is not relaunched.
+func terminateASGInstance(ctx context.Context, asg AutoscalingAPI, instanceID string) error {
+	if _, err := asg.TerminateInstanceInAutoScalingGroup(ctx, &autoscaling.TerminateInstanceInAutoScalingGroupInput{
+		InstanceId:                     awssdk.String(instanceID),
+		ShouldDecrementDesiredCapacity: awssdk.Bool(true),
+	}); err != nil {
+		return err
+	}
+	log.Printf("Terminated instance %s and decremented ASG desired capacity.", instanceID)
+	return nil
+}
+
+func scaleASGToZero(ctx context.Context, asg AutoscalingAPI, asgName string) error {
+	if _, err := asg.UpdateAutoScalingGroup(ctx, &autoscaling.UpdateAutoScalingGroupInput{
+		AutoScalingGroupName: awssdk.String(asgName),
+		MinSize:              awssdk.Int32(0),
+		MaxSize:              awssdk.Int32(0),
+		DesiredCapacity:      awssdk.Int32(0),
+	}); err != nil {
+		return err
+	}
+	log.Printf("Scaled ASG %s to min=max=desired=0.", asgName)
+	return nil
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/asg_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/asg_test.go
new file mode 100644
index 0000000000..e061f1b1db
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/asg_test.go
@@ -0,0 +1,353 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"slices"
+	"testing"
+	"time"
+
+	awssdk "github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/autoscaling"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
+)
+
+// stubAutoscaling implements AutoscalingAPI by recording each call. Returning
+// a non-nil output keeps the production code happy without exercising any AWS
+// SDK middleware.
+type stubAutoscaling struct {
+	updates              []autoscaling.UpdateAutoScalingGroupInput // captured UpdateAutoScalingGroup inputs, in order
+	suspendedAZRebalance []string                                  // asg names for which AZRebalance was suspended
+	terminated           []string                                  // instance IDs terminated in-ASG
+	suspendErr           error                                     // returned by SuspendProcesses
+	prepUpdateErr        error                                     // returned by the MinSize-only prep update
+	lockErr              error                                     // returned by the final min=max=desired=0 lock update
+	terminateErr         error                                     // returned by TerminateInstanceInAutoScalingGroup
+}
+
+func (s *stubAutoscaling) UpdateAutoScalingGroup(_ context.Context, in *autoscaling.UpdateAutoScalingGroupInput, _ ...func(*autoscaling.Options)) (*autoscaling.UpdateAutoScalingGroupOutput, error) {
+	s.updates = append(s.updates, *in)
+	// The prep update sets MinSize only (MaxSize left nil); the final lock
+	// update sets all three fields. Fail the one the test asked for.
+	if in.MaxSize == nil {
+		return &autoscaling.UpdateAutoScalingGroupOutput{}, s.prepUpdateErr
+	}
+	return &autoscaling.UpdateAutoScalingGroupOutput{}, s.lockErr
+}
+
+func (s *stubAutoscaling) SuspendProcesses(_ context.Context, in *autoscaling.SuspendProcessesInput, _ ...func(*autoscaling.Options)) (*autoscaling.SuspendProcessesOutput, error) {
+	if slices.Contains(in.ScalingProcesses, "AZRebalance") {
+		s.suspendedAZRebalance = append(s.suspendedAZRebalance, awssdk.ToString(in.AutoScalingGroupName))
+	}
+	return &autoscaling.SuspendProcessesOutput{}, s.suspendErr
+}
+
+func (s *stubAutoscaling) TerminateInstanceInAutoScalingGroup(_ context.Context, in *autoscaling.TerminateInstanceInAutoScalingGroupInput, _ ...func(*autoscaling.Options)) (*autoscaling.TerminateInstanceInAutoScalingGroupOutput, error) {
+	s.terminated = append(s.terminated, awssdk.ToString(in.InstanceId))
+	return &autoscaling.TerminateInstanceInAutoScalingGroupOutput{}, s.terminateErr
+}
+
+// minSizeZeroPrepped reports whether a "prep" update (MinSize=0 only, MaxSize
+// left untouched) was issued for asgName.
+func (s *stubAutoscaling) minSizeZeroPrepped(asgName string) bool {
+	return slices.ContainsFunc(s.updates, func(u autoscaling.UpdateAutoScalingGroupInput) bool {
+		return awssdk.ToString(u.AutoScalingGroupName) == asgName &&
+			u.MinSize != nil && *u.MinSize == 0 && u.MaxSize == nil
+	})
+}
+
+// locked reports whether the final min=max=desired=0 update was issued for
+// asgName.
+func (s *stubAutoscaling) locked(asgName string) bool {
+	return slices.ContainsFunc(s.updates, func(u autoscaling.UpdateAutoScalingGroupInput) bool {
+		return awssdk.ToString(u.AutoScalingGroupName) == asgName &&
+			u.MinSize != nil && *u.MinSize == 0 &&
+			u.MaxSize != nil && *u.MaxSize == 0 &&
+			u.DesiredCapacity != nil && *u.DesiredCapacity == 0
+	})
+}
+
+func newDrainOpts(dryRun bool) nodeDrainOptions {
+	return nodeDrainOptions{
+		DryRun:          dryRun,
+		EvictionTimeout: time.Second,
+		NodeTimeout:     time.Second,
+		PollInterval:    10 * time.Millisecond,
+	}
+}
+
+func TestEvictASG(t *testing.T) {
+	ec2Node := func() *corev1.Node {
+		return &corev1.Node{
+			ObjectMeta: metav1.ObjectMeta{Name: "ip-1"},
+			Spec:       corev1.NodeSpec{ProviderID: "aws:///eu-west-3a/i-0123456789abcdef0"},
+		}
+	}
+	fargateNode := func() *corev1.Node {
+		return &corev1.Node{
+			ObjectMeta: metav1.ObjectMeta{Name: "ip-1"},
+			Spec:       corev1.NodeSpec{ProviderID: "aws:///eu-west-3a/fargate-ip-10-0-1-2"},
+		}
+	}
+	stuckNode := func() *corev1.Node {
+		return &corev1.Node{
+			ObjectMeta: metav1.ObjectMeta{Name: "ip-stuck"},
+			Spec:       corev1.NodeSpec{ProviderID: "aws:///eu-west-3a/i-aaaaaaaaaaaaaaaaa"},
+		}
+	}
+	stuckPod := func() *corev1.Pod {
+		return &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{Name: "blocker", Namespace: "default"},
+			Spec:       corev1.PodSpec{NodeName: "ip-stuck"},
+		}
+	}
+	fastDrain := nodeDrainOptions{
+		EvictionTimeout: 50 * time.Millisecond,
+		NodeTimeout:     100 * time.Millisecond,
+		PollInterval:    10 * time.Millisecond,
+	}
+
+	for _, tc := range []struct {
+		name string
+		// objects pre-populate the fake clientset.
+		objects []runtime.Object
+		// installEvictionReactor wires a 200-OK Eviction subresource reactor
+		// that keeps the pod alive in the store (so drainNode hits its node
+		// timeout). Used only by the drain-failure case.
+		installEvictionReactor bool
+		// nodes is the slice passed to evictASG.
+		nodes []string
+		opts  nodeDrainOptions
+		// terminateErr / lockErr inject AWS failures into the per-instance
+		// termination and the final lock update respectively.
+		terminateErr error
+		lockErr      error
+		// wantErr=true requires evictASG to return a non-nil error.
+		wantErr bool
+		// wantPrepped: the ASG was prepped for termination (AZRebalance
+		// suspended + MinSize=0).
+		wantPrepped bool
+		// wantTerminated is the expected set of instance IDs terminated via
+		// TerminateInstanceInAutoScalingGroup. nil ⇒ none.
+		wantTerminated []string
+		// wantLocked: the final min=max=desired=0 update was issued.
+		wantLocked bool
+		// wantUnschedulable: per-node expected `Spec.Unschedulable`. Nodes
+		// absent from the map (or absent from the cluster) aren't checked.
+		wantUnschedulable map[string]bool
+	}{
+		{
+			name:              "happy path",
+			objects:           []runtime.Object{ec2Node()},
+			nodes:             []string{"ip-1"},
+			opts:              newDrainOpts(false),
+			wantPrepped:       true,
+			wantTerminated:    []string{"i-0123456789abcdef0"},
+			wantLocked:        true,
+			wantUnschedulable: map[string]bool{"ip-1": true},
+		},
+		{
+			// ASG neutralization still happens even when the requested K8s
+			// node is already gone — the operator may be re-running after a
+			// partial earlier execution and the AWS-side scale-to-zero must
+			// still land. No per-instance termination: the node (hence its
+			// instance ID) is gone.
+			name:        "node already gone",
+			nodes:       []string{"ip-1"},
+			opts:        newDrainOpts(false),
+			wantPrepped: true,
+			wantLocked:  true,
+		},
+		{
+			name:              "dry-run mutates nothing",
+			objects:           []runtime.Object{ec2Node()},
+			nodes:             []string{"ip-1"},
+			opts:              newDrainOpts(true),
+			wantUnschedulable: map[string]bool{"ip-1": false},
+			// wantPrepped/wantLocked false, wantTerminated nil ⇒ no AWS
+			// mutation must happen.
+		},
+		{
+			// A node with a non-EC2 providerID (e.g. Fargate-ish) yields no
+			// instance ID: it still drains, but per-instance termination is
+			// skipped and the instance is cleaned up by the final
+			// scale-to-zero instead.
+			name:              "non-EC2 providerID drains, skips per-instance terminate, still locks",
+			objects:           []runtime.Object{fargateNode()},
+			nodes:             []string{"ip-1"},
+			opts:              newDrainOpts(false),
+			wantPrepped:       true,
+			wantLocked:        true,
+			wantUnschedulable: map[string]bool{"ip-1": true},
+		},
+		{
+			// Safety regression: a node failing to drain (here a PDB-blocked
+			// pod stays in the fake store past the eviction reactor) must
+			// leave the instance running and the ASG unlocked (MaxSize not
+			// forced to 0) so a re-run can pick up where this one stopped.
+			// The up-front prep (MinSize=0, AZRebalance suspended) has
+			// happened and is idempotent on re-run.
+			name:                   "drain failure leaves ASG unlocked",
+			objects:                []runtime.Object{stuckNode(), stuckPod()},
+			installEvictionReactor: true,
+			nodes:                  []string{"ip-stuck"},
+			opts:                   fastDrain,
+			wantErr:                true,
+			wantPrepped:            true,
+			wantLocked:             false,
+		},
+		{
+			// A per-instance termination failure must propagate and, like a
+			// drain failure, leave the ASG unlocked for a re-run.
+			name:           "terminate failure leaves ASG unlocked",
+			objects:        []runtime.Object{ec2Node()},
+			nodes:          []string{"ip-1"},
+			opts:           newDrainOpts(false),
+			terminateErr:   errTerminate,
+			wantErr:        true,
+			wantPrepped:    true,
+			wantTerminated: []string{"i-0123456789abcdef0"},
+			wantLocked:     false,
+		},
+		{
+			// A failure of the final lock update must surface as an error
+			// (the lock call was still attempted).
+			name:           "final lock failure surfaces error",
+			objects:        []runtime.Object{ec2Node()},
+			nodes:          []string{"ip-1"},
+			opts:           newDrainOpts(false),
+			lockErr:        errLock,
+			wantErr:        true,
+			wantPrepped:    true,
+			wantTerminated: []string{"i-0123456789abcdef0"},
+			wantLocked:     true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			client := fake.NewClientset(tc.objects...)
+			if tc.installEvictionReactor {
+				installPodEvictionReactor(client)
+			}
+			stub := &stubAutoscaling{terminateErr: tc.terminateErr, lockErr: tc.lockErr}
+
+			err := evictASG(t.Context(), client, stub, "my-asg", tc.nodes, tc.opts)
+			if tc.wantErr {
+				require.Error(t, err)
+			} else {
+				require.NoError(t, err)
+			}
+
+			if tc.wantPrepped {
+				assert.Equal(t, []string{"my-asg"}, stub.suspendedAZRebalance, "AZRebalance must be suspended once")
+				assert.True(t, stub.minSizeZeroPrepped("my-asg"), "MinSize=0 prep update must be issued")
+			} else {
+				assert.Empty(t, stub.suspendedAZRebalance, "AZRebalance must not be suspended")
+				assert.Empty(t, stub.updates, "no UpdateAutoScalingGroup call must happen")
+			}
+
+			if tc.wantTerminated == nil {
+				assert.Empty(t, stub.terminated, "no instance must be terminated")
+			} else {
+				assert.ElementsMatch(t, tc.wantTerminated, stub.terminated)
+			}
+
+			assert.Equal(t, tc.wantLocked, stub.locked("my-asg"), "final scale-to-zero")
+
+			for nodeName, want := range tc.wantUnschedulable {
+				got, getErr := client.CoreV1().Nodes().Get(t.Context(), nodeName, metav1.GetOptions{})
+				require.NoError(t, getErr)
+				assert.Equal(t, want, got.Spec.Unschedulable, "Spec.Unschedulable for %s", nodeName)
+			}
+		})
+	}
+}
+
+var (
+	errSuspend   = errors.New("suspend boom")
+	errPrepMin   = errors.New("min-size boom")
+	errTerminate = errors.New("terminate boom")
+	errLock      = errors.New("lock boom")
+)
+
+// TestEvictASGCordonFailure verifies that when a node cannot be cordoned, it is
+// left undrained and the ASG is NOT locked at MaxSize=0 — a re-run must be able
+// to finish the job. The up-front prep (AZRebalance suspend + MinSize=0) still
+// ran and is idempotent on the re-run.
+func TestEvictASGCordonFailure(t *testing.T) {
+	node := &corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{Name: "ip-1"},
+		Spec:       corev1.NodeSpec{ProviderID: "aws:///eu-west-3a/i-0123456789abcdef0"},
+	}
+	client := fake.NewClientset(node)
+	// Every node Update fails with a non-conflict error so the cordon gives up.
+	client.PrependReactor("update", "nodes", func(clienttesting.Action) (bool, runtime.Object, error) {
+		return true, nil, apierrors.NewInternalError(errors.New("cordon boom"))
+	})
+	stub := &stubAutoscaling{}
+
+	err := evictASG(t.Context(), client, stub, "my-asg", []string{"ip-1"}, newDrainOpts(false))
+	require.Error(t, err)
+
+	// Prep precedes the cordon, so it ran...
+	assert.Equal(t, []string{"my-asg"}, stub.suspendedAZRebalance, "AZRebalance suspended up front")
+	assert.True(t, stub.minSizeZeroPrepped("my-asg"), "MinSize=0 prep update issued up front")
+	// ...but the un-cordoned node was never drained or terminated, and the ASG
+	// stays unlocked so a re-run can finish.
+	assert.Empty(t, stub.terminated, "no instance terminated when the node failed to cordon")
+	assert.False(t, stub.locked("my-asg"), "ASG must not be locked when a node failed to cordon")
+}
+
+// TestEvictASGPrepFailure covers the up-front prepareASGForTermination failure
+// branches: when either the AZRebalance suspend or the MinSize=0 update fails,
+// evictASG must abort before touching any node (no cordon, no drain, no
+// termination, no lock) so a re-run can start cleanly.
+func TestEvictASGPrepFailure(t *testing.T) {
+	ec2Node := &corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{Name: "ip-1"},
+		Spec:       corev1.NodeSpec{ProviderID: "aws:///eu-west-3a/i-0123456789abcdef0"},
+	}
+
+	for _, tc := range []struct {
+		name          string
+		suspendErr    error
+		prepUpdateErr error
+		// wantMinSizeUpdate: the MinSize=0 prep update was attempted (only
+		// reached when the suspend succeeded).
+		wantMinSizeUpdate bool
+	}{
+		{
+			name:       "suspend failure aborts before draining",
+			suspendErr: errSuspend,
+		},
+		{
+			name:              "min-size update failure aborts before draining",
+			prepUpdateErr:     errPrepMin,
+			wantMinSizeUpdate: true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			client := fake.NewClientset(ec2Node)
+			stub := &stubAutoscaling{suspendErr: tc.suspendErr, prepUpdateErr: tc.prepUpdateErr}
+
+			err := evictASG(t.Context(), client, stub, "my-asg", []string{"ip-1"}, newDrainOpts(false))
+			require.Error(t, err)
+
+			// The drain loop must never have run: the node stays schedulable
+			// and no instance is terminated or locked.
+			assert.Empty(t, stub.terminated, "no instance must be terminated")
+			assert.False(t, stub.locked("my-asg"), "ASG must not be locked")
+			assert.Equal(t, tc.wantMinSizeUpdate, stub.minSizeZeroPrepped("my-asg"), "MinSize=0 prep update attempted")
+
+			got, getErr := client.CoreV1().Nodes().Get(t.Context(), "ip-1", metav1.GetOptions{})
+			require.NoError(t, getErr)
+			assert.False(t, got.Spec.Unschedulable, "node must not be cordoned when prep fails")
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/clusterautoscaler.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/clusterautoscaler.go
new file mode 100644
index 0000000000..0188f70510
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/clusterautoscaler.go
@@ -0,0 +1,50 @@
+package evict
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/util/retry"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+// scaleDownClusterAutoscaler patches the cluster-autoscaler Deployment's
+// /scale subresource to 0 replicas. Using the subresource keeps the change
+// minimal — we never touch the rest of the Deployment spec, so the diff is
+// invisible to a Helm/Argo controller that watches the image, env, etc.
+//
+// Wrapped in RetryOnConflict to survive races against any controller that
+// updates the Deployment between our Get and our Update.
+//
+// Idempotent: a Deployment already at 0 replicas returns nil without an
+// Update call. Best-effort against GitOps controllers — if Helm/Argo reverts
+// the change, this command does not loop; the operator is expected to pause
+// the GitOps reconciliation for cluster-autoscaler before running.
+func scaleDownClusterAutoscaler(ctx context.Context, clientset kubernetes.Interface, ca clusterinfo.ClusterAutoscaler, dryRun bool) error {
+	if !ca.Present {
+		return nil
+	}
+	if dryRun {
+		log.Printf("[dry-run] would scale Deployment %s/%s to 0 replicas", ca.Namespace, ca.Name)
+		return nil
+	}
+	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		scale, err := clientset.AppsV1().Deployments(ca.Namespace).GetScale(ctx, ca.Name, metav1.GetOptions{})
+		if err != nil {
+			return fmt.Errorf("failed to get Deployment scale %s/%s: %w", ca.Namespace, ca.Name, err)
+		}
+		if scale.Spec.Replicas == 0 {
+			return nil
+		}
+		scale.Spec.Replicas = 0
+		if _, err = clientset.AppsV1().Deployments(ca.Namespace).UpdateScale(ctx, ca.Name, scale, metav1.UpdateOptions{}); err != nil {
+			return err
+		}
+		log.Printf("Scaled Deployment %s/%s to 0 replicas.", ca.Namespace, ca.Name)
+		return nil
+	})
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/clusterautoscaler_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/clusterautoscaler_test.go
new file mode 100644
index 0000000000..6e82f4f0cf
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/clusterautoscaler_test.go
@@ -0,0 +1,165 @@
+package evict
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	appsv1 "k8s.io/api/apps/v1"
+	autoscalingv1 "k8s.io/api/autoscaling/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
+	"k8s.io/utils/ptr"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+// deploymentsGVR is the GroupVersionResource used to talk to the fake
+// clientset's object tracker for Deployment objects.
+var deploymentsGVR = schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}
+
+// scaleReactor wires GetScale/UpdateScale on a fake clientset so the test
+// observes the same read-modify-write surface as production. The fake doesn't
+// natively support the Scale subresource, so we synthesize Scale objects from
+// the underlying Deployment. The reactor talks to the underlying tracker
+// directly (rather than via client.AppsV1()) because the fake holds an
+// internal mutex while a reactor runs — calling back through the typed client
+// would deadlock.
+func scaleReactor(t *testing.T, client *fake.Clientset, conflictFirstUpdate bool) *int {
+	t.Helper()
+	updateCalls := 0
+	tracker := client.Tracker()
+	client.PrependReactor("get", "deployments", func(action clienttesting.Action) (bool, runtime.Object, error) {
+		ga, ok := action.(clienttesting.GetAction)
+		if !ok || ga.GetSubresource() != "scale" {
+			return false, nil, nil
+		}
+		obj, err := tracker.Get(deploymentsGVR, ga.GetNamespace(), ga.GetName())
+		if err != nil {
+			return true, nil, err
+		}
+		dep := obj.(*appsv1.Deployment)
+		var replicas int32
+		if dep.Spec.Replicas != nil {
+			replicas = *dep.Spec.Replicas
+		}
+		return true, &autoscalingv1.Scale{
+			ObjectMeta: metav1.ObjectMeta{Name: dep.Name, Namespace: dep.Namespace, ResourceVersion: dep.ResourceVersion},
+			Spec:       autoscalingv1.ScaleSpec{Replicas: replicas},
+		}, nil
+	})
+	client.PrependReactor("update", "deployments", func(action clienttesting.Action) (bool, runtime.Object, error) {
+		ua, ok := action.(clienttesting.UpdateAction)
+		if !ok || ua.GetSubresource() != "scale" {
+			return false, nil, nil
+		}
+		scale := ua.GetObject().(*autoscalingv1.Scale)
+		updateCalls++
+		if conflictFirstUpdate && updateCalls == 1 {
+			return true, nil, apierrors.NewConflict(
+				schema.GroupResource{Group: "apps", Resource: "deployments"},
+				scale.Name,
+				errors.New("forced conflict"),
+			)
+		}
+		obj, err := tracker.Get(deploymentsGVR, ua.GetNamespace(), scale.Name)
+		if err != nil {
+			return true, nil, err
+		}
+		dep := obj.(*appsv1.Deployment).DeepCopy()
+		dep.Spec.Replicas = ptr.To(scale.Spec.Replicas)
+		if err := tracker.Update(deploymentsGVR, dep, ua.GetNamespace()); err != nil {
+			return true, nil, err
+		}
+		return true, scale, nil
+	})
+	return &updateCalls
+}
+
+func TestScaleDownClusterAutoscaler(t *testing.T) {
+	caDep := func(replicas int32) *appsv1.Deployment {
+		return &appsv1.Deployment{
+			ObjectMeta: metav1.ObjectMeta{Name: "cluster-autoscaler", Namespace: "kube-system"},
+			Spec:       appsv1.DeploymentSpec{Replicas: ptr.To(replicas)},
+		}
+	}
+	caPresent := clusterinfo.ClusterAutoscaler{Present: true, Namespace: "kube-system", Name: "cluster-autoscaler"}
+
+	for _, tc := range []struct {
+		name string
+		// dep, when non-nil, is pre-loaded into the fake clientset.
+		dep *appsv1.Deployment
+		// ca is what Run would have passed in from clusterinfo.Classify.
+		ca clusterinfo.ClusterAutoscaler
+		// conflictFirstUpdate forces the scaleReactor to return Conflict on
+		// the first UpdateScale call so RetryOnConflict has to refetch.
+		conflictFirstUpdate bool
+		dryRun              bool
+		// wantUpdateCalls is the minimum number of UpdateScale invocations
+		// the test expects (>= because retries may happen).
+		wantUpdateCalls int
+		// wantReplicas, when non-nil, is the final value of the Deployment's
+		// Spec.Replicas after the call.
+		wantReplicas *int32
+	}{
+		{
+			name: "CA absent is a no-op",
+			ca:   clusterinfo.ClusterAutoscaler{Present: false},
+		},
+		{
+			name:            "replicas already 0 skips Update",
+			dep:             caDep(0),
+			ca:              caPresent,
+			wantUpdateCalls: 0,
+			wantReplicas:    ptr.To(int32(0)),
+		},
+		{
+			name:            "scales from 3 to 0",
+			dep:             caDep(3),
+			ca:              caPresent,
+			wantUpdateCalls: 1,
+			wantReplicas:    ptr.To(int32(0)),
+		},
+		{
+			name:                "retries on Conflict",
+			dep:                 caDep(2),
+			ca:                  caPresent,
+			conflictFirstUpdate: true,
+			wantUpdateCalls:     2,
+			wantReplicas:        ptr.To(int32(0)),
+		},
+		{
+			name:            "dry-run touches nothing",
+			dep:             caDep(3),
+			ca:              caPresent,
+			dryRun:          true,
+			wantUpdateCalls: 0,
+			wantReplicas:    ptr.To(int32(3)),
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			var objs []runtime.Object
+			if tc.dep != nil {
+				objs = append(objs, tc.dep)
+			}
+			client := fake.NewClientset(objs...)
+			calls := scaleReactor(t, client, tc.conflictFirstUpdate)
+
+			require.NoError(t, scaleDownClusterAutoscaler(t.Context(), client, tc.ca, tc.dryRun))
+
+			assert.GreaterOrEqual(t, *calls, tc.wantUpdateCalls, "minimum UpdateScale calls")
+			if tc.wantReplicas == nil {
+				return
+			}
+			got, err := client.AppsV1().Deployments(tc.ca.Namespace).Get(t.Context(), tc.ca.Name, metav1.GetOptions{})
+			require.NoError(t, err)
+			require.NotNil(t, got.Spec.Replicas)
+			assert.Equal(t, *tc.wantReplicas, *got.Spec.Replicas)
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/cordon.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/cordon.go
new file mode 100644
index 0000000000..97973875b6
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/cordon.go
@@ -0,0 +1,74 @@
+package evict
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/util/retry"
+)
+
+// cordonNodes marks every node in the group Unschedulable up front, before any
+// of them is drained — mirroring how EKS marks a managed node group
+// unschedulable before draining it. A pod evicted from one node is then never
+// rescheduled onto another node of the same group that is itself about to be
+// drained.
+//
+// It returns the Node objects that are now cordoned and safe to drain, so the
+// caller can read their immutable fields (e.g. providerID) without a second
+// Get. A node that is already gone is skipped silently (absent from both
+// return values); a node that fails to cordon is recorded in errs and left out
+// of cordoned so the caller never drains a node that can still receive pods.
+func cordonNodes(ctx context.Context, clientset kubernetes.Interface, nodes []string, dryRun bool) (cordoned []*corev1.Node, errs []error) {
+	for _, nodeName := range nodes {
+		node, err := cordonNode(ctx, clientset, nodeName, dryRun)
+		if err != nil {
+			errs = append(errs, fmt.Errorf("cordon node %s: %w", nodeName, err))
+			continue
+		}
+		if node != nil {
+			cordoned = append(cordoned, node)
+		}
+	}
+	return cordoned, errs
+}
+
+// cordonNode marks the node Unschedulable and returns the resulting Node so the
+// caller can read immutable fields (e.g. providerID) without a second Get. It
+// returns (nil, nil) when the node is already gone: there is nothing to
+// schedule onto a deleted node, and a re-run rediscovers any surviving nodes.
+// The Get + mutate + Update is wrapped in RetryOnConflict to survive races
+// against kubelet, the scheduler, and any other controller that mutates Node
+// objects.
+func cordonNode(ctx context.Context, clientset kubernetes.Interface, name string, dryRun bool) (*corev1.Node, error) {
+	var cordoned *corev1.Node
+	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		node, err := clientset.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
+		if err != nil {
+			if apierrors.IsNotFound(err) {
+				return nil // node already gone: nothing to schedule on it
+			}
+			return fmt.Errorf("failed to get node %s: %w", name, err)
+		}
+		if dryRun {
+			log.Printf("[dry-run] would cordon node %s", name)
+		} else if !node.Spec.Unschedulable {
+			node.Spec.Unschedulable = true
+			node, err = clientset.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
+			if err != nil {
+				if apierrors.IsNotFound(err) {
+					return nil // deleted concurrently: nothing to schedule on it
+				}
+				return fmt.Errorf("failed to update node %s: %w", name, err)
+			}
+			log.Printf("Cordoned node %s.", name)
+		}
+		cordoned = node
+		return nil
+	})
+	return cordoned, err
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/cordon_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/cordon_test.go
new file mode 100644
index 0000000000..d325d9a311
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/cordon_test.go
@@ -0,0 +1,134 @@
+package evict
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
+)
+
+func TestCordonNode(t *testing.T) {
+	for _, tc := range []struct {
+		name string
+		// initialUnschedulable is the node's starting `Spec.Unschedulable`.
+		initialUnschedulable bool
+		// conflictFirstUpdate forces a Conflict on the first Update so
+		// RetryOnConflict has to refetch and re-apply.
+		conflictFirstUpdate bool
+		// updateNotFound makes the Update return NotFound, simulating the node
+		// being deleted between the Get and the Update.
+		updateNotFound bool
+		dryRun         bool
+		// wantUpdateCalls is the minimum number of Update invocations
+		// expected on the Nodes endpoint.
+		wantMinUpdateCalls int
+		wantUnschedulable  bool
+		// wantNilNode expects cordonNode to return a nil Node (the node is gone).
+		wantNilNode bool
+	}{
+		{
+			name:               "schedulable node becomes cordoned",
+			wantMinUpdateCalls: 1,
+			wantUnschedulable:  true,
+		},
+		{
+			// Already cordoned ⇒ no Update issued (idempotent).
+			name:                 "already cordoned is a no-op",
+			initialUnschedulable: true,
+			wantMinUpdateCalls:   0,
+			wantUnschedulable:    true,
+		},
+		{
+			name:                "retries on Conflict",
+			conflictFirstUpdate: true,
+			wantMinUpdateCalls:  2,
+			wantUnschedulable:   true,
+		},
+		{
+			name:               "dry-run touches nothing",
+			dryRun:             true,
+			wantMinUpdateCalls: 0,
+			wantUnschedulable:  false,
+		},
+		{
+			// Node deleted between the Get and the Update ⇒ NotFound on Update
+			// is a silent success (nothing to schedule onto a deleted node).
+			name:               "deleted between get and update is a no-op",
+			updateNotFound:     true,
+			wantMinUpdateCalls: 1,
+			wantUnschedulable:  false,
+			wantNilNode:        true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			node := &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{Name: "n1"},
+				Spec:       corev1.NodeSpec{Unschedulable: tc.initialUnschedulable},
+			}
+			client := fake.NewClientset(node)
+
+			var updateCalls int
+			client.PrependReactor("update", "nodes", func(_ clienttesting.Action) (bool, runtime.Object, error) {
+				updateCalls++
+				if tc.conflictFirstUpdate && updateCalls == 1 {
+					return true, nil, apierrors.NewConflict(
+						schema.GroupResource{Resource: "nodes"}, "n1", errors.New("forced conflict"),
+					)
+				}
+				if tc.updateNotFound {
+					return true, nil, apierrors.NewNotFound(schema.GroupResource{Resource: "nodes"}, "n1")
+				}
+				return false, nil, nil
+			})
+
+			gotNode, cordonErr := cordonNode(t.Context(), client, "n1", tc.dryRun)
+			require.NoError(t, cordonErr)
+			assert.Equal(t, tc.wantNilNode, gotNode == nil, "cordonNode nil-node contract")
+
+			assert.GreaterOrEqual(t, updateCalls, tc.wantMinUpdateCalls, "minimum Update calls")
+			got, err := client.CoreV1().Nodes().Get(t.Context(), "n1", metav1.GetOptions{})
+			require.NoError(t, err)
+			assert.Equal(t, tc.wantUnschedulable, got.Spec.Unschedulable)
+		})
+	}
+}
+
+// TestCordonNodes covers the up-front cordon pass: a live node is cordoned and
+// returned (carrying its state so the caller needs no second Get), an
+// already-gone node is a silent skip (no error, absent from the result, so a
+// re-run after a partial execution is not derailed), and a node that fails to
+// cordon is recorded as an error and excluded so the caller never drains a node
+// that can still receive pods.
+func TestCordonNodes(t *testing.T) {
+	client := fake.NewClientset(
+		&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "ip-live"}},
+		&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "ip-bad"}},
+	)
+	// Updates to ip-bad fail with a non-conflict error so cordonNode gives up
+	// immediately (RetryOnConflict only retries Conflict).
+	client.PrependReactor("update", "nodes", func(action clienttesting.Action) (bool, runtime.Object, error) {
+		ua, ok := action.(clienttesting.UpdateAction)
+		if !ok || ua.GetObject().(*corev1.Node).Name != "ip-bad" {
+			return false, nil, nil
+		}
+		return true, nil, apierrors.NewInternalError(errors.New("update boom"))
+	})
+
+	// ip-gone is absent from the cluster: cordonNode must treat NotFound as a
+	// silent skip.
+	cordoned, errs := cordonNodes(t.Context(), client, []string{"ip-live", "ip-gone", "ip-bad"}, false)
+
+	require.Len(t, cordoned, 1, "only the live, successfully-cordoned node is returned")
+	assert.Equal(t, "ip-live", cordoned[0].Name)
+	assert.True(t, cordoned[0].Spec.Unschedulable, "the returned node carries the cordoned state")
+	require.Len(t, errs, 1)
+	assert.ErrorContains(t, errs[0], "ip-bad")
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/eks_mng.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/eks_mng.go
new file mode 100644
index 0000000000..bfd31aa41b
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/eks_mng.go
@@ -0,0 +1,123 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"time"
+
+	awssdk "github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/eks"
+	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+
+	commonaws "github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/aws"
+)
+
+// errEKSDrainIncomplete is returned (wrapped) by waitEKSNodegroupEmpty when
+// the deadline expires while nodes still carry the node group label. The
+// orchestrator uses errors.Is to distinguish this case ("EKS drain is in
+// progress but slow — keep temp PDBs in place") from a failed
+// UpdateNodegroupConfig call ("EKS has not started draining — cleanup is
+// safe").
+var errEKSDrainIncomplete = errors.New("EKS managed node group drain did not complete within the timeout")
+
+// EKSManagedNodeGroupAPI is the subset of *eks.Client used by
+// evictEKSManagedNodeGroup. Defined as an interface so unit tests can stub the
+// AWS SDK out cheaply.
+type EKSManagedNodeGroupAPI interface {
+	DescribeNodegroup(ctx context.Context, in *eks.DescribeNodegroupInput, opts ...func(*eks.Options)) (*eks.DescribeNodegroupOutput, error)
+	UpdateNodegroupConfig(ctx context.Context, in *eks.UpdateNodegroupConfigInput, opts ...func(*eks.Options)) (*eks.UpdateNodegroupConfigOutput, error)
+}
+
+// evictEKSManagedNodeGroup delegates the drain to EKS by setting the node
+// group's scaling config to min=desired=0 (max is preserved from the existing
+// scaling config because the EKS API rejects `maxSize < 1`). The EKS control
+// plane then cordons and evicts pods from each node (respecting
+// PodDisruptionBudgets) before terminating the underlying EC2 instances. We
+// do not cordon or evict from kubectl-datadog because doing so concurrently
+// with the EKS-managed drain can produce confusing logs and double the load
+// on the apiserver.
+//
+// UpdateNodegroupConfig returns immediately, but we then **wait** for the
+// drain to actually complete by polling the Kubernetes API for nodes carrying
+// the `eks.amazonaws.com/nodegroup=<name>` label. Without that wait, the
+// caller (Run) would proceed to cleanup any temporary PodDisruptionBudgets
+// before EKS even started evicting, leaving cross-type workloads (a
+// Deployment with replicas on EKS MNG and on an ASG/Karpenter NodePool, for
+// instance) unprotected mid-eviction.
+//
+// We never delete the node group: it may be Terraform-/CloudFormation-managed,
+// and only the original owner should remove it.
+func evictEKSManagedNodeGroup(ctx context.Context, eksAPI EKSManagedNodeGroupAPI, clientset kubernetes.Interface, clusterName, nodegroupName string, drainOpts nodeDrainOptions) error {
+	if drainOpts.DryRun {
+		log.Printf("[dry-run] would scale EKS node group %s/%s to min=desired=0 (max preserved) and wait for EKS to drain", clusterName, nodegroupName)
+		return nil
+	}
+	// EKS rejects maxSize < 1 (see NodegroupScalingConfig API reference), so
+	// preserve the existing maxSize when scaling to zero. min/desired both 0
+	// is enough to drain the group; keeping the original max means a later
+	// caller (e.g. Terraform) can scale back up to the previously configured
+	// ceiling without re-reading the desired max.
+	desc, err := eksAPI.DescribeNodegroup(ctx, &eks.DescribeNodegroupInput{
+		ClusterName:   awssdk.String(clusterName),
+		NodegroupName: awssdk.String(nodegroupName),
+	})
+	if err != nil {
+		return fmt.Errorf("DescribeNodegroup: %w", err)
+	}
+	maxSize := int32(1)
+	if desc.Nodegroup != nil && desc.Nodegroup.ScalingConfig != nil && desc.Nodegroup.ScalingConfig.MaxSize != nil && *desc.Nodegroup.ScalingConfig.MaxSize >= 1 {
+		maxSize = *desc.Nodegroup.ScalingConfig.MaxSize
+	}
+	if _, err := eksAPI.UpdateNodegroupConfig(ctx, &eks.UpdateNodegroupConfigInput{
+		ClusterName:   awssdk.String(clusterName),
+		NodegroupName: awssdk.String(nodegroupName),
+		ScalingConfig: &ekstypes.NodegroupScalingConfig{
+			MinSize:     awssdk.Int32(0),
+			MaxSize:     awssdk.Int32(maxSize),
+			DesiredSize: awssdk.Int32(0),
+		},
+	}); err != nil {
+		return fmt.Errorf("UpdateNodegroupConfig: %w", err)
+	}
+	log.Printf("EKS node group %s/%s scaling config set to min=desired=0 (max=%d); waiting for EKS to drain its nodes…", clusterName, nodegroupName, maxSize)
+
+	return waitEKSNodegroupEmpty(ctx, clientset, clusterName, nodegroupName, drainOpts.NodeTimeout, drainOpts.PollInterval)
+}
+
+// waitEKSNodegroupEmpty polls the Kubernetes API for nodes carrying the
+// `eks.amazonaws.com/nodegroup=<name>` label until none remain or the deadline
+// expires. The EKS drain is bounded by drainOpts.NodeTimeout — for large node
+// groups, callers should raise this above the default.
+func waitEKSNodegroupEmpty(ctx context.Context, clientset kubernetes.Interface, clusterName, nodegroupName string, timeout, pollInterval time.Duration) error {
+	selector := commonaws.LabelEKSNodegroup + "=" + nodegroupName
+	deadline := time.Now().Add(timeout)
+	for {
+		list, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: selector})
+		if err != nil {
+			// We already accepted the EKS scaling change, so EKS may still
+			// be draining. Wrap errEKSDrainIncomplete so the orchestrator
+			// keeps the temporary PDBs in place; a re-run will reach the
+			// poll loop again and cleanup once EKS converges.
+			return fmt.Errorf("list EKS managed node group %s/%s nodes: %w: %w", clusterName, nodegroupName, err, errEKSDrainIncomplete)
+		}
+		if len(list.Items) == 0 {
+			log.Printf("EKS node group %s/%s fully drained.", clusterName, nodegroupName)
+			return nil
+		}
+		if time.Now().After(deadline) {
+			return fmt.Errorf("EKS node group %s/%s: %d node(s) still present after %s: %w", clusterName, nodegroupName, len(list.Items), timeout, errEKSDrainIncomplete)
+		}
+		select {
+		case <-time.After(pollInterval):
+		case <-ctx.Done():
+			// Cancellation (SIGINT) — per design, temp PDBs may leak and
+			// a subsequent run reaps them via the label selector. Do NOT
+			// wrap errEKSDrainIncomplete here.
+			return ctx.Err()
+		}
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/eks_mng_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/eks_mng_test.go
new file mode 100644
index 0000000000..fbfa99b5df
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/eks_mng_test.go
@@ -0,0 +1,240 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	awssdk "github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/eks"
+	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
+)
+
+type stubEKS struct {
+	gotInputs      []*eks.UpdateNodegroupConfigInput
+	err            error
+	describedMax   int32 // 0 ⇒ stub returns nil ScalingConfig
+	describeErr    error
+	describeCalled int
+}
+
+func (s *stubEKS) DescribeNodegroup(_ context.Context, in *eks.DescribeNodegroupInput, _ ...func(*eks.Options)) (*eks.DescribeNodegroupOutput, error) {
+	s.describeCalled++
+	if s.describeErr != nil {
+		return nil, s.describeErr
+	}
+	out := &eks.DescribeNodegroupOutput{
+		Nodegroup: &ekstypes.Nodegroup{
+			ClusterName:   in.ClusterName,
+			NodegroupName: in.NodegroupName,
+		},
+	}
+	if s.describedMax > 0 {
+		out.Nodegroup.ScalingConfig = &ekstypes.NodegroupScalingConfig{
+			MaxSize: awssdk.Int32(s.describedMax),
+		}
+	}
+	return out, nil
+}
+
+func (s *stubEKS) UpdateNodegroupConfig(_ context.Context, in *eks.UpdateNodegroupConfigInput, _ ...func(*eks.Options)) (*eks.UpdateNodegroupConfigOutput, error) {
+	s.gotInputs = append(s.gotInputs, in)
+	return &eks.UpdateNodegroupConfigOutput{}, s.err
+}
+
+func mngDrainOpts() nodeDrainOptions {
+	return nodeDrainOptions{
+		NodeTimeout:  5 * time.Second,
+		PollInterval: 10 * time.Millisecond,
+	}
+}
+
+func TestEvictEKSManagedNodeGroup(t *testing.T) {
+	mngNode := &corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   "ip-1",
+			Labels: map[string]string{"eks.amazonaws.com/nodegroup": "my-mng"},
+		},
+	}
+	apiErr := errors.New("api error")
+	listErr := errors.New("apiserver unreachable")
+
+	for _, tc := range []struct {
+		name string
+		// updateErr is what stubEKS returns from UpdateNodegroupConfig. nil
+		// ⇒ success.
+		updateErr error
+		// describeErr is what stubEKS returns from DescribeNodegroup. nil
+		// ⇒ success.
+		describeErr error
+		// describedMax is the MaxSize reported by the stubbed
+		// DescribeNodegroup. 0 ⇒ stub returns nil ScalingConfig (simulating
+		// a node group described without a scaling config).
+		describedMax int32
+		// nodes are pre-loaded into the fake clientset (i.e. nodes that EKS
+		// would be draining).
+		nodes []runtime.Object
+		// listResponder, when non-nil, overrides the default "list nodes"
+		// behaviour. Used to simulate the EKS-side drain finishing between
+		// polls, or a transient List error.
+		listResponder func(call int) (handle bool, resp runtime.Object, err error)
+		dryRun        bool
+		// shortTimeout uses a 100ms NodeTimeout to force the wait loop to
+		// give up; default keeps the longer mngDrainOpts() value.
+		shortTimeout bool
+
+		wantDescribeCallCount int
+		wantUpdateCallCount   int
+		// wantUpdateMaxSize is the MaxSize the test asserts on the
+		// UpdateNodegroupConfig input. Only checked when
+		// wantUpdateCallCount > 0.
+		wantUpdateMaxSize int32
+		wantErr           bool
+		// wantWrapsSentinel asserts errors.Is(err, errEKSDrainIncomplete).
+		wantWrapsSentinel bool
+		// wantNotWrapsSentinel asserts errors.Is(err, errEKSDrainIncomplete) is false.
+		wantNotWrapsSentinel bool
+		wantErrContains      string
+	}{
+		{
+			name:                  "dry-run skips Describe + Update",
+			dryRun:                true,
+			wantDescribeCallCount: 0,
+			wantUpdateCallCount:   0,
+		},
+		{
+			name:                  "DescribeNodegroup failure short-circuits",
+			describeErr:           apiErr,
+			wantDescribeCallCount: 1,
+			wantUpdateCallCount:   0,
+			wantErr:               true,
+			wantErrContains:       "DescribeNodegroup",
+			wantNotWrapsSentinel:  true,
+		},
+		{
+			name:                  "UpdateNodegroupConfig failure does not wrap sentinel",
+			describedMax:          5,
+			updateErr:             apiErr,
+			wantDescribeCallCount: 1,
+			wantUpdateCallCount:   1,
+			wantUpdateMaxSize:     5,
+			wantErr:               true,
+			wantErrContains:       "UpdateNodegroupConfig",
+			wantNotWrapsSentinel:  true,
+		},
+		{
+			// EKS finishes draining between two polls: first List sees the
+			// node, second returns empty. Counter-based reactor avoids the
+			// time.Sleep + goroutine race the goroutine version had under
+			// loaded CI.
+			name:         "waits for nodes to disappear",
+			nodes:        []runtime.Object{mngNode},
+			describedMax: 5,
+			listResponder: func(call int) (bool, runtime.Object, error) {
+				if call > 1 {
+					return true, &corev1.NodeList{}, nil
+				}
+				return false, nil, nil
+			},
+			wantDescribeCallCount: 1,
+			wantUpdateCallCount:   1,
+			wantUpdateMaxSize:     5,
+		},
+		{
+			// Described scaling config is nil ⇒ Update must still pass an
+			// API-valid MaxSize (>= 1). Ensures evict never blindly sends 0.
+			name:                  "describe without scaling config falls back to max=1",
+			nodes:                 []runtime.Object{mngNode},
+			describedMax:          0,
+			shortTimeout:          true,
+			wantDescribeCallCount: 1,
+			wantUpdateCallCount:   1,
+			wantUpdateMaxSize:     1,
+			wantErr:               true,
+			wantErrContains:       "my-mng",
+			wantWrapsSentinel:     true,
+		},
+		{
+			// Wait timeout while nodes persist: orchestrator must keep
+			// temp PDBs in place ⇒ sentinel wrapped.
+			name:                  "wait timeout wraps the sentinel",
+			nodes:                 []runtime.Object{mngNode},
+			describedMax:          5,
+			shortTimeout:          true,
+			wantDescribeCallCount: 1,
+			wantUpdateCallCount:   1,
+			wantUpdateMaxSize:     5,
+			wantErr:               true,
+			wantErrContains:       "my-mng",
+			wantWrapsSentinel:     true,
+		},
+		{
+			// EKS scaling change succeeded, but a subsequent K8s Node list
+			// call errors out. EKS may still be draining ⇒ sentinel wrapped.
+			name:                  "post-Update list failure wraps the sentinel",
+			describedMax:          5,
+			listResponder:         func(_ int) (bool, runtime.Object, error) { return true, nil, listErr },
+			wantDescribeCallCount: 1,
+			wantUpdateCallCount:   1,
+			wantUpdateMaxSize:     5,
+			wantErr:               true,
+			wantWrapsSentinel:     true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			stub := &stubEKS{
+				err:          tc.updateErr,
+				describeErr:  tc.describeErr,
+				describedMax: tc.describedMax,
+			}
+			client := fake.NewClientset(tc.nodes...)
+			if tc.listResponder != nil {
+				var listCalls int
+				client.PrependReactor("list", "nodes", func(_ clienttesting.Action) (bool, runtime.Object, error) {
+					listCalls++
+					return tc.listResponder(listCalls)
+				})
+			}
+			opts := mngDrainOpts()
+			opts.DryRun = tc.dryRun
+			if tc.shortTimeout {
+				opts.NodeTimeout = 100 * time.Millisecond
+				opts.PollInterval = 30 * time.Millisecond
+			}
+
+			err := evictEKSManagedNodeGroup(t.Context(), stub, client, "my-cluster", "my-mng", opts)
+			if tc.wantErr {
+				require.Error(t, err)
+				if tc.wantErrContains != "" {
+					assert.Contains(t, err.Error(), tc.wantErrContains)
+				}
+			} else {
+				require.NoError(t, err)
+			}
+			if tc.wantWrapsSentinel {
+				assert.ErrorIs(t, err, errEKSDrainIncomplete)
+			}
+			if tc.wantNotWrapsSentinel {
+				assert.NotErrorIs(t, err, errEKSDrainIncomplete)
+			}
+			assert.Equal(t, tc.wantDescribeCallCount, stub.describeCalled)
+			require.Len(t, stub.gotInputs, tc.wantUpdateCallCount)
+			for _, in := range stub.gotInputs {
+				assert.Equal(t, "my-cluster", awssdk.ToString(in.ClusterName))
+				assert.Equal(t, "my-mng", awssdk.ToString(in.NodegroupName))
+				require.NotNil(t, in.ScalingConfig)
+				assert.Equal(t, int32(0), awssdk.ToInt32(in.ScalingConfig.MinSize))
+				assert.Equal(t, tc.wantUpdateMaxSize, awssdk.ToInt32(in.ScalingConfig.MaxSize))
+				assert.Equal(t, int32(0), awssdk.ToInt32(in.ScalingConfig.DesiredSize))
+			}
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/evict.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/evict.go
new file mode 100644
index 0000000000..e66d4385b6
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/evict.go
@@ -0,0 +1,157 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/spf13/cobra"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clients"
+	"github.com/DataDog/datadog-operator/pkg/plugin/common"
+)
+
+var evictExample = `
+  # evict every node group that is not Datadog-managed (cluster-autoscaler ASGs,
+  # EKS managed node groups, user Karpenter NodePools, standalone EC2)
+  %[1]s evict-legacy-nodes --all
+
+  # evict a single ASG by name
+  %[1]s evict-legacy-nodes --target=asg/my-legacy-asg
+
+  # preview the actions without performing them
+  %[1]s evict-legacy-nodes --all --dry-run
+`
+
+type options struct {
+	genericclioptions.IOStreams
+	common.Options
+	args []string
+
+	clusterName        string
+	karpenterNamespace string
+	all                bool
+	targetSpecs        []string
+	targets            []Target // populated by validate()
+	skipCA             bool
+	ensurePDBs         bool
+	evictionTimeout    time.Duration
+	nodeTimeout        time.Duration
+	dryRun             bool
+	yes                bool
+	debug              bool
+}
+
+func newOptions(streams genericclioptions.IOStreams) *options {
+	o := &options{
+		IOStreams:       streams,
+		ensurePDBs:      true,
+		evictionTimeout: 5 * time.Minute,
+		nodeTimeout:     15 * time.Minute,
+	}
+	o.SetConfigFlags()
+	return o
+}
+
+// New returns the cobra command for `kubectl datadog autoscaling cluster evict-legacy-nodes`.
+func New(streams genericclioptions.IOStreams) *cobra.Command {
+	o := newOptions(streams)
+	cmd := &cobra.Command{
+		Use:          "evict-legacy-nodes",
+		Short:        "Drain workloads from non-Datadog node groups onto Datadog-managed Karpenter NodePools",
+		Example:      fmt.Sprintf(evictExample, "kubectl datadog autoscaling cluster"),
+		SilenceUsage: true,
+		RunE: func(c *cobra.Command, args []string) error {
+			if err := o.complete(c, args); err != nil {
+				return err
+			}
+			if err := o.validate(); err != nil {
+				return err
+			}
+			return o.run()
+		},
+	}
+
+	cmd.Flags().StringVar(&o.clusterName, "cluster-name", "", "Name of the EKS cluster")
+	cmd.Flags().StringVar(&o.karpenterNamespace, "karpenter-namespace", "", "Namespace where Karpenter is deployed (auto-detected when empty)")
+	cmd.Flags().BoolVar(&o.all, "all", false, "Evict every node group that is not managed by Datadog")
+	cmd.Flags().StringSliceVar(&o.targetSpecs, "target", nil, "Target a specific node group: <manager>/<name>, with <manager> one of asg, eksManagedNodeGroup, karpenter. Use `standalone` (no name) for standalone EC2 instances. Repeatable. Mutually exclusive with --all.")
+	cmd.Flags().BoolVar(&o.skipCA, "skip-cluster-autoscaler", false, "Do not scale the cluster-autoscaler Deployment to 0 replicas as step 1")
+	cmd.Flags().BoolVar(&o.ensurePDBs, "ensure-pdbs", true, "Create temporary PodDisruptionBudgets (maxUnavailable: 1) for workloads without one, and remove them at the end")
+	cmd.Flags().DurationVar(&o.evictionTimeout, "eviction-timeout", 5*time.Minute, "Time budget per pod for the Eviction API to succeed before giving up (PDB-blocked pods)")
+	cmd.Flags().DurationVar(&o.nodeTimeout, "node-timeout", 15*time.Minute, "Time budget per node for it to become empty after pods have been evicted")
+	cmd.Flags().BoolVar(&o.dryRun, "dry-run", false, "Log the actions that would be taken without performing them")
+	cmd.Flags().BoolVar(&o.yes, "yes", false, "Skip the confirmation prompt")
+	cmd.Flags().BoolVar(&o.debug, "debug", false, "Enable debug logs")
+
+	o.ConfigFlags.AddFlags(cmd.Flags())
+
+	return cmd
+}
+
+func (o *options) complete(cmd *cobra.Command, args []string) error {
+	o.args = args
+	return o.Init(cmd)
+}
+
+func (o *options) validate() error {
+	if len(o.args) > 0 {
+		return errors.New("no positional arguments are allowed")
+	}
+	if o.all && len(o.targetSpecs) > 0 {
+		return errors.New("--all and --target are mutually exclusive")
+	}
+	if !o.all && len(o.targetSpecs) == 0 {
+		return errors.New("at least one of --all or --target must be provided")
+	}
+	if o.evictionTimeout <= 0 {
+		return errors.New("--eviction-timeout must be positive")
+	}
+	if o.nodeTimeout <= 0 {
+		return errors.New("--node-timeout must be positive")
+	}
+	var (
+		parsed []Target
+		errs   []error
+	)
+	for _, spec := range o.targetSpecs {
+		if t, err := ParseTargetSpec(spec); err != nil {
+			errs = append(errs, err)
+		} else {
+			parsed = append(parsed, t)
+		}
+	}
+	if len(errs) > 0 {
+		return errors.Join(errs...)
+	}
+	o.targets = parsed
+	return nil
+}
+
+func (o *options) run() error {
+	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	defer stop()
+
+	clusterName, err := clients.ResolveClusterName(o.ConfigFlags, o.clusterName)
+	if err != nil {
+		return err
+	}
+
+	return Run(ctx, o.IOStreams, o.ConfigFlags, o.Clientset, RunOptions{
+		ClusterName:        clusterName,
+		KarpenterNamespace: o.karpenterNamespace,
+		All:                o.all,
+		Targets:            o.targets,
+		SkipCA:             o.skipCA,
+		EnsurePDBs:         o.ensurePDBs,
+		DryRun:             o.dryRun,
+		Yes:                o.yes,
+		EvictionTimeout:    o.evictionTimeout,
+		NodeTimeout:        o.nodeTimeout,
+		Debug:              o.debug,
+	})
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/evict_pods.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/evict_pods.go
new file mode 100644
index 0000000000..c3496fc109
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/evict_pods.go
@@ -0,0 +1,172 @@
+package evict
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"slices"
+	"time"
+
+	"github.com/samber/lo"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/pager"
+)
+
+// nodeDrainOptions captures the per-call tunables for evicting a node's pods.
+type nodeDrainOptions struct {
+	DryRun          bool
+	EvictionTimeout time.Duration // per pod, bound for retries on 429
+	NodeTimeout     time.Duration // total wait for the node to become empty
+	PollInterval    time.Duration // interval between empty-checks; default 2s
+}
+
+// drainNode evicts every evictable pod from the node and waits for the node to
+// become empty. Pods owned by a DaemonSet, mirror pods, terminating pods and
+// completed Job pods are skipped — the kubelet handles their cleanup when the
+// underlying instance disappears.
+//
+// Pods that cannot be evicted (PDB-blocked beyond EvictionTimeout, etc.) are
+// logged as warnings; drainNode then continues with the remaining pods rather
+// than aborting the whole run.
+func drainNode(ctx context.Context, clientset kubernetes.Interface, nodeName string, opts nodeDrainOptions) error {
+	if opts.DryRun {
+		log.Printf("[dry-run] would drain node %s", nodeName)
+		return nil
+	}
+	pods, err := listPodsOnNode(ctx, clientset, nodeName)
+	if err != nil {
+		return fmt.Errorf("failed to list pods on node %s: %w", nodeName, err)
+	}
+	for _, p := range pods {
+		if shouldSkipEviction(&p) {
+			continue
+		}
+		if err := evictPodWithRetry(ctx, clientset, &p, opts.EvictionTimeout, opts.PollInterval); err != nil {
+			log.Printf("Warning: pod %s/%s: %v", p.Namespace, p.Name, err)
+		}
+	}
+	return waitForNodeEmpty(ctx, clientset, nodeName, opts.NodeTimeout, opts.PollInterval)
+}
+
+// listPodsOnNode enumerates pods scheduled on the given node, server-side
+// filtered via the spec.nodeName field selector. Uses the client-go pager
+// defaults so very large nodes (250 pods+) don't trigger oversized list calls.
+func listPodsOnNode(ctx context.Context, clientset kubernetes.Interface, nodeName string) (pods []corev1.Pod, err error) {
+	p := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
+		return clientset.CoreV1().Pods(metav1.NamespaceAll).List(ctx, opts)
+	})
+	if err = p.EachListItem(ctx, metav1.ListOptions{
+		FieldSelector: fields.OneTermEqualSelector("spec.nodeName", nodeName).String(),
+	}, func(obj runtime.Object) error {
+		pods = append(pods, *obj.(*corev1.Pod))
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+	return pods, nil
+}
+
+// evictPodWithRetry sends a single Eviction request and retries while the
+// apiserver returns 429 TooManyRequests (the canonical PDB-blocked signal).
+// Aborts after timeout; the caller then logs and moves to the next pod.
+//
+// 404 (pod already gone) is treated as success — the eviction goal is met.
+// Non-429 errors are returned immediately.
+func evictPodWithRetry(ctx context.Context, clientset kubernetes.Interface, p *corev1.Pod, timeout, retryInterval time.Duration) error {
+	eviction := &policyv1.Eviction{
+		ObjectMeta: metav1.ObjectMeta{Name: p.Name, Namespace: p.Namespace},
+	}
+	deadline := time.Now().Add(timeout)
+	for {
+		err := clientset.CoreV1().Pods(p.Namespace).EvictV1(ctx, eviction)
+		switch {
+		case err == nil:
+			log.Printf("Evicted pod %s/%s.", p.Namespace, p.Name)
+			return nil
+		case apierrors.IsNotFound(err):
+			return nil
+		case !apierrors.IsTooManyRequests(err):
+			return fmt.Errorf("eviction failed: %w", err)
+		case time.Now().After(deadline):
+			return fmt.Errorf("eviction timed out (likely PDB-blocked): %w", err)
+		default:
+			select {
+			case <-time.After(retryInterval):
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+}
+
+// waitForNodeEmpty polls the node until no pod still occupies it. A pod is
+// considered to still occupy the node until it is actually gone from the API
+// server — including pods that are merely terminating (DeletionTimestamp
+// set). This matters for callers that terminate the underlying EC2 instance
+// next: returning early while a pod is mid-grace-period would kill the
+// container before its preStop hook / graceful shutdown finishes.
+//
+// DaemonSet pods, mirror pods and completed pods are not counted: DS and
+// mirror pods are tied to the node's lifecycle and disappear with it,
+// completed pods are inert and GC'd promptly.
+func waitForNodeEmpty(ctx context.Context, clientset kubernetes.Interface, nodeName string, timeout, pollInterval time.Duration) error {
+	deadline := time.Now().Add(timeout)
+	for {
+		pods, err := listPodsOnNode(ctx, clientset, nodeName)
+		if err != nil {
+			return fmt.Errorf("failed to list pods on node %s: %w", nodeName, err)
+		}
+		remaining := lo.CountBy(pods, func(p corev1.Pod) bool {
+			return podOccupiesNode(&p)
+		})
+		if remaining == 0 {
+			return nil
+		}
+		if time.Now().After(deadline) {
+			return fmt.Errorf("timed out waiting for node %s to drain: %d pod(s) still present", nodeName, remaining)
+		}
+		select {
+		case <-time.After(pollInterval):
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+}
+
+// shouldSkipEviction reports whether drainNode should leave a pod alone
+// rather than call the Eviction API on it. A pod already terminating is
+// skipped because issuing another eviction would 404 once the kubelet
+// finishes the deletion; DaemonSet/mirror/completed pods are skipped because
+// they cannot or need not be evicted.
+func shouldSkipEviction(p *corev1.Pod) bool {
+	return p.DeletionTimestamp != nil || isMirrorPod(p) || isDaemonSetPod(p) || isCompleted(p)
+}
+
+// podOccupiesNode reports whether a pod still consumes the node from a drain
+// perspective. Terminating pods DO occupy the node until they are gone;
+// returning false for them prematurely would let the caller terminate the
+// instance before the container finished its grace period.
+func podOccupiesNode(p *corev1.Pod) bool {
+	return !isMirrorPod(p) && !isDaemonSetPod(p) && !isCompleted(p)
+}
+
+func isMirrorPod(p *corev1.Pod) bool {
+	_, ok := p.Annotations[corev1.MirrorPodAnnotationKey]
+	return ok
+}
+
+func isDaemonSetPod(p *corev1.Pod) bool {
+	return slices.ContainsFunc(p.OwnerReferences, func(owner metav1.OwnerReference) bool {
+		return owner.Kind == "DaemonSet"
+	})
+}
+
+func isCompleted(p *corev1.Pod) bool {
+	return p.Status.Phase == corev1.PodSucceeded || p.Status.Phase == corev1.PodFailed
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/evict_pods_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/evict_pods_test.go
new file mode 100644
index 0000000000..c6f386f5b2
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/evict_pods_test.go
@@ -0,0 +1,214 @@
+package evict
+
+import (
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
+)
+
+// installPodEvictionReactor makes a fake clientset accept Eviction
+// subresource creates and echo the eviction object back. The Pod itself is
+// NOT removed from the tracker — tests that want a pod-removal effect (e.g.
+// to make drainNode observe an empty node) must add a "delete pods" reactor
+// explicitly, or use a fixture that starts with no pod on the node.
+func installPodEvictionReactor(client *fake.Clientset) {
+	client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
+		ca, ok := action.(clienttesting.CreateAction)
+		if !ok || ca.GetSubresource() != "eviction" {
+			return false, nil, nil
+		}
+		return true, ca.GetObject(), nil
+	})
+}
+
+func TestShouldSkipEviction(t *testing.T) {
+	now := metav1.Now()
+	for _, tc := range []struct {
+		name string
+		pod  *corev1.Pod
+		skip bool
+	}{
+		{name: "regular pod", pod: &corev1.Pod{}, skip: false},
+		{name: "terminating", pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}, skip: true},
+		{name: "mirror", pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
+			Annotations: map[string]string{corev1.MirrorPodAnnotationKey: "x"},
+		}}, skip: true},
+		{name: "daemonset", pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
+			OwnerReferences: []metav1.OwnerReference{{Kind: "DaemonSet", Name: "ds"}},
+		}}, skip: true},
+		{name: "succeeded job", pod: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodSucceeded}}, skip: true},
+		{name: "failed job", pod: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodFailed}}, skip: true},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.skip, shouldSkipEviction(tc.pod))
+		})
+	}
+}
+
+// TestPodOccupiesNode locks in the asymmetry between "skip eviction" and
+// "keeps the node busy": terminating pods are skipped from eviction (the
+// kubelet is already deleting them) BUT still occupy the node for drain
+// purposes (otherwise we'd terminate the instance mid-grace-period and kill
+// the container before its preStop hook finishes).
+func TestPodOccupiesNode(t *testing.T) {
+	now := metav1.Now()
+	for _, tc := range []struct {
+		name     string
+		pod      *corev1.Pod
+		occupies bool
+	}{
+		{name: "regular pod", pod: &corev1.Pod{}, occupies: true},
+		{name: "terminating pod still occupies", pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}, occupies: true},
+		{name: "mirror pod", pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
+			Annotations: map[string]string{corev1.MirrorPodAnnotationKey: "x"},
+		}}, occupies: false},
+		{name: "daemonset pod", pod: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
+			OwnerReferences: []metav1.OwnerReference{{Kind: "DaemonSet", Name: "ds"}},
+		}}, occupies: false},
+		{name: "succeeded job", pod: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodSucceeded}}, occupies: false},
+		{name: "failed job", pod: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodFailed}}, occupies: false},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.occupies, podOccupiesNode(tc.pod))
+		})
+	}
+}
+
+func TestEvictPodWithRetry(t *testing.T) {
+	// evictionResponder shapes the test-side of the Eviction subresource
+	// reactor. `call` is the 1-based call counter.
+	type evictionResponder func(call int, eviction *policyv1.Eviction) (resp runtime.Object, err error)
+
+	echoSuccess := evictionResponder(func(_ int, e *policyv1.Eviction) (runtime.Object, error) {
+		return e, nil
+	})
+	tooManyOnceThenSuccess := evictionResponder(func(call int, e *policyv1.Eviction) (runtime.Object, error) {
+		if call == 1 {
+			return nil, apierrors.NewTooManyRequests("PDB blocked", 1)
+		}
+		return e, nil
+	})
+	notFound := evictionResponder(func(_ int, e *policyv1.Eviction) (runtime.Object, error) {
+		return nil, apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, e.Name)
+	})
+	nonRetryable := evictionResponder(func(_ int, _ *policyv1.Eviction) (runtime.Object, error) {
+		return nil, errors.New("non-retryable")
+	})
+
+	for _, tc := range []struct {
+		name string
+		// pod is the object passed to evictPodWithRetry (and pre-loaded
+		// into the fake when seedPod is true).
+		pod     *corev1.Pod
+		seedPod bool
+		// responder controls what the Eviction reactor returns each call.
+		responder evictionResponder
+		timeout   time.Duration
+
+		wantErr          bool
+		wantErrContains  string
+		wantMinCalls     int
+		wantCapturedName string
+		wantCapturedNs   string
+	}{
+		{
+			name:         "success on first call",
+			pod:          &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"}},
+			seedPod:      true,
+			responder:    echoSuccess,
+			timeout:      5 * time.Second,
+			wantMinCalls: 1,
+		},
+		{
+			name:         "retries on 429 then succeeds",
+			pod:          &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"}},
+			seedPod:      true,
+			responder:    tooManyOnceThenSuccess,
+			timeout:      10 * time.Second,
+			wantMinCalls: 2,
+		},
+		{
+			// Pod already gone: 404 from the apiserver is treated as
+			// success — the eviction goal is met regardless.
+			name:         "404 from apiserver is success",
+			pod:          &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"}},
+			seedPod:      false,
+			responder:    notFound,
+			timeout:      time.Second,
+			wantMinCalls: 1,
+		},
+		{
+			name:            "non-retryable error returned",
+			pod:             &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"}},
+			seedPod:         true,
+			responder:       nonRetryable,
+			timeout:         5 * time.Second,
+			wantErr:         true,
+			wantErrContains: "eviction failed",
+			wantMinCalls:    1,
+		},
+		{
+			// Smoke check that the Eviction object carries the pod's
+			// Name/Namespace through to the apiserver.
+			name:             "eviction object name/namespace",
+			pod:              &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "ns1"}},
+			seedPod:          true,
+			responder:        echoSuccess,
+			timeout:          time.Second,
+			wantMinCalls:     1,
+			wantCapturedName: "p1",
+			wantCapturedNs:   "ns1",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			var seed []runtime.Object
+			if tc.seedPod {
+				seed = append(seed, tc.pod)
+			}
+			client := fake.NewClientset(seed...)
+
+			var (
+				calls    int
+				captured *policyv1.Eviction
+			)
+			client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
+				ca, ok := action.(clienttesting.CreateAction)
+				if !ok || ca.GetSubresource() != "eviction" {
+					return false, nil, nil
+				}
+				calls++
+				eviction := ca.GetObject().(*policyv1.Eviction)
+				captured = eviction
+				resp, err := tc.responder(calls, eviction)
+				return true, resp, err
+			})
+
+			err := evictPodWithRetry(t.Context(), client, tc.pod, tc.timeout, 10*time.Millisecond)
+			if tc.wantErr {
+				require.Error(t, err)
+				if tc.wantErrContains != "" {
+					assert.Contains(t, err.Error(), tc.wantErrContains)
+				}
+			} else {
+				require.NoError(t, err)
+			}
+			assert.GreaterOrEqual(t, calls, tc.wantMinCalls)
+			if tc.wantCapturedName != "" {
+				require.NotNil(t, captured)
+				assert.Equal(t, tc.wantCapturedName, captured.Name)
+				assert.Equal(t, tc.wantCapturedNs, captured.Namespace)
+			}
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user.go
new file mode 100644
index 0000000000..1473a2aa01
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user.go
@@ -0,0 +1,39 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+
+	"k8s.io/client-go/kubernetes"
+)
+
+// evictKarpenterUserNodePool cordons and drains the nodes managed by a user-
+// created Karpenter NodePool. Once each node is cordoned and empty, Karpenter
+// itself observes the state and terminates the underlying NodeClaim, and the
+// pods re-scheduled by their controllers will be placed by Karpenter on the
+// best-matching NodePool (ideally one managed by Datadog, if its spec.weight
+// is high enough).
+//
+// The user NodePool spec is NOT modified — this command intentionally leaves
+// user-managed NodePool configuration intact. If a user NodePool has a
+// spec.weight greater than or equal to the Datadog NodePool weight, evicted
+// pods may land on a freshly provisioned node from the SAME user NodePool.
+// That case is surfaced by a pre-flight warning in the orchestrator; this
+// function makes no attempt to re-balance the weights.
+func evictKarpenterUserNodePool(ctx context.Context, clientset kubernetes.Interface, nodePoolName string, nodes []string, drainOpts nodeDrainOptions) error {
+	// Cordon every node up front so a pod evicted from one node is never
+	// rescheduled onto another node of the same NodePool that is itself about
+	// to be drained.
+	cordoned, errs := cordonNodes(ctx, clientset, nodes, drainOpts.DryRun)
+	for _, node := range cordoned {
+		if err := drainNode(ctx, clientset, node.Name, drainOpts); err != nil {
+			errs = append(errs, fmt.Errorf("drain node %s: %w", node.Name, err))
+		}
+	}
+	if !drainOpts.DryRun && len(errs) == 0 {
+		log.Printf("Drained %d node(s) from user NodePool %s; Karpenter will terminate their NodeClaims once empty.", len(cordoned), nodePoolName)
+	}
+	return errors.Join(errs...)
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user_test.go
new file mode 100644
index 0000000000..1f2bbff530
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user_test.go
@@ -0,0 +1,33 @@
+package evict
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+func TestEvictKarpenterUserNodePool(t *testing.T) {
+	for _, tc := range []struct {
+		name              string
+		dryRun            bool
+		wantUnschedulable bool
+	}{
+		{name: "cordons and drains", dryRun: false, wantUnschedulable: true},
+		{name: "dry-run touches nothing", dryRun: true, wantUnschedulable: false},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1"}}
+			client := fake.NewClientset(node)
+
+			require.NoError(t, evictKarpenterUserNodePool(t.Context(), client, "user-np", []string{"n1"}, newDrainOpts(tc.dryRun)))
+
+			got, err := client.CoreV1().Nodes().Get(t.Context(), "n1", metav1.GetOptions{})
+			require.NoError(t, err)
+			assert.Equal(t, tc.wantUnschedulable, got.Spec.Unschedulable)
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/pdb.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/pdb.go
new file mode 100644
index 0000000000..9271184939
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/pdb.go
@@ -0,0 +1,355 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"reflect"
+	"slices"
+	"strings"
+
+	"github.com/samber/lo"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/validation"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/pager"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	commonk8s "github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/k8s"
+)
+
+// Label keys used to mark PodDisruptionBudgets created by this command. Both
+// must be present for cleanup to consider the PDB ours — this makes the
+// cleanup safe against accidentally removing a user PDB with a colliding name.
+const (
+	pdbManagedByLabelKey   = "app.kubernetes.io/managed-by"
+	pdbManagedByLabelValue = "kubectl-datadog"
+	pdbTempLabelKey        = "autoscaling.datadoghq.com/temporary-pdb"
+	pdbTempLabelValue      = "true"
+	// pdbNameSuffix is appended to the controller name to form the temp PDB
+	// name. Kept short so that long controller names stay under the 63-char
+	// DNS label limit after truncation.
+	pdbNameSuffix = "-evict-legacy"
+)
+
+// ensureTempPDBs scans the pods running on the nodes of every target and
+// creates a temporary PodDisruptionBudget (maxUnavailable: 1) for each
+// top-level controller (Deployment, StatefulSet, bare ReplicaSet) that does
+// not already have one with a matching selector.
+//
+// The created PDBs carry two labels (managed-by + temporary-pdb) that the
+// cleanup step uses to find and delete them, regardless of which process
+// created them. ensureTempPDBs itself is idempotent: a PDB created by a
+// previous (possibly crashed) run is detected by its labels and left alone.
+func ensureTempPDBs(ctx context.Context, clientset kubernetes.Interface, ctrlClient client.Client, targets []Target, dryRun bool) error {
+	// Targets across all manager types are merged: the orchestrator blocks
+	// on waitEKSNodegroupEmpty before cleaning up the temporary PDBs, so EKS
+	// MNG nodes observe the PDBs during their drain too — without that, EKS
+	// could disrupt every replica of an otherwise unprotected workload at
+	// once when all replicas happen to live on the same node group.
+	allNodes := lo.FlatMap(targets, func(t Target, _ int) []string { return t.Nodes })
+	nodeSet := lo.SliceToMap(allNodes, func(n string) (string, struct{}) { return n, struct{}{} })
+	if len(nodeSet) == 0 {
+		return nil
+	}
+
+	controllers, err := discoverControllers(ctx, clientset, nodeSet)
+	if err != nil {
+		return fmt.Errorf("failed to discover controllers: %w", err)
+	}
+	if len(controllers) == 0 {
+		return nil
+	}
+
+	// Group controllers by namespace to amortize the per-namespace PDB list.
+	byNamespace := make(map[string][]controllerInfo)
+	for _, c := range controllers {
+		byNamespace[c.Namespace] = append(byNamespace[c.Namespace], c)
+	}
+
+	var errs []error
+	for ns, ctrls := range byNamespace {
+		existing, err := listNamespacePDBs(ctx, clientset, ns)
+		if err != nil {
+			errs = append(errs, fmt.Errorf("namespace %s: failed to list PDBs: %w", ns, err))
+			continue
+		}
+		for _, c := range ctrls {
+			if hasUserPDB(existing, c.Selector) {
+				continue
+			}
+			if err := createTempPDB(ctx, ctrlClient, c, dryRun); err != nil {
+				errs = append(errs, fmt.Errorf("controller %s/%s/%s: %w", c.Namespace, c.Kind, c.Name, err))
+			}
+		}
+	}
+	return errors.Join(errs...)
+}
+
+// cleanupTempPDBs deletes every PodDisruptionBudget cluster-wide that carries
+// both temp-PDB labels. Listing by labels (not by a state struct returned from
+// ensureTempPDBs) is what makes the command crash-safe: a re-run after a kill
+// still finds and removes the orphans left by the previous attempt.
+func cleanupTempPDBs(ctx context.Context, ctrlClient client.Client, dryRun bool) error {
+	list := &policyv1.PodDisruptionBudgetList{}
+	if err := ctrlClient.List(ctx, list, client.MatchingLabels{
+		pdbManagedByLabelKey: pdbManagedByLabelValue,
+		pdbTempLabelKey:      pdbTempLabelValue,
+	}); err != nil {
+		return fmt.Errorf("failed to list temporary PDBs: %w", err)
+	}
+	if len(list.Items) == 0 {
+		return nil
+	}
+	var errs []error
+	for i := range list.Items {
+		pdb := &list.Items[i]
+		if dryRun {
+			log.Printf("[dry-run] would delete PDB %s/%s", pdb.Namespace, pdb.Name)
+			continue
+		}
+		if err := commonk8s.Delete(ctx, ctrlClient, pdb); err != nil {
+			errs = append(errs, fmt.Errorf("PDB %s/%s: %w", pdb.Namespace, pdb.Name, err))
+		}
+	}
+	return errors.Join(errs...)
+}
+
+// controllerKey identifies a top-level controller that owns evictable pods on
+// our target nodes. It is the dedup key in the seen map and the identity half
+// of controllerInfo.
+type controllerKey struct {
+	Namespace string
+	Kind      string // "Deployment", "StatefulSet", "ReplicaSet"
+	Name      string
+}
+
+// controllerInfo is a controllerKey plus the controller's pod selector — what a
+// PDB would match on.
+type controllerInfo struct {
+	controllerKey
+	Selector *metav1.LabelSelector
+}
+
+// discoverControllers lists every Pod cluster-wide once (paginated) and, for
+// each Pod scheduled on one of the target nodes, resolves the top-level
+// controller. Listing once and filtering client-side avoids the N-API-calls
+// problem of doing one List per node, which on a large legacy fleet would
+// dominate the command's wall-clock time. The resulting slice contains each
+// controller at most once.
+func discoverControllers(ctx context.Context, clientset kubernetes.Interface, nodeSet map[string]struct{}) ([]controllerInfo, error) {
+	seen := make(map[controllerKey]*metav1.LabelSelector)
+	depCache := make(map[client.ObjectKey]*appsv1.Deployment)
+	rsCache := make(map[client.ObjectKey]*appsv1.ReplicaSet)
+	stsCache := make(map[client.ObjectKey]*appsv1.StatefulSet)
+
+	p := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
+		return clientset.CoreV1().Pods(metav1.NamespaceAll).List(ctx, opts)
+	})
+	if err := p.EachListItem(ctx, metav1.ListOptions{}, func(obj runtime.Object) error {
+		pod := obj.(*corev1.Pod)
+		if _, onTarget := nodeSet[pod.Spec.NodeName]; !onTarget {
+			return nil
+		}
+		if shouldSkipEviction(pod) {
+			return nil
+		}
+		info, err := resolveTopLevelController(ctx, clientset, pod, depCache, rsCache, stsCache)
+		if err != nil {
+			log.Printf("Warning: cannot resolve controller for pod %s/%s: %v", pod.Namespace, pod.Name, err)
+			return nil
+		}
+		if info == nil {
+			return nil
+		}
+		seen[info.controllerKey] = info.Selector
+		return nil
+	}); err != nil {
+		return nil, fmt.Errorf("list pods: %w", err)
+	}
+
+	return lo.MapToSlice(seen, func(key controllerKey, selector *metav1.LabelSelector) controllerInfo {
+		return controllerInfo{controllerKey: key, Selector: selector}
+	}), nil
+}
+
+// resolveTopLevelController walks a Pod's owner chain up to the workload
+// controller (Deployment > ReplicaSet > Pod, StatefulSet > Pod). Returns nil
+// for Pods whose top-level owner is not a stable workload — Jobs (TTL-managed),
+// DaemonSets (already skipped at eviction), or bare Pods.
+func resolveTopLevelController(
+	ctx context.Context,
+	clientset kubernetes.Interface,
+	pod *corev1.Pod,
+	depCache map[client.ObjectKey]*appsv1.Deployment,
+	rsCache map[client.ObjectKey]*appsv1.ReplicaSet,
+	stsCache map[client.ObjectKey]*appsv1.StatefulSet,
+) (*controllerInfo, error) {
+	owner := metav1.GetControllerOf(pod)
+	if owner == nil {
+		return nil, nil
+	}
+	switch owner.Kind {
+	case "ReplicaSet":
+		rs, err := getCached(ctx, rsCache, pod.Namespace, owner.Name, clientset.AppsV1().ReplicaSets(pod.Namespace).Get)
+		if err != nil {
+			return nil, err
+		}
+		rsOwner := metav1.GetControllerOf(rs)
+		if rsOwner != nil && rsOwner.Kind == "Deployment" {
+			dep, err := getCached(ctx, depCache, pod.Namespace, rsOwner.Name, clientset.AppsV1().Deployments(pod.Namespace).Get)
+			if err != nil {
+				return nil, err
+			}
+			return &controllerInfo{
+				controllerKey: controllerKey{Namespace: pod.Namespace, Kind: "Deployment", Name: dep.Name},
+				Selector:      dep.Spec.Selector,
+			}, nil
+		}
+		return &controllerInfo{
+			controllerKey: controllerKey{Namespace: pod.Namespace, Kind: "ReplicaSet", Name: rs.Name},
+			Selector:      rs.Spec.Selector,
+		}, nil
+	case "StatefulSet":
+		sts, err := getCached(ctx, stsCache, pod.Namespace, owner.Name, clientset.AppsV1().StatefulSets(pod.Namespace).Get)
+		if err != nil {
+			return nil, err
+		}
+		return &controllerInfo{
+			controllerKey: controllerKey{Namespace: pod.Namespace, Kind: "StatefulSet", Name: sts.Name},
+			Selector:      sts.Spec.Selector,
+		}, nil
+	default:
+		// DaemonSet (skipped before reaching here), Job (TTL), CronJob,
+		// custom controllers — none get a temporary PDB.
+		return nil, nil
+	}
+}
+
+// getCached returns the object identified by (ns, name) from cache, fetching it
+// via get — the namespace-bound typed clientset accessor, e.g.
+// clientset.AppsV1().ReplicaSets(ns).Get — and populating the cache on a miss.
+// T is inferred from the cache's value type, collapsing the per-kind getters
+// into a single generic lookup.
+func getCached[T any](
+	ctx context.Context,
+	cache map[client.ObjectKey]*T,
+	ns, name string,
+	get func(ctx context.Context, name string, opts metav1.GetOptions) (*T, error),
+) (*T, error) {
+	key := client.ObjectKey{Namespace: ns, Name: name}
+	if obj, ok := cache[key]; ok {
+		return obj, nil
+	}
+	obj, err := get(ctx, name, metav1.GetOptions{})
+	if err != nil {
+		return nil, err
+	}
+	cache[key] = obj
+	return obj, nil
+}
+
+// listNamespacePDBs returns every PDB in the namespace. Used to detect
+// pre-existing user PDBs covering a controller we'd otherwise PDB-protect.
+func listNamespacePDBs(ctx context.Context, clientset kubernetes.Interface, namespace string) ([]policyv1.PodDisruptionBudget, error) {
+	list, err := clientset.PolicyV1().PodDisruptionBudgets(namespace).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return nil, err
+	}
+	return list.Items, nil
+}
+
+// hasUserPDB returns true when an existing non-temporary PDB has the same
+// selector as the controller's pod selector. This is a conservative
+// equality check: a broader user PDB will NOT be detected, and we'll create
+// our own. Eviction will then respect the most restrictive of the two,
+// preserving the user's intent.
+func hasUserPDB(existing []policyv1.PodDisruptionBudget, controllerSelector *metav1.LabelSelector) bool {
+	if controllerSelector == nil {
+		return false
+	}
+	return slices.ContainsFunc(existing, func(pdb policyv1.PodDisruptionBudget) bool {
+		return !isTemporaryPDB(&pdb) && reflect.DeepEqual(pdb.Spec.Selector, controllerSelector)
+	})
+}
+
+func isTemporaryPDB(pdb *policyv1.PodDisruptionBudget) bool {
+	return pdb.Labels[pdbManagedByLabelKey] == pdbManagedByLabelValue &&
+		pdb.Labels[pdbTempLabelKey] == pdbTempLabelValue
+}
+
+// createTempPDB writes (or no-ops if our PDB already exists) a temporary
+// PodDisruptionBudget with maxUnavailable: 1. Existing PDBs that aren't ours
+// at the same name are left alone with a logged warning — that's a name
+// collision the user must resolve.
+func createTempPDB(ctx context.Context, ctrlClient client.Client, c controllerInfo, dryRun bool) error {
+	name := tempPDBName(c.Kind, c.Name)
+	if dryRun {
+		log.Printf("[dry-run] would create PDB %s/%s (maxUnavailable: 1, selector: %s)", c.Namespace, name, formatSelector(c.Selector))
+		return nil
+	}
+
+	existing := &policyv1.PodDisruptionBudget{}
+	err := ctrlClient.Get(ctx, client.ObjectKey{Namespace: c.Namespace, Name: name}, existing)
+	switch {
+	case err == nil:
+		if !isTemporaryPDB(existing) {
+			log.Printf("Warning: PDB %s/%s exists but is not labelled as temporary; leaving it untouched", c.Namespace, name)
+			return nil
+		}
+		// Our PDB from a previous (possibly crashed) run. Leave as-is; the
+		// cleanup step will remove it at the end of the current run.
+		return nil
+	case !apierrors.IsNotFound(err):
+		return fmt.Errorf("failed to get PDB %s/%s: %w", c.Namespace, name, err)
+	}
+
+	maxUnavailable := intstr.FromInt(1)
+	pdb := &policyv1.PodDisruptionBudget{
+		TypeMeta: metav1.TypeMeta{APIVersion: "policy/v1", Kind: "PodDisruptionBudget"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: c.Namespace,
+			Labels: map[string]string{
+				pdbManagedByLabelKey: pdbManagedByLabelValue,
+				pdbTempLabelKey:      pdbTempLabelValue,
+			},
+		},
+		Spec: policyv1.PodDisruptionBudgetSpec{
+			Selector:       c.Selector.DeepCopy(),
+			MaxUnavailable: &maxUnavailable,
+		},
+	}
+	if err := ctrlClient.Create(ctx, pdb); err != nil {
+		return fmt.Errorf("failed to create PDB %s/%s: %w", c.Namespace, name, err)
+	}
+	log.Printf("Created temporary PDB %s/%s for %s/%s (maxUnavailable: 1).", c.Namespace, name, c.Kind, c.Name)
+	return nil
+}
+
+// tempPDBName builds a DNS-label-safe PDB name. Long controller names are
+// truncated so the final name (including the suffix) fits the 63-char limit.
+func tempPDBName(kind, controllerName string) string {
+	prefix := strings.ToLower(kind) + "-" + controllerName
+	if len(prefix)+len(pdbNameSuffix) > validation.DNS1123LabelMaxLength {
+		prefix = prefix[:validation.DNS1123LabelMaxLength-len(pdbNameSuffix)]
+	}
+	return prefix + pdbNameSuffix
+}
+
+func formatSelector(s *metav1.LabelSelector) string {
+	if s == nil {
+		return "<nil>"
+	}
+	parts := lo.MapToSlice(s.MatchLabels, func(k, v string) string {
+		return k + "=" + v
+	})
+	return strings.Join(parts, ",")
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/pdb_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/pdb_test.go
new file mode 100644
index 0000000000..c5fbd3d09a
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/pdb_test.go
@@ -0,0 +1,367 @@
+package evict
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/utils/ptr"
+	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
+	ctrlfake "sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+func newCtrlScheme(t *testing.T) *runtime.Scheme {
+	t.Helper()
+	sch := runtime.NewScheme()
+	require.NoError(t, scheme.AddToScheme(sch))
+	return sch
+}
+
+func TestTempPDBName(t *testing.T) {
+	for _, tc := range []struct {
+		kind   string
+		name   string
+		expect string
+	}{
+		{"Deployment", "my-app", "deployment-my-app-evict-legacy"},
+		{"StatefulSet", "db", "statefulset-db-evict-legacy"},
+		// Long name that must be truncated to fit 63 chars
+		{"Deployment", strings.Repeat("a", 80), "deployment-" + strings.Repeat("a", 63-len("deployment-")-len(pdbNameSuffix)) + pdbNameSuffix},
+	} {
+		got := tempPDBName(tc.kind, tc.name)
+		assert.LessOrEqual(t, len(got), 63, "tempPDBName output exceeds 63 chars: %s", got)
+		assert.Equal(t, tc.expect, got)
+	}
+}
+
+func TestIsTemporaryPDB(t *testing.T) {
+	for _, tc := range []struct {
+		name   string
+		labels map[string]string
+		want   bool
+	}{
+		{
+			name: "both labels present",
+			labels: map[string]string{
+				pdbManagedByLabelKey: pdbManagedByLabelValue,
+				pdbTempLabelKey:      pdbTempLabelValue,
+			},
+			want: true,
+		},
+		{name: "no labels", labels: map[string]string{"app": "x"}, want: false},
+		{
+			name:   "only managed-by label, must require BOTH",
+			labels: map[string]string{pdbManagedByLabelKey: pdbManagedByLabelValue},
+			want:   false,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			pdb := &policyv1.PodDisruptionBudget{ObjectMeta: metav1.ObjectMeta{Labels: tc.labels}}
+			assert.Equal(t, tc.want, isTemporaryPDB(pdb))
+		})
+	}
+}
+
+func TestHasUserPDB(t *testing.T) {
+	matchingSelector := &metav1.LabelSelector{MatchLabels: map[string]string{"app": "x"}}
+	otherSelector := &metav1.LabelSelector{MatchLabels: map[string]string{"app": "y"}}
+	userPDB := policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{Name: "user", Namespace: "default"},
+		Spec:       policyv1.PodDisruptionBudgetSpec{Selector: matchingSelector},
+	}
+	tempPDB := policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{Name: "temp", Namespace: "default", Labels: map[string]string{
+			pdbManagedByLabelKey: pdbManagedByLabelValue,
+			pdbTempLabelKey:      pdbTempLabelValue,
+		}},
+		Spec: policyv1.PodDisruptionBudgetSpec{Selector: matchingSelector},
+	}
+
+	for _, tc := range []struct {
+		name               string
+		existing           []policyv1.PodDisruptionBudget
+		controllerSelector *metav1.LabelSelector
+		want               bool
+	}{
+		{name: "matching user PDB", existing: []policyv1.PodDisruptionBudget{userPDB}, controllerSelector: matchingSelector, want: true},
+		{name: "temp PDB ignored", existing: []policyv1.PodDisruptionBudget{tempPDB}, controllerSelector: matchingSelector, want: false},
+		{name: "nil controller selector", existing: []policyv1.PodDisruptionBudget{userPDB}, controllerSelector: nil, want: false},
+		{name: "different selector", existing: []policyv1.PodDisruptionBudget{userPDB}, controllerSelector: otherSelector, want: false},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, hasUserPDB(tc.existing, tc.controllerSelector))
+		})
+	}
+}
+
+func TestCleanupTempPDBs(t *testing.T) {
+	tempPDB := func() *policyv1.PodDisruptionBudget {
+		return &policyv1.PodDisruptionBudget{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "temp", Namespace: "default",
+				Labels: map[string]string{
+					pdbManagedByLabelKey: pdbManagedByLabelValue,
+					pdbTempLabelKey:      pdbTempLabelValue,
+				},
+			},
+		}
+	}
+	userPDB := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{Name: "user", Namespace: "default"},
+	}
+	otherManaged := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "other", Namespace: "kube-system",
+			Labels: map[string]string{pdbManagedByLabelKey: "some-other-tool"},
+		},
+	}
+
+	for _, tc := range []struct {
+		name string
+		// existing seeds the controller-runtime fake client.
+		existing []ctrlclient.Object
+		dryRun   bool
+		// wantRemaining is the expected `<namespace>/<name>` set after cleanup.
+		wantRemaining []string
+	}{
+		{
+			// Only PDBs carrying BOTH temp labels are removed; user PDBs
+			// and PDBs from other tools stay put.
+			name:          "deletes only fully-labelled temp PDBs",
+			existing:      []ctrlclient.Object{tempPDB(), userPDB, otherManaged},
+			wantRemaining: []string{"default/user", "kube-system/other"},
+		},
+		{
+			name:          "no temp PDBs is a no-op",
+			existing:      []ctrlclient.Object{userPDB},
+			wantRemaining: []string{"default/user"},
+		},
+		{
+			name:          "dry-run does not delete",
+			existing:      []ctrlclient.Object{tempPDB()},
+			dryRun:        true,
+			wantRemaining: []string{"default/temp"},
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			cli := ctrlfake.NewClientBuilder().WithScheme(newCtrlScheme(t)).WithObjects(tc.existing...).Build()
+
+			require.NoError(t, cleanupTempPDBs(t.Context(), cli, tc.dryRun))
+
+			list := &policyv1.PodDisruptionBudgetList{}
+			require.NoError(t, cli.List(t.Context(), list))
+			names := make([]string, 0, len(list.Items))
+			for _, p := range list.Items {
+				names = append(names, p.Namespace+"/"+p.Name)
+			}
+			assert.ElementsMatch(t, tc.wantRemaining, names)
+		})
+	}
+}
+
+func TestCreateTempPDB(t *testing.T) {
+	appController := controllerInfo{
+		controllerKey: controllerKey{Namespace: "default", Kind: "Deployment", Name: "app"},
+		Selector:      &metav1.LabelSelector{MatchLabels: map[string]string{"app": "x"}},
+	}
+	existingTempPDB := func() *policyv1.PodDisruptionBudget {
+		return &policyv1.PodDisruptionBudget{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: tempPDBName("Deployment", "app"), Namespace: "default",
+				Labels: map[string]string{
+					pdbManagedByLabelKey: pdbManagedByLabelValue,
+					pdbTempLabelKey:      pdbTempLabelValue,
+				},
+			},
+			Spec: policyv1.PodDisruptionBudgetSpec{
+				MaxUnavailable: ptr.To(intstr.FromInt(1)),
+				Selector:       &metav1.LabelSelector{MatchLabels: map[string]string{"app": "x"}},
+			},
+		}
+	}
+	userPDBAtSameName := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: tempPDBName("Deployment", "app"), Namespace: "default",
+			// no temp-pdb labels: looks like a user PDB that happens to share
+			// our naming.
+		},
+	}
+
+	// ownership describes the expected ownership of the surviving PDB.
+	type ownership int
+	const (
+		expectNoPDB   ownership = iota // 0 PDBs in cluster
+		expectOurs                     // 1 PDB, with our temp labels + MaxUnavailable=1
+		expectUserPDB                  // 1 PDB, NOT carrying our labels
+		expectAny                      // 1 PDB, ownership irrelevant (idempotent case)
+	)
+
+	for _, tc := range []struct {
+		name string
+		// existing seeds the controller-runtime fake client.
+		existing []ctrlclient.Object
+		dryRun   bool
+		// expect describes the expected post-create cluster state.
+		expect ownership
+		// runCleanupAfter, when true, runs cleanupTempPDBs after createTempPDB
+		// and asserts the cluster ends up empty — exercises the crash-recovery
+		// loop where a previous run left a temp PDB behind.
+		runCleanupAfter bool
+	}{
+		{
+			name:   "creates PDB when missing",
+			expect: expectOurs,
+		},
+		{
+			// Crash-recovery scenario: a previous run created a temp PDB
+			// and was killed. A fresh ensure must NOT duplicate it, and
+			// cleanup must still remove it.
+			name:            "idempotent on pre-existing temp PDB",
+			existing:        []ctrlclient.Object{existingTempPDB()},
+			expect:          expectAny,
+			runCleanupAfter: true,
+		},
+		{
+			// User-owned PDB at the same name (no temp labels) must be
+			// left untouched.
+			name:     "name collision with user PDB leaves it untouched",
+			existing: []ctrlclient.Object{userPDBAtSameName},
+			expect:   expectUserPDB,
+		},
+		{
+			name:   "dry-run does not create",
+			dryRun: true,
+			expect: expectNoPDB,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			cli := ctrlfake.NewClientBuilder().WithScheme(newCtrlScheme(t)).WithObjects(tc.existing...).Build()
+
+			require.NoError(t, createTempPDB(t.Context(), cli, appController, tc.dryRun))
+
+			list := &policyv1.PodDisruptionBudgetList{}
+			require.NoError(t, cli.List(t.Context(), list))
+			switch tc.expect {
+			case expectNoPDB:
+				assert.Empty(t, list.Items)
+			case expectOurs:
+				require.Len(t, list.Items, 1)
+				pdb := list.Items[0]
+				assert.Equal(t, pdbManagedByLabelValue, pdb.Labels[pdbManagedByLabelKey])
+				assert.Equal(t, pdbTempLabelValue, pdb.Labels[pdbTempLabelKey])
+				require.NotNil(t, pdb.Spec.MaxUnavailable)
+				assert.Equal(t, int32(1), pdb.Spec.MaxUnavailable.IntVal)
+			case expectUserPDB:
+				require.Len(t, list.Items, 1)
+				assert.NotEqual(t, pdbManagedByLabelValue, list.Items[0].Labels[pdbManagedByLabelKey])
+			case expectAny:
+				require.Len(t, list.Items, 1)
+			}
+
+			if tc.runCleanupAfter {
+				require.NoError(t, cleanupTempPDBs(t.Context(), cli, false))
+				require.NoError(t, cli.List(t.Context(), list))
+				assert.Empty(t, list.Items, "cleanup must remove pre-existing temp PDB even with no state from ensure")
+			}
+		})
+	}
+}
+
+// TestDiscoverControllers_FiltersByNodeSet locks in the cluster-wide-list +
+// client-side filter shape of discoverControllers: pods on target nodes
+// resolve to their top-level controllers, pods elsewhere are ignored, and
+// duplicate controllers (multiple pods of one Deployment) collapse to a
+// single entry.
+func TestDiscoverControllers_FiltersByNodeSet(t *testing.T) {
+	mkDeployment := func(name, ns, appLabel string) *appsv1.Deployment {
+		return &appsv1.Deployment{
+			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
+			Spec: appsv1.DeploymentSpec{
+				Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": appLabel}},
+			},
+		}
+	}
+	mkReplicaSet := func(name, ns, owningDeployment string) *appsv1.ReplicaSet {
+		return &appsv1.ReplicaSet{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: name, Namespace: ns,
+				OwnerReferences: []metav1.OwnerReference{{Kind: "Deployment", Name: owningDeployment, Controller: ptr.To(true)}},
+			},
+		}
+	}
+	// mkBareReplicaSet is a ReplicaSet with no owning Deployment — its pods
+	// resolve to the ReplicaSet itself as the top-level controller.
+	mkBareReplicaSet := func(name, ns, appLabel string) *appsv1.ReplicaSet {
+		return &appsv1.ReplicaSet{
+			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
+			Spec: appsv1.ReplicaSetSpec{
+				Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": appLabel}},
+			},
+		}
+	}
+	mkStatefulSet := func(name, ns, appLabel string) *appsv1.StatefulSet {
+		return &appsv1.StatefulSet{
+			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
+			Spec: appsv1.StatefulSetSpec{
+				Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": appLabel}},
+			},
+		}
+	}
+	mkPod := func(name, ns, nodeName, ownerKind, ownerName string) *corev1.Pod {
+		return &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: name, Namespace: ns,
+				OwnerReferences: []metav1.OwnerReference{{Kind: ownerKind, Name: ownerName, Controller: ptr.To(true)}},
+			},
+			Spec: corev1.PodSpec{NodeName: nodeName},
+		}
+	}
+
+	client := fake.NewClientset(
+		mkDeployment("target-app", "default", "target"),
+		mkReplicaSet("target-app-abc", "default", "target-app"),
+		// two pods of the same Deployment, both on the target node — must dedup.
+		mkPod("target-pod-1", "default", "target-node", "ReplicaSet", "target-app-abc"),
+		mkPod("target-pod-2", "default", "target-node", "ReplicaSet", "target-app-abc"),
+		// StatefulSet pod on the target node — resolves to the StatefulSet.
+		mkStatefulSet("target-sts", "default", "target-sts"),
+		mkPod("target-sts-0", "default", "target-node", "StatefulSet", "target-sts"),
+		// bare ReplicaSet (no owning Deployment) on the target node — resolves
+		// to the ReplicaSet itself.
+		mkBareReplicaSet("target-bare-rs", "default", "target-bare"),
+		mkPod("target-bare-rs-xyz", "default", "target-node", "ReplicaSet", "target-bare-rs"),
+		// DaemonSet-owned pod on the target node — the default switch arm
+		// returns no controller (DaemonSets get no temporary PDB).
+		mkPod("target-ds-pod", "default", "target-node", "DaemonSet", "target-ds"),
+		// orphan pod (no controller owner) on the target node — ignored.
+		&corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{Name: "target-orphan", Namespace: "default"},
+			Spec:       corev1.PodSpec{NodeName: "target-node"},
+		},
+		// pod on a non-target node, must be ignored.
+		mkDeployment("off-target-app", "default", "off"),
+		mkReplicaSet("off-target-app-def", "default", "off-target-app"),
+		mkPod("off-target-pod", "default", "other-node", "ReplicaSet", "off-target-app-def"),
+	)
+
+	controllers, err := discoverControllers(t.Context(), client, map[string]struct{}{"target-node": {}})
+	require.NoError(t, err)
+	got := make(map[string]string, len(controllers)) // name -> kind
+	for _, c := range controllers {
+		got[c.Name] = c.Kind
+	}
+	assert.Equal(t, map[string]string{
+		"target-app":     "Deployment",
+		"target-sts":     "StatefulSet",
+		"target-bare-rs": "ReplicaSet",
+	}, got)
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/plan.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/plan.go
new file mode 100644
index 0000000000..5c5e7418ce
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/plan.go
@@ -0,0 +1,157 @@
+// Package evict implements the `kubectl datadog autoscaling cluster
+// evict-legacy-nodes` subcommand. It drains workloads from nodes that are not
+// managed by the Datadog Karpenter NodePools (cluster-autoscaler ASGs, EKS
+// managed node groups, user-created Karpenter NodePools, standalone EC2
+// instances) and scales those node groups to zero so that future capacity is
+// provisioned exclusively by the NodePools created by
+// `kubectl datadog autoscaling cluster install`.
+package evict
+
+import (
+	"errors"
+	"fmt"
+	"maps"
+	"slices"
+	"strings"
+
+	"github.com/samber/lo"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+// Target identifies a single node-management entity to evict. The Entity field
+// is the bucket name within `ClusterInfo.NodeManagement[Manager]` — an ASG
+// name, an EKS managed node group name, a Karpenter NodePool name, or the
+// empty string for the standalone bucket.
+type Target struct {
+	Manager clusterinfo.NodeManager
+	Entity  string
+	Nodes   []string
+}
+
+// ParseTargetSpec parses a CLI `--target` value into a Target with an empty
+// Nodes slice (Nodes are filled in by BuildPlan from the cluster snapshot).
+//
+// Format: `<manager>/<entity>`, with the entity omitted for the standalone
+// manager. Fargate and unknown are rejected here — their migration is out of
+// scope of this command.
+func ParseTargetSpec(raw string) (Target, error) {
+	raw = strings.TrimSpace(raw)
+	if raw == "" {
+		return Target{}, errors.New("--target value cannot be empty")
+	}
+	mgrStr, entity, hasSlash := strings.Cut(raw, "/")
+	mgr := clusterinfo.NodeManager(mgrStr)
+	switch mgr {
+	case clusterinfo.NodeManagerASG,
+		clusterinfo.NodeManagerEKSManagedNodeGroup,
+		clusterinfo.NodeManagerKarpenter:
+		if !hasSlash || entity == "" {
+			return Target{}, fmt.Errorf("--target=%s requires a name: %s/<name>", mgrStr, mgrStr)
+		}
+		return Target{Manager: mgr, Entity: entity}, nil
+	case clusterinfo.NodeManagerStandalone:
+		if hasSlash && entity != "" {
+			return Target{}, fmt.Errorf("--target=standalone does not take a name (got %q)", entity)
+		}
+		return Target{Manager: mgr, Entity: ""}, nil
+	case clusterinfo.NodeManagerFargate:
+		return Target{}, errors.New("--target=fargate is not supported: manage Fargate profiles directly via AWS")
+	case clusterinfo.NodeManagerUnknown:
+		return Target{}, errors.New("--target=unknown is not supported: nodes with unknown providers cannot be migrated automatically")
+	default:
+		return Target{}, fmt.Errorf("--target=%s: unknown manager type (supported: asg, eksManagedNodeGroup, karpenter, standalone)", mgrStr)
+	}
+}
+
+// BuildPlan turns user intent (--all or one or more --target flags) into the
+// list of Targets the orchestrator will evict. Entries flagged ManagedByDatadog
+// are filtered out (protected by definition); fargate and unknown managers are
+// skipped in --all mode (their lifecycle is handled elsewhere or unknown).
+//
+// In targeted mode, user input errors are collected and returned together so
+// the user can fix all mistakes in a single iteration.
+func BuildPlan(info *clusterinfo.ClusterInfo, all bool, specs []Target) ([]Target, error) {
+	if info == nil {
+		return nil, errors.New("cluster info is nil")
+	}
+	if all {
+		return buildAllPlan(info), nil
+	}
+	return buildTargetedPlan(info, specs)
+}
+
+func buildAllPlan(info *clusterinfo.ClusterInfo) []Target {
+	var targets []Target
+	for _, mgr := range []clusterinfo.NodeManager{
+		clusterinfo.NodeManagerKarpenter,
+		clusterinfo.NodeManagerEKSManagedNodeGroup,
+		clusterinfo.NodeManagerASG,
+		clusterinfo.NodeManagerStandalone,
+	} {
+		bucket, ok := info.NodeManagement[mgr]
+		if !ok {
+			continue
+		}
+		targets = append(targets, lo.FilterMap(slices.Sorted(maps.Keys(bucket)), func(name string, _ int) (Target, bool) {
+			entry := bucket[name]
+			if entry.ManagedByDatadog {
+				return Target{}, false
+			}
+			return Target{
+				Manager: mgr,
+				Entity:  name,
+				Nodes:   entry.Nodes,
+			}, true
+		})...)
+	}
+	return targets
+}
+
+func buildTargetedPlan(info *clusterinfo.ClusterInfo, specs []Target) ([]Target, error) {
+	if len(specs) == 0 {
+		return nil, errors.New("at least one --target must be provided, or --all")
+	}
+	var (
+		targets []Target
+		errs    []error
+	)
+	for _, t := range specs {
+		bucket, ok := info.NodeManagement[t.Manager]
+		if !ok {
+			errs = append(errs, fmt.Errorf("--target=%s/%s: no %s entities found in the cluster snapshot", t.Manager, t.Entity, t.Manager))
+			continue
+		}
+		entry, ok := bucket[t.Entity]
+		if !ok {
+			errs = append(errs, fmt.Errorf("--target=%s/%s: entity not found in the cluster snapshot", t.Manager, t.Entity))
+			continue
+		}
+		if entry.ManagedByDatadog {
+			errs = append(errs, fmt.Errorf("--target=%s/%s: this entity is managed by Datadog and cannot be evicted", t.Manager, t.Entity))
+			continue
+		}
+		targets = append(targets, Target{
+			Manager: t.Manager,
+			Entity:  t.Entity,
+			Nodes:   entry.Nodes,
+		})
+	}
+	if len(errs) > 0 {
+		return nil, errors.Join(errs...)
+	}
+	return targets, nil
+}
+
+// hasDatadogManagedNodePool reports whether the snapshot lists at least one
+// Karpenter NodePool flagged as Datadog-managed. The orchestrator uses this as
+// a pre-flight guard: if no destination NodePool exists, scaling legacy
+// capacity to zero would leave the cluster with no working capacity.
+func hasDatadogManagedNodePool(info *clusterinfo.ClusterInfo) bool {
+	for _, entry := range info.NodeManagement[clusterinfo.NodeManagerKarpenter] {
+		if entry.ManagedByDatadog {
+			return true
+		}
+	}
+	return false
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/plan_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/plan_test.go
new file mode 100644
index 0000000000..8fe110258e
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/plan_test.go
@@ -0,0 +1,218 @@
+package evict
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+func TestParseTargetSpec(t *testing.T) {
+	for _, tc := range []struct {
+		name      string
+		input     string
+		want      Target
+		wantError string
+	}{
+		{name: "asg", input: "asg/legacy-asg", want: Target{Manager: clusterinfo.NodeManagerASG, Entity: "legacy-asg"}},
+		{name: "eks-mng", input: "eksManagedNodeGroup/standard-2", want: Target{Manager: clusterinfo.NodeManagerEKSManagedNodeGroup, Entity: "standard-2"}},
+		{name: "karpenter", input: "karpenter/user-pool", want: Target{Manager: clusterinfo.NodeManagerKarpenter, Entity: "user-pool"}},
+		{name: "standalone-no-slash", input: "standalone", want: Target{Manager: clusterinfo.NodeManagerStandalone, Entity: ""}},
+		{name: "standalone-empty-after-slash", input: "standalone/", want: Target{Manager: clusterinfo.NodeManagerStandalone, Entity: ""}},
+		{name: "leading-trailing-spaces", input: "  asg/foo  ", want: Target{Manager: clusterinfo.NodeManagerASG, Entity: "foo"}},
+
+		{name: "empty", input: "", wantError: "cannot be empty"},
+		{name: "asg-no-name", input: "asg", wantError: "requires a name"},
+		{name: "asg-empty-name", input: "asg/", wantError: "requires a name"},
+		{name: "standalone-with-name", input: "standalone/foo", wantError: "does not take a name"},
+		{name: "fargate-rejected", input: "fargate/profile", wantError: "not supported"},
+		{name: "unknown-rejected", input: "unknown/x", wantError: "not supported"},
+		{name: "garbage", input: "blob/x", wantError: "unknown manager"},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := ParseTargetSpec(tc.input)
+			if tc.wantError != "" {
+				require.Error(t, err)
+				assert.Contains(t, err.Error(), tc.wantError)
+				return
+			}
+			require.NoError(t, err)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+func TestBuildPlan(t *testing.T) {
+	mixedInfo := &clusterinfo.ClusterInfo{
+		NodeManagement: map[clusterinfo.NodeManager]map[string]clusterinfo.NodeManagerEntry{
+			clusterinfo.NodeManagerASG: {
+				"legacy-asg-a": {Nodes: []string{"ip-1", "ip-2"}, ManagedByDatadog: false},
+				"legacy-asg-b": {Nodes: []string{"ip-3"}, ManagedByDatadog: false},
+			},
+			clusterinfo.NodeManagerEKSManagedNodeGroup: {
+				"mng-1":         {Nodes: []string{"ip-4"}, ManagedByDatadog: false},
+				"datadog-owned": {Nodes: []string{"ip-5"}, ManagedByDatadog: true},
+			},
+			clusterinfo.NodeManagerKarpenter: {
+				"user-np": {Nodes: []string{"ip-6"}, ManagedByDatadog: false},
+				"dd-np":   {Nodes: []string{"ip-7"}, ManagedByDatadog: true},
+			},
+			clusterinfo.NodeManagerStandalone: {
+				"": {Nodes: []string{"ip-8"}, ManagedByDatadog: false},
+			},
+			clusterinfo.NodeManagerFargate: {
+				"profile-1": {Nodes: []string{"fargate-ip-9"}, ManagedByDatadog: false},
+			},
+			clusterinfo.NodeManagerUnknown: {
+				"": {Nodes: []string{"weird-1"}, ManagedByDatadog: false},
+			},
+		},
+	}
+	asgOnlyInfo := &clusterinfo.ClusterInfo{
+		NodeManagement: map[clusterinfo.NodeManager]map[string]clusterinfo.NodeManagerEntry{
+			clusterinfo.NodeManagerASG: {
+				"my-asg": {Nodes: []string{"ip-1", "ip-2"}, ManagedByDatadog: false},
+			},
+		},
+	}
+	ddASGInfo := &clusterinfo.ClusterInfo{
+		NodeManagement: map[clusterinfo.NodeManager]map[string]clusterinfo.NodeManagerEntry{
+			clusterinfo.NodeManagerASG: {
+				"dd-asg": {Nodes: []string{"ip-1"}, ManagedByDatadog: true},
+			},
+		},
+	}
+
+	for _, tc := range []struct {
+		name    string
+		info    *clusterinfo.ClusterInfo
+		all     bool
+		targets []Target
+
+		wantErr         bool
+		wantErrContains string
+		// wantEntities, when non-nil, is the expected set of `<manager>/<entity>`
+		// strings in the returned plan. ElementsMatch is used (order
+		// irrelevant). nil disables the check.
+		wantEntities []string
+		// wantNodes maps entity name to its expected Nodes slice in the
+		// returned plan (only used in the targeted happy-path case).
+		wantNodes map[string][]string
+	}{
+		{
+			// --all: Datadog-managed entries dropped; fargate + unknown
+			// skipped entirely.
+			name: "--all returns every non-Datadog entity",
+			info: mixedInfo,
+			all:  true,
+			wantEntities: []string{
+				"karpenter/user-np",
+				"eksManagedNodeGroup/mng-1",
+				"asg/legacy-asg-a",
+				"asg/legacy-asg-b",
+				"standalone/",
+			},
+		},
+		{
+			name:         "targeted happy path",
+			info:         asgOnlyInfo,
+			targets:      []Target{{Manager: clusterinfo.NodeManagerASG, Entity: "my-asg"}},
+			wantEntities: []string{"asg/my-asg"},
+			wantNodes:    map[string][]string{"my-asg": {"ip-1", "ip-2"}},
+		},
+		{
+			name:            "missing entity errors",
+			info:            ddASGInfo,
+			targets:         []Target{{Manager: clusterinfo.NodeManagerASG, Entity: "ghost"}},
+			wantErr:         true,
+			wantErrContains: "not found",
+		},
+		{
+			name:            "datadog-managed entity is refused",
+			info:            ddASGInfo,
+			targets:         []Target{{Manager: clusterinfo.NodeManagerASG, Entity: "dd-asg"}},
+			wantErr:         true,
+			wantErrContains: "managed by Datadog",
+		},
+		{
+			name:            "missing manager bucket errors",
+			info:            ddASGInfo,
+			targets:         []Target{{Manager: clusterinfo.NodeManagerKarpenter, Entity: "x"}},
+			wantErr:         true,
+			wantErrContains: "no karpenter entities",
+		},
+		{
+			name:            "no targets and not --all errors",
+			info:            ddASGInfo,
+			wantErr:         true,
+			wantErrContains: "at least one --target",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := BuildPlan(tc.info, tc.all, tc.targets)
+			if tc.wantErr {
+				require.Error(t, err)
+				if tc.wantErrContains != "" {
+					assert.Contains(t, err.Error(), tc.wantErrContains)
+				}
+				return
+			}
+			require.NoError(t, err)
+			if tc.wantEntities != nil {
+				entities := make([]string, 0, len(got))
+				for _, g := range got {
+					entities = append(entities, string(g.Manager)+"/"+g.Entity)
+				}
+				assert.ElementsMatch(t, tc.wantEntities, entities)
+			}
+			for entity, wantNodes := range tc.wantNodes {
+				found := false
+				for _, g := range got {
+					if g.Entity == entity {
+						assert.Equal(t, wantNodes, g.Nodes)
+						found = true
+						break
+					}
+				}
+				assert.True(t, found, "expected entity %q in plan", entity)
+			}
+		})
+	}
+}
+
+func TestHasDatadogManagedNodePool(t *testing.T) {
+	for _, tc := range []struct {
+		name string
+		info *clusterinfo.ClusterInfo
+		want bool
+	}{
+		{
+			name: "no karpenter bucket",
+			info: &clusterinfo.ClusterInfo{NodeManagement: map[clusterinfo.NodeManager]map[string]clusterinfo.NodeManagerEntry{}},
+			want: false,
+		},
+		{
+			name: "only user NodePools",
+			info: &clusterinfo.ClusterInfo{NodeManagement: map[clusterinfo.NodeManager]map[string]clusterinfo.NodeManagerEntry{
+				clusterinfo.NodeManagerKarpenter: {"user-np": {ManagedByDatadog: false}},
+			}},
+			want: false,
+		},
+		{
+			name: "at least one Datadog NodePool",
+			info: &clusterinfo.ClusterInfo{NodeManagement: map[clusterinfo.NodeManager]map[string]clusterinfo.NodeManagerEntry{
+				clusterinfo.NodeManagerKarpenter: {
+					"user-np": {ManagedByDatadog: false},
+					"dd-np":   {ManagedByDatadog: true},
+				},
+			}},
+			want: true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, hasDatadogManagedNodePool(tc.info))
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/preflight.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/preflight.go
new file mode 100644
index 0000000000..1ae27ec17c
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/preflight.go
@@ -0,0 +1,79 @@
+package evict
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"github.com/fatih/color"
+	"github.com/samber/lo"
+	"k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+// ddNodePoolCreatedLabel is the label every Datadog autoscaling product (this
+// CLI and the cluster agent) puts on the Karpenter NodePools it creates. Used
+// here to identify "Datadog-side" NodePools when comparing weights against
+// user-side ones.
+const ddNodePoolCreatedLabel = "autoscaling.datadoghq.com/created"
+
+// runPreflightWarnings prints non-blocking warnings for situations that don't
+// prevent eviction but may surprise the operator after the fact. Pure
+// best-effort — failures here are logged but do not abort the run.
+func runPreflightWarnings(ctx context.Context, streams genericclioptions.IOStreams, ctrlClient client.Client, targets []Target) {
+	warnKarpenterWeightConflicts(ctx, streams, ctrlClient, targets)
+}
+
+// warnKarpenterWeightConflicts compares the spec.weight of each user-managed
+// Karpenter NodePool we are about to drain against the max spec.weight among
+// Datadog-managed NodePools. When a user NodePool has a weight >= the Datadog
+// max, freshly evicted pods may be re-scheduled onto a new node from the
+// same user NodePool, defeating the migration.
+func warnKarpenterWeightConflicts(ctx context.Context, streams genericclioptions.IOStreams, ctrlClient client.Client, targets []Target) {
+	userNPNames := lo.FilterMap(targets, func(t Target, _ int) (string, bool) {
+		return t.Entity, t.Manager == clusterinfo.NodeManagerKarpenter
+	})
+	if len(userNPNames) == 0 {
+		return
+	}
+
+	list := &karpv1.NodePoolList{}
+	if err := ctrlClient.List(ctx, list); err != nil {
+		if meta.IsNoMatchError(err) {
+			return
+		}
+		log.Printf("Warning: failed to list NodePools for pre-flight weight check: %v", err)
+		return
+	}
+
+	var ddMaxWeight int32
+	weightByName := make(map[string]int32, len(list.Items))
+	for i := range list.Items {
+		np := &list.Items[i]
+		var w int32
+		if np.Spec.Weight != nil {
+			w = *np.Spec.Weight
+		}
+		weightByName[np.Name] = w
+		if np.Labels[ddNodePoolCreatedLabel] == "true" && w > ddMaxWeight {
+			ddMaxWeight = w
+		}
+	}
+
+	for _, name := range userNPNames {
+		w, ok := weightByName[name]
+		if !ok {
+			continue
+		}
+		if w >= ddMaxWeight {
+			fmt.Fprintln(streams.ErrOut, color.YellowString(
+				"⚠ NodePool %q has spec.weight=%d ≥ Datadog NodePool max weight=%d; evicted pods may be re-scheduled onto a freshly provisioned node from the same user NodePool. Consider raising the Datadog NodePool weight before retrying.",
+				name, w, ddMaxWeight,
+			))
+		}
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/preflight_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/preflight_test.go
new file mode 100644
index 0000000000..6d814179d1
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/preflight_test.go
@@ -0,0 +1,130 @@
+package evict
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/utils/ptr"
+	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
+	ctrlfake "sigs.k8s.io/controller-runtime/pkg/client/fake"
+	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+func newKarpenterScheme(t *testing.T) *runtime.Scheme {
+	t.Helper()
+	sch := runtime.NewScheme()
+	require.NoError(t, scheme.AddToScheme(sch))
+	gv := schema.GroupVersion{Group: "karpenter.sh", Version: "v1"}
+	sch.AddKnownTypes(gv, &karpv1.NodePool{}, &karpv1.NodePoolList{})
+	metav1.AddToGroupVersion(sch, gv)
+	return sch
+}
+
+func mkNodePool(name string, weight *int32, datadogManaged bool) *karpv1.NodePool {
+	labels := map[string]string{}
+	if datadogManaged {
+		labels[ddNodePoolCreatedLabel] = "true"
+	}
+	return &karpv1.NodePool{
+		ObjectMeta: metav1.ObjectMeta{Name: name, Labels: labels},
+		Spec:       karpv1.NodePoolSpec{Weight: weight},
+	}
+}
+
+func TestWarnKarpenterWeightConflicts(t *testing.T) {
+	for _, tc := range []struct {
+		name      string
+		nodePools []ctrlclient.Object
+		targets   []Target
+		// wantContains lists substrings that MUST appear in the stderr output.
+		wantContains []string
+		// wantWarnEmpty asserts the stderr output is empty (no warning fired).
+		wantWarnEmpty bool
+	}{
+		{
+			// No Karpenter target ⇒ nothing to warn about.
+			name: "no karpenter targets ⇒ no warning",
+			targets: []Target{
+				{Manager: clusterinfo.NodeManagerASG, Entity: "asg-1"},
+				{Manager: clusterinfo.NodeManagerStandalone, Entity: ""},
+			},
+			wantWarnEmpty: true,
+		},
+		{
+			// User NP weight < Datadog NP weight ⇒ no conflict, no warning.
+			name: "user weight below Datadog weight",
+			nodePools: []ctrlclient.Object{
+				mkNodePool("dd-np", ptr.To(int32(100)), true),
+				mkNodePool("user-np", ptr.To(int32(50)), false),
+			},
+			targets:       []Target{{Manager: clusterinfo.NodeManagerKarpenter, Entity: "user-np"}},
+			wantWarnEmpty: true,
+		},
+		{
+			// User NP weight > Datadog NP weight ⇒ warns (`>=` check).
+			name: "user weight above Datadog weight warns",
+			nodePools: []ctrlclient.Object{
+				mkNodePool("dd-np", ptr.To(int32(10)), true),
+				mkNodePool("user-np", ptr.To(int32(50)), false),
+			},
+			targets:      []Target{{Manager: clusterinfo.NodeManagerKarpenter, Entity: "user-np"}},
+			wantContains: []string{"user-np", "weight=50"},
+		},
+		{
+			// Both nil ⇒ both default to 0 ⇒ equal-weight conflict warns.
+			name: "nil weights both default to 0 (equal-weight conflict)",
+			nodePools: []ctrlclient.Object{
+				mkNodePool("dd-np", nil, true),
+				mkNodePool("user-np", nil, false),
+			},
+			targets:      []Target{{Manager: clusterinfo.NodeManagerKarpenter, Entity: "user-np"}},
+			wantContains: []string{"user-np"},
+		},
+		{
+			// Target name doesn't match any NodePool in the cluster ⇒
+			// nothing to compare against, no panic.
+			name: "target not in cluster is ignored",
+			nodePools: []ctrlclient.Object{
+				mkNodePool("dd-np", ptr.To(int32(100)), true),
+			},
+			targets:       []Target{{Manager: clusterinfo.NodeManagerKarpenter, Entity: "ghost-np"}},
+			wantWarnEmpty: true,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			cli := ctrlfake.NewClientBuilder().
+				WithScheme(newKarpenterScheme(t)).
+				WithObjects(tc.nodePools...).
+				Build()
+			streams, _, _, errBuf := genericclioptions.NewTestIOStreams()
+
+			warnKarpenterWeightConflicts(t.Context(), streams, cli, tc.targets)
+
+			out := errBuf.String()
+			if tc.wantWarnEmpty {
+				assert.Empty(t, out)
+			}
+			for _, s := range tc.wantContains {
+				assert.Contains(t, out, s)
+			}
+		})
+	}
+}
+
+// TestRunPreflightWarnings_NoOp keeps the thin `runPreflightWarnings` wrapper
+// exercised separately so coverage doesn't regress as new pre-flight checks
+// are added.
+func TestRunPreflightWarnings_NoOp(t *testing.T) {
+	cli := ctrlfake.NewClientBuilder().WithScheme(newKarpenterScheme(t)).Build()
+	streams, _, _, errBuf := genericclioptions.NewTestIOStreams()
+	runPreflightWarnings(t.Context(), streams, cli, nil)
+	assert.Empty(t, errBuf.String())
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/prompt.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/prompt.go
new file mode 100644
index 0000000000..f467f3ad60
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/prompt.go
@@ -0,0 +1,65 @@
+package evict
+
+import (
+	"fmt"
+	"maps"
+	"slices"
+	"strings"
+
+	"github.com/fatih/color"
+	"github.com/samber/lo"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+// printPlan writes a human-readable description of the upcoming actions —
+// cluster-autoscaler scale-down, PDB creation, per-target evictions — to
+// streams.Out. Used both at confirmation time and in --dry-run mode.
+func printPlan(streams genericclioptions.IOStreams, info *clusterinfo.ClusterInfo, targets []Target, scaleCA, ensurePDBs bool) {
+	fmt.Fprintln(streams.Out, "\nThe following actions will be performed:")
+	if scaleCA && info != nil && info.Autoscaling.ClusterAutoscaler.Present {
+		fmt.Fprintf(streams.Out, "  • Scale cluster-autoscaler Deployment %s/%s to 0 replicas\n",
+			info.Autoscaling.ClusterAutoscaler.Namespace, info.Autoscaling.ClusterAutoscaler.Name)
+	}
+	if ensurePDBs {
+		fmt.Fprintln(streams.Out, "  • Create temporary PodDisruptionBudgets (maxUnavailable: 1) for workloads without one")
+	}
+
+	grouped := lo.GroupBy(targets, func(t Target) clusterinfo.NodeManager { return t.Manager })
+	// Sort the manager keys for stable, deterministic output.
+	for _, mgr := range slices.Sorted(maps.Keys(grouped)) {
+		ts := grouped[mgr]
+		fmt.Fprintf(streams.Out, "  • Evict %s entities:\n", mgr)
+		for _, t := range ts {
+			label := t.Entity
+			if label == "" {
+				label = "(all)"
+			}
+			fmt.Fprintf(streams.Out, "      ◦ %s (%d node(s))\n", label, len(t.Nodes))
+		}
+	}
+	if ensurePDBs {
+		fmt.Fprintln(streams.Out, "  • Remove the temporary PodDisruptionBudgets created above")
+	}
+	fmt.Fprintln(streams.Out, color.YellowString("\n⚠ This will drain workloads and terminate the underlying instances of non-Datadog node groups."))
+	fmt.Fprintln(streams.Out, color.YellowString("  Verify the Datadog Karpenter NodePool has enough capacity headroom for the migrated pods."))
+}
+
+// promptConfirmation reads a y/N answer from streams.In after the plan has
+// been printed. Returns true when the user confirmed (typed `y` or `yes`).
+// Read errors or any other input mean "declined" — unattended runs should
+// pass --yes to skip the prompt entirely.
+func promptConfirmation(streams genericclioptions.IOStreams) bool {
+	fmt.Fprint(streams.Out, "\nContinue? (y/N): ")
+	var response string
+	// Fscanln may return "unexpected newline" when the user just presses
+	// Enter; that's treated as decline below.
+	_, _ = fmt.Fscanln(streams.In, &response)
+	response = strings.ToLower(strings.TrimSpace(response))
+	confirmed := response == "y" || response == "yes"
+	if !confirmed {
+		fmt.Fprintln(streams.Out, "Eviction cancelled.")
+	}
+	return confirmed
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/prompt_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/prompt_test.go
new file mode 100644
index 0000000000..8e45fb346d
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/prompt_test.go
@@ -0,0 +1,135 @@
+package evict
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+func TestPrintPlan(t *testing.T) {
+	caPresent := clusterinfo.Autoscaling{
+		ClusterAutoscaler: clusterinfo.ClusterAutoscaler{Present: true, Namespace: "kube-system", Name: "cluster-autoscaler"},
+	}
+	caAbsent := clusterinfo.Autoscaling{ClusterAutoscaler: clusterinfo.ClusterAutoscaler{Present: false}}
+	fullTargets := []Target{
+		{Manager: clusterinfo.NodeManagerASG, Entity: "legacy-asg", Nodes: []string{"ip-1", "ip-2"}},
+		{Manager: clusterinfo.NodeManagerKarpenter, Entity: "user-np", Nodes: []string{"ip-3"}},
+		{Manager: clusterinfo.NodeManagerStandalone, Entity: "", Nodes: []string{"ip-4"}},
+	}
+
+	for _, tc := range []struct {
+		name string
+		// info, when non-nil, becomes the input. Use a sentinel "nil" via
+		// nilInfo=true to test the nil-tolerant path.
+		info       *clusterinfo.ClusterInfo
+		nilInfo    bool
+		targets    []Target
+		scaleCA    bool
+		ensurePDBs bool
+
+		wantContains    []string
+		wantNotContains []string
+	}{
+		{
+			name:       "all sections rendered",
+			info:       &clusterinfo.ClusterInfo{Autoscaling: caPresent},
+			targets:    fullTargets,
+			scaleCA:    true,
+			ensurePDBs: true,
+			wantContains: []string{
+				"cluster-autoscaler Deployment kube-system/cluster-autoscaler",
+				"Create temporary PodDisruptionBudgets",
+				"Remove the temporary PodDisruptionBudgets",
+				"Evict asg entities",
+				"legacy-asg (2 node(s))",
+				"Evict karpenter entities",
+				"user-np (1 node(s))",
+				"Evict standalone entities",
+				// Standalone entry with empty name renders as "(all)".
+				"(all) (1 node(s))",
+			},
+		},
+		{
+			// scaleCA=false hides the CA bullet even when CA is Present;
+			// ensurePDBs=false hides the PDB bullets.
+			name:            "scaleCA off and ensurePDBs off hide their bullets",
+			info:            &clusterinfo.ClusterInfo{Autoscaling: caPresent},
+			targets:         []Target{{Manager: clusterinfo.NodeManagerASG, Entity: "x"}},
+			scaleCA:         false,
+			ensurePDBs:      false,
+			wantNotContains: []string{"Scale cluster-autoscaler", "PodDisruptionBudgets"},
+		},
+		{
+			// CA not Present ⇒ no bullet regardless of scaleCA.
+			name:            "CA absent hides the bullet",
+			info:            &clusterinfo.ClusterInfo{Autoscaling: caAbsent},
+			targets:         nil,
+			scaleCA:         true,
+			ensurePDBs:      false,
+			wantNotContains: []string{"Scale cluster-autoscaler"},
+		},
+		{
+			// Nil info ⇒ no CA bullet but the PDB bullets and warning
+			// footer still appear.
+			name:            "nil info tolerated",
+			nilInfo:         true,
+			targets:         nil,
+			scaleCA:         true,
+			ensurePDBs:      true,
+			wantContains:    []string{"Create temporary PodDisruptionBudgets"},
+			wantNotContains: []string{"Scale cluster-autoscaler"},
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			streams, _, outBuf, _ := genericclioptions.NewTestIOStreams()
+			info := tc.info
+			if tc.nilInfo {
+				info = nil
+			}
+			printPlan(streams, info, tc.targets, tc.scaleCA, tc.ensurePDBs)
+			out := outBuf.String()
+			for _, s := range tc.wantContains {
+				assert.Contains(t, out, s)
+			}
+			for _, s := range tc.wantNotContains {
+				assert.NotContains(t, out, s)
+			}
+		})
+	}
+}
+
+func TestPromptConfirmation(t *testing.T) {
+	for _, tc := range []struct {
+		name              string
+		input             string
+		wantOK            bool
+		wantCancelMessage bool
+	}{
+		{name: "y", input: "y", wantOK: true},
+		{name: "Y", input: "Y", wantOK: true},
+		{name: "yes", input: "yes", wantOK: true},
+		{name: "YES", input: "YES", wantOK: true},
+		{name: "Yes-with-newline", input: "Yes\n", wantOK: true},
+
+		{name: "n", input: "n", wantOK: false, wantCancelMessage: true},
+		{name: "N", input: "N", wantOK: false, wantCancelMessage: true},
+		{name: "no", input: "no", wantOK: false, wantCancelMessage: true},
+		{name: "empty", input: "", wantOK: false, wantCancelMessage: true},
+		{name: "garbage", input: "blah", wantOK: false, wantCancelMessage: true},
+		{name: "digit", input: "1", wantOK: false, wantCancelMessage: true},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			streams, stdin, outBuf, _ := genericclioptions.NewTestIOStreams()
+			stdin.WriteString(tc.input + "\n")
+			assert.Equal(t, tc.wantOK, promptConfirmation(streams))
+			if tc.wantCancelMessage {
+				out := outBuf.String()
+				assert.True(t, strings.Contains(out, "cancelled") || strings.Contains(out, "Cancelled"))
+			}
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/run.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/run.go
new file mode 100644
index 0000000000..f18d6d3fb4
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/run.go
@@ -0,0 +1,275 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"time"
+
+	"k8s.io/cli-runtime/pkg/genericclioptions"
+	"k8s.io/client-go/kubernetes"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clients"
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/display"
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/karpenter"
+)
+
+// RunOptions captures every parameter callers (the evict cobra command) feed
+// into Run. Built once by the command's run() method.
+type RunOptions struct {
+	ClusterName        string
+	KarpenterNamespace string // override; auto-detected when empty
+	All                bool
+	Targets            []Target // parsed --target=<type>/<name>
+	SkipCA             bool
+	EnsurePDBs         bool
+	DryRun             bool
+	Yes                bool
+	EvictionTimeout    time.Duration
+	NodeTimeout        time.Duration
+	Debug              bool
+}
+
+// Run is the orchestrator entry point. Idempotent: a second invocation on a
+// cluster already migrated returns success without changes. Crash-safe: a
+// killed run leaves no permanently corrupting state — re-running cleans up.
+func Run(ctx context.Context, streams genericclioptions.IOStreams, configFlags *genericclioptions.ConfigFlags, clientset *kubernetes.Clientset, opts RunOptions) error {
+	log.SetOutput(streams.ErrOut)
+	ctrl.SetLogger(zap.New(zap.UseDevMode(false), zap.WriteTo(streams.ErrOut)))
+
+	cli, err := clients.Build(ctx, configFlags, clientset)
+	if err != nil {
+		return fmt.Errorf("failed to build clients: %w", err)
+	}
+	if err = clients.ValidateAWSAccountConsistency(ctx, cli, opts.ClusterName, configFlags); err != nil {
+		return err
+	}
+	k, err := karpenter.FindInstallation(ctx, clientset)
+	if err != nil {
+		return fmt.Errorf("failed to check for an existing Karpenter installation: %w", err)
+	}
+	if k == nil {
+		return errors.New("Karpenter is not installed on this cluster; run `kubectl datadog autoscaling cluster install` first")
+	}
+	namespace := opts.KarpenterNamespace
+	if namespace == "" {
+		namespace = k.Namespace
+	}
+
+	display.PrintBox(streams.Out, "Evicting legacy nodes from cluster "+opts.ClusterName+".")
+
+	info, err := classify(ctx, clientset, cli, opts.ClusterName)
+	if err != nil {
+		return fmt.Errorf("failed to classify cluster nodes: %w", err)
+	}
+	if info.Autoscaling.EKSAutoMode.Enabled {
+		return errors.New("EKS auto-mode is enabled on this cluster; eviction is not applicable (auto-mode manages its own node lifecycle)")
+	}
+	// Skip the ConfigMap write on dry-run: a preview must not mutate the
+	// cluster (and must not require write RBAC on the Karpenter namespace).
+	if opts.DryRun {
+		log.Printf("[dry-run] would persist cluster-info to ConfigMap %s/%s", namespace, clusterinfo.ConfigMapName)
+	} else if err = clusterinfo.Persist(ctx, cli.K8sClient, namespace, info); err != nil {
+		log.Printf("Warning: failed to persist updated cluster-info ConfigMap: %v", err)
+	}
+
+	// Before any destructive work, verify there is at least one Datadog-managed
+	// NodePool to receive the evicted pods. Without this guard, scaling legacy
+	// capacity to 0 would leave the cluster with no working capacity (e.g. when
+	// `install` was run with --create-karpenter-resources=none, or the user
+	// deleted the Datadog NodePools out of band).
+	if !hasDatadogManagedNodePool(info) {
+		return errors.New("no Datadog-managed Karpenter NodePool found in the cluster snapshot; legacy nodes would have no destination to migrate to. Run `kubectl datadog autoscaling cluster install` (or `update`) to create the Datadog NodePool first")
+	}
+
+	targets, err := BuildPlan(info, opts.All, opts.Targets)
+	if err != nil {
+		return err
+	}
+	if len(targets) == 0 {
+		// A previous run may have been interrupted (e.g. an EKS managed node
+		// group timed out, leaving its temporary PDBs in place for a later
+		// rerun). By the time that rerun finds nothing left to evict, those
+		// leaked PDBs would otherwise persist indefinitely with
+		// maxUnavailable: 1 and throttle future rollouts/disruptions, so
+		// reclaim anything left behind before the no-op exit.
+		reclaimLeakedTempPDBs(ctx, cli.K8sClient, opts.EnsurePDBs, opts.DryRun)
+		display.PrintBox(streams.Out, "Nothing to evict — the cluster is already on Datadog-managed Karpenter NodePools.")
+		return nil
+	}
+
+	runPreflightWarnings(ctx, streams, cli.K8sClient, targets)
+	printPlan(streams, info, targets, !opts.SkipCA, opts.EnsurePDBs)
+
+	if !opts.Yes && !opts.DryRun {
+		if !promptConfirmation(streams) {
+			return nil
+		}
+	}
+
+	// Step 1: cluster-autoscaler scale-down — first, so it does not undo our
+	// work by provisioning new legacy nodes while we drain.
+	if !opts.SkipCA {
+		if err := scaleDownClusterAutoscaler(ctx, clientset, info.Autoscaling.ClusterAutoscaler, opts.DryRun); err != nil {
+			return fmt.Errorf("failed to scale down cluster-autoscaler: %w", err)
+		}
+	}
+
+	// Step 2: ensure temporary PDBs. On error, attempt cleanup of whatever
+	// landed before the failure — the temp PDBs carry our labels so the
+	// label-based cleanup picks them up. On a SIGINT mid-flight the temp
+	// PDBs may leak; a subsequent run cleans them up via the same label.
+	if opts.EnsurePDBs {
+		if err := ensureTempPDBs(ctx, clientset, cli.K8sClient, targets, opts.DryRun); err != nil {
+			if cleanupErr := cleanupTempPDBs(ctx, cli.K8sClient, opts.DryRun); cleanupErr != nil {
+				log.Printf("Warning: failed to cleanup partial temporary PDBs: %v", cleanupErr)
+			}
+			return fmt.Errorf("failed to ensure temporary PDBs: %w", err)
+		}
+	}
+
+	drainOpts := nodeDrainOptions{
+		DryRun:          opts.DryRun,
+		EvictionTimeout: opts.EvictionTimeout,
+		NodeTimeout:     opts.NodeTimeout,
+		PollInterval:    2 * time.Second,
+	}
+	evictor := func(c context.Context, t Target, d nodeDrainOptions) error {
+		return evictTarget(c, clientset, cli, opts.ClusterName, t, d)
+	}
+	result := evictAllTargets(ctx, targets, drainOpts, evictor)
+	errs := result.Errors
+
+	// Step 4: cleanup temporary PDBs explicitly here — keeping it inline
+	// (rather than deferred) means the "Deleted temporary PDB …" log lines
+	// appear BEFORE the final success/failure summary, so the user does not
+	// experience an apparent hang after seeing the "✅" box.
+	//
+	// When at least one EKS managed node group did not finish draining
+	// within the timeout, we skip the cleanup: EKS may still be evicting
+	// pods on those nodes, and dropping the temp PDBs now would leave
+	// cross-type workloads unprotected mid-drain. The label-based cleanup
+	// on a subsequent run picks the PDBs up once EKS converges.
+	switch {
+	case !opts.EnsurePDBs:
+	case result.EKSDrainIncomplete:
+		log.Printf("Note: at least one EKS managed node group did not finish draining within --node-timeout; temporary PDBs left in place so EKS can finish safely. Re-run the command once `aws eks describe-nodegroup` reports the group at 0 nodes — the next run will clean them up.")
+	default:
+		if err := cleanupTempPDBs(ctx, cli.K8sClient, opts.DryRun); err != nil {
+			log.Printf("Warning: failed to cleanup temporary PDBs: %v", err)
+		}
+	}
+
+	// Re-classify and persist to reflect the final state. Skipped on dry-run
+	// for the same reason as the pre-eviction Persist above.
+	if opts.DryRun {
+		log.Printf("[dry-run] would re-classify and persist post-eviction cluster-info to ConfigMap %s/%s", namespace, clusterinfo.ConfigMapName)
+	} else if final, classifyErr := classify(ctx, clientset, cli, opts.ClusterName); classifyErr == nil {
+		if err := clusterinfo.Persist(ctx, cli.K8sClient, namespace, final); err != nil {
+			log.Printf("Warning: failed to persist post-eviction cluster-info: %v", err)
+		}
+	} else {
+		log.Printf("Warning: failed to re-classify post-eviction: %v", classifyErr)
+	}
+
+	if len(errs) > 0 {
+		return fmt.Errorf("eviction completed with %d error(s):\n%w", len(errs), errors.Join(errs...))
+	}
+	display.PrintBox(streams.Out, "✅ Legacy nodes drained from cluster "+opts.ClusterName+".")
+	return nil
+}
+
+// reclaimLeakedTempPDBs runs the idempotent, label-based temp-PDB cleanup when
+// PDB management is enabled. It is invoked on the no-target path so that
+// temporary PDBs left behind by a prior interrupted run (e.g. an EKS managed
+// node group that timed out) are reclaimed once the cluster has fully migrated
+// and there is nothing left to evict. A no-op when ensurePDBs is false.
+func reclaimLeakedTempPDBs(ctx context.Context, ctrlClient client.Client, ensurePDBs, dryRun bool) {
+	if !ensurePDBs {
+		return
+	}
+	if err := cleanupTempPDBs(ctx, ctrlClient, dryRun); err != nil {
+		log.Printf("Warning: failed to cleanup leftover temporary PDBs: %v", err)
+	}
+}
+
+// classify wraps clusterinfo.Classify with the call-site boilerplate shared by
+// the pre- and post-eviction snapshots.
+func classify(ctx context.Context, clientset *kubernetes.Clientset, cli *clients.Clients, clusterName string) (*clusterinfo.ClusterInfo, error) {
+	return clusterinfo.Classify(ctx, clusterinfo.ClassifyInput{
+		K8sClient:   clientset,
+		CtrlClient:  cli.K8sClient,
+		Autoscaling: cli.Autoscaling,
+		EKS:         cli.EKS,
+		Discovery:   clientset.Discovery(),
+		ClusterName: clusterName,
+	})
+}
+
+// evictResult is the structured outcome of evictAllTargets. Errors is the
+// aggregated per-target error list (empty when everything succeeded).
+// EKSDrainIncomplete is true when at least one EKS managed node group target
+// returned an error WRAPPING errEKSDrainIncomplete — the EKS drain may still
+// be in progress, in which case the caller must NOT cleanup the temporary
+// PDBs (EKS would then over-disrupt the still-running workloads).
+type evictResult struct {
+	Errors             []error
+	EKSDrainIncomplete bool
+}
+
+// targetEvictor is the closure injected into evictAllTargets by Run. Pulling
+// the dispatch out as an interface-like function makes the orchestrator
+// unit-testable without faking the entire clients.Clients struct.
+type targetEvictor func(ctx context.Context, t Target, drainOpts nodeDrainOptions) error
+
+// evictAllTargets processes targets sequentially, in the order BuildPlan
+// produced. Errors are accumulated; a failing target does not abort the
+// others — every issue surfaces at the end so a partial migration is fully
+// visible. Sequential execution keeps logs linear, bounds the eviction
+// pressure on the apiserver, and matches the synchronous per-target shape
+// (every evictor — including EKS MNG — already blocks until its drain
+// completes).
+func evictAllTargets(ctx context.Context, targets []Target, drainOpts nodeDrainOptions, evictor targetEvictor) evictResult {
+	var (
+		errs               []error
+		eksDrainIncomplete bool
+	)
+	for _, t := range targets {
+		err := evictor(ctx, t, drainOpts)
+		if err == nil {
+			continue
+		}
+		errs = append(errs, fmt.Errorf("%s/%s: %w", t.Manager, t.Entity, err))
+		// Only treat the drain as "still in progress" when EKS accepted the
+		// scaling change but observation of the drain failed or timed out.
+		// A failed UpdateNodegroupConfig means EKS never started draining,
+		// so cleanup is safe.
+		if errors.Is(err, errEKSDrainIncomplete) {
+			eksDrainIncomplete = true
+		}
+	}
+	return evictResult{Errors: errs, EKSDrainIncomplete: eksDrainIncomplete}
+}
+
+// evictTarget dispatches a single Target to the manager-specific evictor.
+// Wrapped into a closure inside Run so evictAllTargets can be exercised with
+// a fake evictor in tests.
+func evictTarget(ctx context.Context, clientset kubernetes.Interface, cli *clients.Clients, clusterName string, t Target, drainOpts nodeDrainOptions) error {
+	switch t.Manager {
+	case clusterinfo.NodeManagerASG:
+		return evictASG(ctx, clientset, cli.Autoscaling, t.Entity, t.Nodes, drainOpts)
+	case clusterinfo.NodeManagerEKSManagedNodeGroup:
+		return evictEKSManagedNodeGroup(ctx, cli.EKS, clientset, clusterName, t.Entity, drainOpts)
+	case clusterinfo.NodeManagerKarpenter:
+		return evictKarpenterUserNodePool(ctx, clientset, t.Entity, t.Nodes, drainOpts)
+	case clusterinfo.NodeManagerStandalone:
+		return evictStandalone(ctx, clientset, cli.EC2, t.Nodes, drainOpts)
+	default:
+		return fmt.Errorf("unsupported manager %q", t.Manager)
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/run_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/run_test.go
new file mode 100644
index 0000000000..af7f20577f
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/run_test.go
@@ -0,0 +1,179 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
+	ctrlfake "sigs.k8s.io/controller-runtime/pkg/client/fake"
+	"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
+
+	"github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo"
+)
+
+// TestReclaimLeakedTempPDBs covers the no-target recovery path: a temp PDB left
+// behind by a prior interrupted run is reclaimed only when PDB management is
+// enabled and not in dry-run.
+func TestReclaimLeakedTempPDBs(t *testing.T) {
+	leakedTempPDB := func() *policyv1.PodDisruptionBudget {
+		return &policyv1.PodDisruptionBudget{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "leaked", Namespace: "default",
+				Labels: map[string]string{
+					pdbManagedByLabelKey: pdbManagedByLabelValue,
+					pdbTempLabelKey:      pdbTempLabelValue,
+				},
+			},
+		}
+	}
+
+	for _, tc := range []struct {
+		name        string
+		ensurePDBs  bool
+		dryRun      bool
+		wantDeleted bool
+	}{
+		{name: "enabled deletes leaked temp PDB", ensurePDBs: true, wantDeleted: true},
+		{name: "disabled is a no-op", ensurePDBs: false, wantDeleted: false},
+		{name: "dry-run does not delete", ensurePDBs: true, dryRun: true, wantDeleted: false},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			cli := ctrlfake.NewClientBuilder().WithScheme(newCtrlScheme(t)).WithObjects(leakedTempPDB()).Build()
+
+			reclaimLeakedTempPDBs(t.Context(), cli, tc.ensurePDBs, tc.dryRun)
+
+			list := &policyv1.PodDisruptionBudgetList{}
+			require.NoError(t, cli.List(t.Context(), list))
+			if tc.wantDeleted {
+				assert.Empty(t, list.Items)
+			} else {
+				require.Len(t, list.Items, 1)
+				assert.Equal(t, "leaked", list.Items[0].Name)
+			}
+		})
+	}
+
+	// A cleanup error must be swallowed (logged, not propagated): the no-op
+	// exit must still succeed even if reclaiming the leaked PDBs fails.
+	t.Run("cleanup error is swallowed", func(t *testing.T) {
+		cli := ctrlfake.NewClientBuilder().
+			WithScheme(newCtrlScheme(t)).
+			WithObjects(leakedTempPDB()).
+			WithInterceptorFuncs(interceptor.Funcs{
+				List: func(context.Context, ctrlclient.WithWatch, ctrlclient.ObjectList, ...ctrlclient.ListOption) error {
+					return errors.New("boom")
+				},
+			}).
+			Build()
+
+		assert.NotPanics(t, func() {
+			reclaimLeakedTempPDBs(t.Context(), cli, true, false)
+		})
+	})
+}
+
+func TestEvictAllTargets(t *testing.T) {
+	asgTarget := Target{Manager: clusterinfo.NodeManagerASG, Entity: "asg-1"}
+	karpenterTarget := Target{Manager: clusterinfo.NodeManagerKarpenter, Entity: "np-1"}
+	eksTarget := Target{Manager: clusterinfo.NodeManagerEKSManagedNodeGroup, Entity: "mng-1"}
+	standaloneTarget := Target{Manager: clusterinfo.NodeManagerStandalone, Entity: ""}
+
+	for _, tc := range []struct {
+		name              string
+		targets           []Target
+		evictor           targetEvictor
+		wantErrors        int
+		wantEKSIncomplete bool
+		wantErrIsSentinel bool
+		// expectInvoked, when non-nil, lists managers that MUST have had
+		// their evictor called (verifies that one failing target does not
+		// abort the others).
+		expectInvoked []clusterinfo.NodeManager
+	}{
+		{
+			name:    "happy path",
+			targets: []Target{asgTarget, karpenterTarget},
+			evictor: func(_ context.Context, _ Target, _ nodeDrainOptions) error { return nil },
+		},
+		{
+			// Safety-critical signal: an EKS MNG target returning
+			// errEKSDrainIncomplete (wrapped) must flag the result so Run
+			// skips the temp-PDB cleanup.
+			name:    "EKS sentinel propagates",
+			targets: []Target{eksTarget, asgTarget},
+			evictor: func(_ context.Context, t Target, _ nodeDrainOptions) error {
+				if t.Manager == clusterinfo.NodeManagerEKSManagedNodeGroup {
+					return fmt.Errorf("simulated timeout: %w", errEKSDrainIncomplete)
+				}
+				return nil
+			},
+			wantErrors:        1,
+			wantEKSIncomplete: true,
+			wantErrIsSentinel: true,
+		},
+		{
+			// An EKS failure that is NOT a wait timeout (e.g. an
+			// UpdateNodegroupConfig API rejection) means EKS never started
+			// draining, so temp-PDB cleanup is safe ⇒ flag stays false.
+			name:    "non-sentinel EKS error does not flag",
+			targets: []Target{eksTarget},
+			evictor: func(_ context.Context, _ Target, _ nodeDrainOptions) error {
+				return errors.New("invalid scaling config")
+			},
+			wantErrors: 1,
+		},
+		{
+			// An ASG drain failure has nothing to do with EKS — must NOT
+			// flag.
+			name:    "non-EKS error does not flag",
+			targets: []Target{asgTarget},
+			evictor: func(_ context.Context, _ Target, _ nodeDrainOptions) error {
+				return errors.New("ASG drain failed")
+			},
+			wantErrors: 1,
+		},
+		{
+			// One failing target must not abort the rest — the loop keeps
+			// going and every other manager is still invoked.
+			name:       "failing target does not abort the others",
+			targets:    []Target{asgTarget, karpenterTarget, standaloneTarget},
+			evictor:    nil, // installed inline below to capture invocations.
+			wantErrors: 1,
+			expectInvoked: []clusterinfo.NodeManager{
+				clusterinfo.NodeManagerASG,
+				clusterinfo.NodeManagerKarpenter,
+				clusterinfo.NodeManagerStandalone,
+			},
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			invoked := map[clusterinfo.NodeManager]int{}
+			evictor := tc.evictor
+			if evictor == nil {
+				evictor = func(_ context.Context, target Target, _ nodeDrainOptions) error {
+					invoked[target.Manager]++
+					if target.Manager == clusterinfo.NodeManagerASG {
+						return errors.New("boom")
+					}
+					return nil
+				}
+			}
+
+			result := evictAllTargets(t.Context(), tc.targets, nodeDrainOptions{}, evictor)
+			require.Len(t, result.Errors, tc.wantErrors)
+			assert.Equal(t, tc.wantEKSIncomplete, result.EKSDrainIncomplete)
+			if tc.wantErrIsSentinel {
+				assert.ErrorIs(t, result.Errors[0], errEKSDrainIncomplete)
+			}
+			for _, mgr := range tc.expectInvoked {
+				assert.NotZero(t, invoked[mgr], "expected %s evictor to be invoked", mgr)
+			}
+		})
+	}
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/standalone.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/standalone.go
new file mode 100644
index 0000000000..499de4fd96
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/standalone.go
@@ -0,0 +1,66 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+
+	"github.com/aws/aws-sdk-go-v2/service/ec2"
+	"k8s.io/client-go/kubernetes"
+
+	commonaws "github.com/DataDog/datadog-operator/cmd/kubectl-datadog/autoscaling/cluster/common/aws"
+)
+
+// EC2API is the subset of *ec2.Client used by evictStandalone. Defined as an
+// interface so unit tests can stub the AWS SDK out cheaply.
+type EC2API interface {
+	TerminateInstances(ctx context.Context, in *ec2.TerminateInstancesInput, opts ...func(*ec2.Options)) (*ec2.TerminateInstancesOutput, error)
+}
+
+// evictStandalone cordons and drains every standalone EC2 instance (one whose
+// Kubernetes Node has an `aws:///<az>/i-<hex>` providerID but is not in any
+// ASG, EKS managed node group, or Karpenter NodePool). Every node is cordoned
+// up front (so pods evicted from one node are never rescheduled onto another
+// node of the group that is itself about to be drained), then each instance is
+// terminated as soon as its node has drained cleanly so the IaaS capacity is
+// freed without waiting for the rest of the group. Terminating standalone
+// instances directly is safe: by definition they have no controller that would
+// relaunch them.
+//
+// Safety rule: an EC2 instance is only terminated when its node was fully
+// drained. If draining failed (PDB-blocked, timeout, etc.), we leave the
+// underlying instance intact — terminating it would kill the still-running
+// pods. A re-run picks up where this one stopped.
+func evictStandalone(ctx context.Context, clientset kubernetes.Interface, ec2API EC2API, nodes []string, drainOpts nodeDrainOptions) error {
+	cordoned, errs := cordonNodes(ctx, clientset, nodes, drainOpts.DryRun)
+
+	for _, node := range cordoned {
+		nodeName := node.Name
+		id, hasInstanceID := commonaws.ExtractEC2InstanceID(node)
+		if !hasInstanceID {
+			log.Printf("Warning: node %s has unexpected providerID %q; cannot terminate the underlying instance", nodeName, node.Spec.ProviderID)
+		}
+		if err := drainNode(ctx, clientset, nodeName, drainOpts); err != nil {
+			errs = append(errs, fmt.Errorf("drain node %s: %w", nodeName, err))
+			continue // do NOT terminate this instance: workloads are still on it
+		}
+		// The node drained cleanly: terminate its instance right away so its
+		// capacity is freed without waiting for the rest of the group.
+		if !hasInstanceID {
+			continue
+		}
+		if drainOpts.DryRun {
+			log.Printf("[dry-run] would terminate standalone instance %s", id)
+			continue
+		}
+		if _, err := ec2API.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
+			InstanceIds: []string{id},
+		}); err != nil {
+			errs = append(errs, fmt.Errorf("terminate instance %s: %w", id, err))
+		} else {
+			log.Printf("Terminated standalone EC2 instance %s.", id)
+		}
+	}
+	return errors.Join(errs...)
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/evict/standalone_test.go b/cmd/kubectl-datadog/autoscaling/cluster/evict/standalone_test.go
new file mode 100644
index 0000000000..49695d29b2
--- /dev/null
+++ b/cmd/kubectl-datadog/autoscaling/cluster/evict/standalone_test.go
@@ -0,0 +1,198 @@
+package evict
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/aws/aws-sdk-go-v2/service/ec2"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	clienttesting "k8s.io/client-go/testing"
+)
+
+type stubEC2 struct {
+	terminated []string
+	err        error
+}
+
+func (s *stubEC2) TerminateInstances(_ context.Context, in *ec2.TerminateInstancesInput, _ ...func(*ec2.Options)) (*ec2.TerminateInstancesOutput, error) {
+	s.terminated = append(s.terminated, in.InstanceIds...)
+	return &ec2.TerminateInstancesOutput{}, s.err
+}
+
+func TestEvictStandalone(t *testing.T) {
+	ec2Node := func(name, az, id string) *corev1.Node {
+		return &corev1.Node{
+			ObjectMeta: metav1.ObjectMeta{Name: name},
+			Spec:       corev1.NodeSpec{ProviderID: "aws:///" + az + "/" + id},
+		}
+	}
+	stuckPod := func() *corev1.Pod {
+		return &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{Name: "blocker", Namespace: "default"},
+			Spec:       corev1.PodSpec{NodeName: "ip-stuck"},
+		}
+	}
+	gceNode := &corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{Name: "ip-1"},
+		Spec:       corev1.NodeSpec{ProviderID: "gce:///zone/instance"},
+	}
+	fastDrain := nodeDrainOptions{
+		EvictionTimeout: 50 * time.Millisecond,
+		NodeTimeout:     100 * time.Millisecond,
+		PollInterval:    10 * time.Millisecond,
+	}
+
+	for _, tc := range []struct {
+		name                   string
+		objects                []runtime.Object
+		installEvictionReactor bool
+		nodes                  []string
+		opts                   nodeDrainOptions
+		// stubErr, when set, makes every TerminateInstances call fail.
+		stubErr error
+		wantErr bool
+		// wantTerminated is the expected set of instance IDs in
+		// stubEC2.terminated. nil ⇒ TerminateInstances must not be called.
+		wantTerminated []string
+		// wantUnschedulable: per-node expected `Spec.Unschedulable`.
+		wantUnschedulable map[string]bool
+	}{
+		{
+			name:              "happy path terminates and cordons two nodes",
+			objects:           []runtime.Object{ec2Node("ip-1", "eu-west-3a", "i-aaa"), ec2Node("ip-2", "eu-west-3b", "i-bbb")},
+			nodes:             []string{"ip-1", "ip-2"},
+			opts:              newDrainOpts(false),
+			wantTerminated:    []string{"i-aaa", "i-bbb"},
+			wantUnschedulable: map[string]bool{"ip-1": true, "ip-2": true},
+		},
+		{
+			name:              "dry-run touches nothing",
+			objects:           []runtime.Object{ec2Node("ip-1", "eu-west-3a", "i-aaa")},
+			nodes:             []string{"ip-1"},
+			opts:              newDrainOpts(true),
+			wantUnschedulable: map[string]bool{"ip-1": false},
+		},
+		{
+			// Node already gone from K8s ⇒ no instance ID extracted ⇒
+			// no TerminateInstances call.
+			name:  "node already gone skips terminate",
+			nodes: []string{"ip-1"},
+			opts:  newDrainOpts(false),
+		},
+		{
+			// Non-EC2 providerID can't yield an instance ID, but the
+			// node is still cordoned and drained.
+			name:              "non-EC2 providerID skips terminate but still cordons",
+			objects:           []runtime.Object{gceNode},
+			nodes:             []string{"ip-1"},
+			opts:              newDrainOpts(false),
+			wantUnschedulable: map[string]bool{"ip-1": true},
+		},
+		{
+			// Safety regression: drain failure must prevent the
+			// instance from being terminated (otherwise the EC2 dies
+			// mid-grace-period).
+			name:                   "drain failure prevents terminate",
+			objects:                []runtime.Object{ec2Node("ip-stuck", "eu-west-3a", "i-aaaaaaaaaaaaaaaaa"), stuckPod()},
+			installEvictionReactor: true,
+			nodes:                  []string{"ip-stuck"},
+			opts:                   fastDrain,
+			wantErr:                true,
+		},
+		{
+			// A TerminateInstances failure on one node must propagate as an
+			// error yet not stop the loop: every drained node's instance is
+			// still attempted.
+			name:           "terminate failure propagates but loop continues",
+			objects:        []runtime.Object{ec2Node("ip-1", "eu-west-3a", "i-aaa"), ec2Node("ip-2", "eu-west-3b", "i-bbb")},
+			nodes:          []string{"ip-1", "ip-2"},
+			opts:           newDrainOpts(false),
+			stubErr:        errors.New("terminate boom"),
+			wantErr:        true,
+			wantTerminated: []string{"i-aaa", "i-bbb"},
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			client := fake.NewClientset(tc.objects...)
+			if tc.installEvictionReactor {
+				installPodEvictionReactor(client)
+			}
+			stub := &stubEC2{err: tc.stubErr}
+
+			err := evictStandalone(t.Context(), client, stub, tc.nodes, tc.opts)
+			if tc.wantErr {
+				require.Error(t, err)
+			} else {
+				require.NoError(t, err)
+			}
+			if tc.wantTerminated == nil {
+				assert.Empty(t, stub.terminated)
+			} else {
+				assert.ElementsMatch(t, tc.wantTerminated, stub.terminated)
+			}
+			for nodeName, want := range tc.wantUnschedulable {
+				got, getErr := client.CoreV1().Nodes().Get(t.Context(), nodeName, metav1.GetOptions{})
+				require.NoError(t, getErr)
+				assert.Equal(t, want, got.Spec.Unschedulable, "Spec.Unschedulable for %s", nodeName)
+			}
+		})
+	}
+}
+
+// TestEvictStandaloneCordonsAllBeforeDraining locks in the cordon-all-up-front
+// behavior: every node of the group is cordoned before ANY node starts
+// draining, so a pod evicted off one node is never rescheduled onto a sibling
+// node that is itself about to be drained. It asserts that at the moment ip-1's
+// pod is evicted, the sibling ip-2 has already been cordoned.
+func TestEvictStandaloneCordonsAllBeforeDraining(t *testing.T) {
+	ec2Node := func(name, az, id string) *corev1.Node {
+		return &corev1.Node{
+			ObjectMeta: metav1.ObjectMeta{Name: name},
+			Spec:       corev1.NodeSpec{ProviderID: "aws:///" + az + "/" + id},
+		}
+	}
+	pod1 := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"},
+		Spec:       corev1.PodSpec{NodeName: "ip-1"},
+	}
+	client := fake.NewClientset(ec2Node("ip-1", "eu-west-3a", "i-aaa"), ec2Node("ip-2", "eu-west-3b", "i-bbb"), pod1)
+
+	var (
+		recorded                     bool
+		node2CordonedAtFirstEviction bool
+	)
+	// On the first pod eviction (ip-1's drain), record whether ip-2 is already
+	// cordoned, then delete the evicted pod so the node becomes empty and the
+	// drain completes. Read/mutate via the tracker, not the typed client:
+	// Fake.Invokes holds a global lock for the whole reaction, so re-entering
+	// the typed client from inside a reactor would deadlock.
+	client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
+		ca, ok := action.(clienttesting.CreateAction)
+		if !ok || ca.GetSubresource() != "eviction" {
+			return false, nil, nil
+		}
+		if !recorded {
+			recorded = true
+			if obj, err := client.Tracker().Get(corev1.SchemeGroupVersion.WithResource("nodes"), "", "ip-2"); err == nil {
+				node2CordonedAtFirstEviction = obj.(*corev1.Node).Spec.Unschedulable
+			}
+		}
+		evic := ca.GetObject().(*policyv1.Eviction)
+		_ = client.Tracker().Delete(corev1.SchemeGroupVersion.WithResource("pods"), evic.Namespace, evic.Name)
+		return true, ca.GetObject(), nil
+	})
+
+	stub := &stubEC2{}
+	err := evictStandalone(t.Context(), client, stub, []string{"ip-1", "ip-2"}, newDrainOpts(false))
+	require.NoError(t, err)
+	assert.True(t, node2CordonedAtFirstEviction, "ip-2 must be cordoned before ip-1 starts draining")
+	assert.ElementsMatch(t, []string{"i-aaa", "i-bbb"}, stub.terminated)
+}
diff --git a/cmd/kubectl-datadog/autoscaling/cluster/uninstall/uninstall.go b/cmd/kubectl-datadog/autoscaling/cluster/uninstall/uninstall.go
index 593b8f767c..f0c8195513 100644
--- a/cmd/kubectl-datadog/autoscaling/cluster/uninstall/uninstall.go
+++ b/cmd/kubectl-datadog/autoscaling/cluster/uninstall/uninstall.go
@@ -206,57 +206,57 @@ func displayResourceSummary(ctx context.Context, cmd *cobra.Command, cli *client
 	cmd.Println("\nThis will delete:")
 
 	if n, err := listKarpenterNodePools(ctx, cli); err != nil {
-		cmd.Printf("  - NodePools: (unable to list: %v)\n", err)
+		cmd.Printf("  • NodePools: (unable to list: %v)\n", err)
 	} else if len(n) == 0 {
-		cmd.Println("  - NodePools: none found")
+		cmd.Println("  • NodePools: none found")
 	} else {
 		nodePools = n
-		cmd.Printf("  - %d NodePool(s):\n", len(nodePools))
+		cmd.Printf("  • %d NodePool(s):\n", len(nodePools))
 		for _, np := range nodePools {
-			cmd.Printf("      • %s\n", np)
+			cmd.Printf("      ◦ %s\n", np)
 		}
 	}
 
 	nodeClasses, err := listKarpenterEC2NodeClasses(ctx, cli)
 	if err != nil {
-		cmd.Printf("  - EC2NodeClasses: (unable to list: %v)\n", err)
+		cmd.Printf("  • EC2NodeClasses: (unable to list: %v)\n", err)
 	} else if len(nodeClasses) == 0 {
-		cmd.Println("  - EC2NodeClasses: none found")
+		cmd.Println("  • EC2NodeClasses: none found")
 	} else {
-		cmd.Printf("  - %d EC2NodeClass(es):\n", len(nodeClasses))
+		cmd.Printf("  • %d EC2NodeClass(es):\n", len(nodeClasses))
 		for _, nc := range nodeClasses {
-			cmd.Printf("      • %s\n", nc)
+			cmd.Printf("      ◦ %s\n", nc)
 		}
 	}
 
 	if err != nil {
-		cmd.Println("  - Karpenter nodes: (unable to list - depends on EC2NodeClasses)")
+		cmd.Println("  • Karpenter nodes: (unable to list - depends on EC2NodeClasses)")
 	} else if n, err := listKarpenterNodes(ctx, cli, nodeClasses); err != nil {
-		cmd.Printf("  - Karpenter nodes: (unable to list: %v)\n", err)
+		cmd.Printf("  • Karpenter nodes: (unable to list: %v)\n", err)
 	} else if len(n) == 0 {
-		cmd.Println("  - Karpenter nodes: none found")
+		cmd.Println("  • Karpenter nodes: none found")
 	} else {
 		nodes = n
-		cmd.Printf("  - %d Karpenter-managed node(s):\n", len(nodes))
+		cmd.Printf("  • %d Karpenter-managed node(s):\n", len(nodes))
 		for _, node := range nodes {
-			cmd.Printf("      • %s\n", node)
+			cmd.Printf("      ◦ %s\n", node)
 		}
 	}
 
-	cmd.Println("  - The Karpenter Helm release")
+	cmd.Println("  • The Karpenter Helm release")
 
 	if stacks, err := listCloudFormationStacks(ctx, cli, clusterName); err != nil {
-		cmd.Printf("  - CloudFormation stacks: (unable to list: %v)\n", err)
+		cmd.Printf("  • CloudFormation stacks: (unable to list: %v)\n", err)
 	} else if len(stacks) == 0 {
-		cmd.Println("  - CloudFormation stacks: none found")
+		cmd.Println("  • CloudFormation stacks: none found")
 	} else {
-		cmd.Printf("  - %d CloudFormation stack(s):\n", len(stacks))
+		cmd.Printf("  • %d CloudFormation stack(s):\n", len(stacks))
 		for _, stack := range stacks {
-			cmd.Printf("      • %s\n", stack)
+			cmd.Printf("      ◦ %s\n", stack)
 		}
 	}
 
-	cmd.Println("  - aws-auth ConfigMap role mappings (if applicable)")
+	cmd.Println("  • aws-auth ConfigMap role mappings (if applicable)")
 
 	if len(nodes) > 0 {
 		cmd.Println()