From 50ce7fad46dc5e85c08a130ba01d8708113561cd Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Wed, 17 Jun 2026 16:48:28 +0100 Subject: [PATCH] Fix flake in best-effort QoS test due to debug pods test/extended/node defines 2 helper functions which create debug pods with "oc debug" in the "openshift-machine-config-operator" namespace. These helper functions have numerous callers. By design, the debug pods have best-effort QoS. The "[sig-arch] Managed cluster should ensure control plane pods do not run in best-effort QoS" test looks for pods in openshift namespaces, including openshift-machine-config-operator. It fails if any have best-effort QoS. This test fails if it coincides with some other test which is using the node helper functions. Fix this by exporting the debug namespace constant from test/extended/node and making the QoS test skip "oc debug" pods found in that namespace. --- test/extended/node/node_utils.go | 8 ++++---- test/extended/operators/qos.go | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/test/extended/node/node_utils.go b/test/extended/node/node_utils.go index f87a40dcd3fb..44cb115005cb 100644 --- a/test/extended/node/node_utils.go +++ b/test/extended/node/node_utils.go @@ -95,8 +95,8 @@ func getPureWorkerNodes(nodes []corev1.Node) []corev1.Node { } const ( - // debugNamespace is the namespace for debug pods - debugNamespace = "openshift-machine-config-operator" + // DebugNamespace is the namespace for debug pods + DebugNamespace = "openshift-machine-config-operator" // cnvNamespace is the namespace for CNV operator cnvNamespace = "openshift-cnv" // cnvOperatorGroup is the name of the CNV operator group @@ -158,7 +158,7 @@ func getCNVWorkerNodeName(ctx context.Context, oc *exutil.CLI) string { // ExecOnNodeWithChroot runs a command on a node using oc debug with chroot /host func ExecOnNodeWithChroot(oc *exutil.CLI, nodeName string, cmd ...string) (string, error) { - args := append([]string{"node/" + nodeName, "-n" + debugNamespace, "--", "chroot", "/host"}, cmd...) 
+ args := append([]string{"node/" + nodeName, "-n" + DebugNamespace, "--", "chroot", "/host"}, cmd...) stdOut, _, err := oc.AsAdmin().WithoutNamespace().Run("debug").Args(args...).Outputs() return stdOut, err } @@ -167,7 +167,7 @@ func ExecOnNodeWithChroot(oc *exutil.CLI, nodeName string, cmd ...string) (strin // This is needed for swap operations (swapon/swapoff) that require direct namespace access func ExecOnNodeWithNsenter(oc *exutil.CLI, nodeName string, cmd ...string) (string, error) { nsenterCmd := append([]string{"nsenter", "-a", "-t", "1"}, cmd...) - args := append([]string{"node/" + nodeName, "-n" + debugNamespace, "--"}, nsenterCmd...) + args := append([]string{"node/" + nodeName, "-n" + DebugNamespace, "--"}, nsenterCmd...) stdOut, _, err := oc.AsAdmin().WithoutNamespace().Run("debug").Args(args...).Outputs() return stdOut, err } diff --git a/test/extended/operators/qos.go b/test/extended/operators/qos.go index d6cdcc1e200b..5cfcd62b5313 100644 --- a/test/extended/operators/qos.go +++ b/test/extended/operators/qos.go @@ -12,6 +12,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" e2e "k8s.io/kubernetes/test/e2e/framework" + "github.com/openshift/origin/test/extended/node" exutil "github.com/openshift/origin/test/extended/util" ) @@ -69,6 +70,16 @@ var _ = Describe("[sig-arch] Managed cluster should", func() { if hasPrefixSet(pod.Name, excludePodPrefix) { continue } + // Exclude ephemeral oc debug node pods created by the + // node.ExecOnNode*() helper functions. + // + // These are privileged, transient pods with no resource requests. + // They are best-effort QoS by design. They are created by many + // other tests, and will cause this test to fail if one happens to + // be present while it executes. 
+ if pod.Namespace == node.DebugNamespace && isEphemeralDebugPod(&pod) { + continue + } if pod.Status.QOSClass == v1.PodQOSBestEffort { invalidPodQoS.Insert(fmt.Sprintf("%s/%s is running in best-effort QoS", pod.Namespace, pod.Name)) } @@ -79,3 +90,15 @@ var _ = Describe("[sig-arch] Managed cluster should", func() { } }) }) + +func isEphemeralDebugPod(pod *v1.Pod) bool { + // Debug pods created by oc can be identified via: + // - the managed-by label set by modern oc versions + // - the source-resource annotation set by modern oc versions + // - the "-debug-" name pattern used by all oc versions + // The name pattern is the only reliable signal for older oc binaries + // (e.g. oc/v4.2.0) that predate the label/annotation. + return pod.Labels["debug.openshift.io/managed-by"] == "oc-debug" || + pod.Annotations["debug.openshift.io/source-resource"] != "" || + strings.Contains(pod.Name, "-debug-") +}