diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrole.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrole.yaml
index 395bb0bb2f..d2e12e7c9e 100644
--- a/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrole.yaml
+++ b/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrole.yaml
@@ -3,7 +3,7 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   creationTimestamp: null
-  name: retina-operator-role
+  name: retina-operator-clusterrole
 rules:
   - apiGroups:
       - "apiextensions.k8s.io"
@@ -100,19 +100,4 @@ rules:
       - get
      - list
       - watch
-  # For cilium-operator running in HA mode.
-  #
-  # Cilium operator running in HA mode requires the use of ResourceLock for Leader Election
-  # between multiple running instances.
-  # The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less
-  # common and fewer objects in the cluster watch "all Leases".
-  - apiGroups:
-      - coordination.k8s.io
-    resources:
-      - leases
-    verbs:
-      - create
-      - get
-      - update
-
 {{- end -}}
diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrolebinding.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrolebinding.yaml
index 3138f41fbf..a67b2a1c99 100644
--- a/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrolebinding.yaml
+++ b/deploy/hubble/manifests/controller/helm/retina/templates/operator/clusterrolebinding.yaml
@@ -13,7 +13,7 @@ metadata:
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
-  name: retina-operator-role
+  name: retina-operator-clusterrole
 subjects:
 - kind: ServiceAccount
   name: retina-operator
diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/operator/deployment.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/operator/deployment.yaml
index e44a535c81..e74dad4915 100644
--- a/deploy/hubble/manifests/controller/helm/retina/templates/operator/deployment.yaml
+++ b/deploy/hubble/manifests/controller/helm/retina/templates/operator/deployment.yaml
@@ -17,7 +17,7 @@ spec:
   selector:
     matchLabels:
       control-plane: retina-operator
-  replicas: 1
+  replicas: {{ .Values.operator.replicas }}
   template:
     metadata:
       annotations:
@@ -56,16 +56,22 @@ spec:
         {{- end }}
         {{ else }}
         command:
-        - /retina-operator
+        - /retina-operator
         {{- end }}
         {{- if .Values.operator.container.args}}
         args:
         {{- range $.Values.operator.container.args}}
         - {{ . | quote }}
         {{- end}}
-        {{ else }}
+        {{- if $.Values.operator.leaderElectionNamespace }}
+        - --leader-election-namespace={{ $.Values.operator.leaderElectionNamespace }}
+        {{- end }}
+        {{- else }}
         args:
         - --config-dir=/retina
+        {{- if .Values.operator.leaderElectionNamespace }}
+        - --leader-election-namespace={{ .Values.operator.leaderElectionNamespace }}
+        {{- end }}
         {{- end}}
         env:
         # this env var is used by retina OSS telemetry and zap
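Note on the deployment template change above: when operator.leaderElectionNamespace is non-empty, the rendered container args gain --leader-election-namespace=<value> in both branches — appended after any user-supplied operator.container.args, or after the default --config-dir=/retina. For example, with the value set to kube-system and no custom args, the container renders args of --config-dir=/retina and --leader-election-namespace=kube-system (the namespace value here is illustrative). When the value is left empty, no flag is rendered and the operator falls back to auto-detection (see resolveNamespace further down).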
diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/operator/role.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/operator/role.yaml
new file mode 100644
index 0000000000..00a7f29638
--- /dev/null
+++ b/deploy/hubble/manifests/controller/helm/retina/templates/operator/role.yaml
@@ -0,0 +1,23 @@
+{{- if .Values.operator.enabled -}}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: retina-operator-role
+  namespace: {{ .Values.operator.leaderElectionNamespace | default .Release.Namespace }}
+rules:
+  # For cilium-operator running in HA mode.
+  #
+  # Cilium operator running in HA mode requires the use of ResourceLock for Leader Election
+  # between multiple running instances.
+  # The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less
+  # common and fewer objects in the cluster watch "all Leases".
+  - apiGroups:
+      - coordination.k8s.io
+    resources:
+      - leases
+    verbs:
+      - create
+      - get
+      - update
+
+{{- end -}}
diff --git a/deploy/hubble/manifests/controller/helm/retina/templates/operator/rolebinding.yaml b/deploy/hubble/manifests/controller/helm/retina/templates/operator/rolebinding.yaml
new file mode 100644
index 0000000000..20eabf431a
--- /dev/null
+++ b/deploy/hubble/manifests/controller/helm/retina/templates/operator/rolebinding.yaml
@@ -0,0 +1,23 @@
+{{- if .Values.operator.enabled -}}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: rolebinding
+    app.kubernetes.io/instance: retina-operator-rolebinding
+    app.kubernetes.io/component: rbac
+    app.kubernetes.io/created-by: operator
+    app.kubernetes.io/part-of: operator
+    app.kubernetes.io/managed-by: kustomize
+  name: retina-operator-rolebinding
+  namespace: {{ .Values.operator.leaderElectionNamespace | default .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: retina-operator-role
+subjects:
+- kind: ServiceAccount
+  name: retina-operator
+  namespace: {{ .Release.Namespace }}
+
+{{- end -}}
diff --git a/deploy/hubble/manifests/controller/helm/retina/values.yaml b/deploy/hubble/manifests/controller/helm/retina/values.yaml
index 0967106ab8..91d7fb787c 100644
--- a/deploy/hubble/manifests/controller/helm/retina/values.yaml
+++ b/deploy/hubble/manifests/controller/helm/retina/values.yaml
@@ -16,6 +16,7 @@ os: # FIXME: remove unnecessary pieces, etc.
 
 operator:
   enabled: true
+  replicas: 1
   repository: acndev.azurecr.io/retina-operator
   pullPolicy: Always
   tag: "latest"
@@ -32,6 +33,7 @@ operator:
   # -- Node tolerations for pod assignment on nodes with taints
   # ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
   tolerations: []
+  leaderElectionNamespace: ""
 
 agent:
   leaderElection: false
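Note: a minimal usage sketch, assuming the chart is installed from this repo path; the release name and namespace value are illustrative:

    helm upgrade --install retina ./deploy/hubble/manifests/controller/helm/retina \
      --set operator.replicas=2 \
      --set operator.leaderElectionNamespace=kube-system

With more than one replica, leader election needs the coordination.k8s.io/leases Role above, which role.yaml now creates in the lease namespace (defaulting to the release namespace).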
diff --git a/operator/cilium-crds/config/config_linux.go b/operator/cilium-crds/config/config_linux.go
index bb73d7aff4..a9298e3fc5 100644
--- a/operator/cilium-crds/config/config_linux.go
+++ b/operator/cilium-crds/config/config_linux.go
@@ -8,19 +8,22 @@ import (
 )
 
 type Config struct {
-	EnableTelemetry bool
-	LeaderElection  bool
+	EnableTelemetry         bool
+	LeaderElection          bool
+	LeaderElectionNamespace string
 }
 
 func (c Config) Flags(flags *pflag.FlagSet) {
 	flags.Bool("enable-telemetry", c.EnableTelemetry, "enable telemetry (send logs and metrics to a remote server)")
 	flags.Bool("leader-election", c.LeaderElection, "Enable leader election for operator. Ensures there is only one active operator Pod")
+	flags.String("leader-election-namespace", c.LeaderElectionNamespace, "Namespace for the leader election lease. Auto-detected from the service account token when empty.")
 }
 
 var (
 	DefaultConfig = Config{
-		EnableTelemetry: false,
-		LeaderElection:  false,
+		EnableTelemetry:         false,
+		LeaderElection:          false,
+		LeaderElectionNamespace: "",
 	}
 
 	Cell = cell.Module(
diff --git a/operator/cmd/cilium-crds/cells_linux.go b/operator/cmd/cilium-crds/cells_linux.go
index 4339a1b8ee..957fa7f5a1 100644
--- a/operator/cmd/cilium-crds/cells_linux.go
+++ b/operator/cmd/cilium-crds/cells_linux.go
@@ -8,8 +8,11 @@ package ciliumcrds
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"log/slog"
+	"os"
+	"strings"
 	"sync/atomic"
 
 	"github.com/microsoft/retina/internal/buildinfo"
@@ -42,7 +45,27 @@ import (
 	"github.com/cilium/statedb"
 )
 
-const operatorK8sNamespace = "kube-system"
+const (
+	serviceAccountNamespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
+	errMsgNamespaceFileEmpty    = "--leader-election-namespace not set and " + serviceAccountNamespaceFile + " was empty"
+)
+
+var errNamespaceFileEmpty = errors.New(errMsgNamespaceFileEmpty)
+
+// resolveNamespace returns the namespace the operator should use for leader election.
+func resolveNamespace(override string) (string, error) {
+	if override != "" {
+		return override, nil
+	}
+	data, err := os.ReadFile(serviceAccountNamespaceFile)
+	if err != nil {
+		return "", fmt.Errorf("--leader-election-namespace not set and could not read %s: %w", serviceAccountNamespaceFile, err)
+	}
+	if ns := strings.TrimSpace(string(data)); ns != "" {
+		return ns, nil
+	}
+	return "", errNamespaceFileEmpty
+}
 
 var (
 	Operator = cell.Module(
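Note: a minimal sketch (not part of this diff) of the resolution order resolveNamespace implements — explicit flag first, then the mounted service account namespace file; the test file name and expectations are illustrative:

    // operator/cmd/cilium-crds/cells_linux_test.go (hypothetical)
    package ciliumcrds

    import "testing"

    func TestResolveNamespace(t *testing.T) {
        // An explicit --leader-election-namespace override always wins
        // and never touches the filesystem.
        if ns, err := resolveNamespace("monitoring"); err != nil || ns != "monitoring" {
            t.Fatalf("expected override to win, got %q, %v", ns, err)
        }
        // With no override, the in-pod service account namespace file is read;
        // outside a pod that file does not exist, so an error is expected here.
        if _, err := resolveNamespace(""); err == nil {
            t.Fatal("expected an error when the namespace file is unavailable")
        }
    }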
diff --git a/operator/cmd/cilium-crds/root_linux.go b/operator/cmd/cilium-crds/root_linux.go
index 7cbc570d5c..e0ef9fa8ed 100644
--- a/operator/cmd/cilium-crds/root_linux.go
+++ b/operator/cmd/cilium-crds/root_linux.go
@@ -26,6 +26,7 @@ import (
 	"github.com/cilium/cilium/pkg/option"
 	"github.com/cilium/hive/cell"
 	"github.com/microsoft/retina/internal/buildinfo"
+	"github.com/microsoft/retina/operator/cilium-crds/config"
 	"github.com/microsoft/retina/pkg/log"
 	"github.com/pkg/errors"
 	"github.com/spf13/viper"
@@ -64,13 +65,19 @@ func Execute(h *hive.Hive) {
 
 func registerOperatorHooks(
 	l *slog.Logger, lc cell.Lifecycle, llc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner,
-) {
+	cfg config.Config,
+) error {
+	leaderElectionNamespace, err := resolveNamespace(cfg.LeaderElectionNamespace)
+	if err != nil {
+		return fmt.Errorf("failed to determine operator namespace: %w", err)
+	}
+	l.Info("using namespace for leader election lease", "namespace", leaderElectionNamespace)
 	var wg sync.WaitGroup
 	lc.Append(cell.Hook{
 		OnStart: func(cell.HookContext) error {
 			wg.Add(1)
 			go func() {
-				runOperator(l, llc, clientset, shutdowner)
+				runOperator(l, llc, clientset, shutdowner, leaderElectionNamespace)
 				wg.Done()
 			}()
 			return nil
@@ -84,6 +91,7 @@ func registerOperatorHooks(
 			return nil
 		},
 	})
+	return nil
 }
 
 func initEnv(vp *viper.Viper) {
@@ -134,7 +142,7 @@ func doCleanup() {
 // runOperator implements the logic of leader election for cilium-operator using
 // built-in leader election capability in kubernetes.
 // See: https://github.com/kubernetes/client-go/blob/master/examples/leader-election/main.go
-func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner) {
+func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner, leaderElectionNamespace string) {
 	isLeader.Store(false)
 
 	leaderElectionCtx, leaderElectionCtxCancel = context.WithCancel(context.Background())
@@ -152,7 +160,7 @@ func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Client
 
 	leResourceLock, err := resourcelock.NewFromKubeconfig(
 		resourcelock.LeasesResourceLock,
-		operatorK8sNamespace,
+		leaderElectionNamespace,
 		leaderElectionResourceLockName,
 		resourcelock.ResourceLockConfig{
 			// Identity name of the lock holder
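Note: registerOperatorHooks is registered as a hive invoke elsewhere (the wiring is not shown in this diff); because it now returns an error, an undeterminable namespace aborts operator startup instead of silently falling back to the old hard-coded kube-system. A sketch of the assumed wiring, where hive surfaces the returned error and fails the app:

    // inside the Operator cell's option list (assumed, not part of this diff)
    cell.Invoke(registerOperatorHooks),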