Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
creationTimestamp: null
name: retina-operator-role
name: retina-operator-clusterrole
rules:
- apiGroups:
- "apiextensions.k8s.io"
Expand Down Expand Up @@ -100,19 +100,4 @@ rules:
- get
- list
- watch
# For cilium-operator running in HA mode.
#
# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election
# between multiple running instances.
# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less
# common and fewer objects in the cluster watch "all Leases".
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- create
- get
- update

{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ metadata:
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: retina-operator-role
name: retina-operator-clusterrole
subjects:
- kind: ServiceAccount
name: retina-operator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
selector:
matchLabels:
control-plane: retina-operator
replicas: 1
replicas: {{ .Values.operator.replicas }}
template:
metadata:
annotations:
Expand Down Expand Up @@ -56,16 +56,22 @@ spec:
{{- end }}
{{ else }}
command:
- /retina-operator
- /retina-operator
{{- end }}
{{- if .Values.operator.container.args}}
args:
{{- range $.Values.operator.container.args}}
- {{ . | quote }}
{{- end}}
{{ else }}
{{- if $.Values.operator.leaderElectionNamespace }}
- --leader-election-namespace={{ $.Values.operator.leaderElectionNamespace }}
{{- end }}
{{- else }}
args:
- --config-dir=/retina
{{- if .Values.operator.leaderElectionNamespace }}
- --leader-election-namespace={{ .Values.operator.leaderElectionNamespace }}
{{- end }}
{{- end}}
env:
# this env var is used by retina OSS telemetry and zap
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{- if .Values.operator.enabled -}}
# Namespaced Role for operator leader election. The operator uses a
# coordination.k8s.io Lease as its leader-election resource lock, so it only
# needs create/get/update on Leases in the lease namespace — no cluster-wide
# Lease permissions are required. Rendered only when the operator is enabled.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: retina-operator-role
  # The lease lives in the explicitly configured leader-election namespace,
  # falling back to the chart's release namespace.
  namespace: {{ .Values.operator.leaderElectionNamespace | default .Release.Namespace }}
rules:
# For cilium-operator running in HA mode.
#
# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election
# between multiple running instances.
# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less
# common and fewer objects in the cluster watch "all Leases".
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - create
  - get
  - update
{{- end -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{- if .Values.operator.enabled -}}
# Binds the retina-operator ServiceAccount to the namespaced leader-election
# Role so the operator can manage its Lease lock.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app.kubernetes.io/name: rolebinding
    app.kubernetes.io/instance: retina-operator-rolebinding
    app.kubernetes.io/component: rbac
    app.kubernetes.io/created-by: operator
    app.kubernetes.io/part-of: operator
    # NOTE(review): this chart is rendered by Helm, not kustomize — confirm
    # the managed-by value is intentional (likely copied from scaffolding).
    app.kubernetes.io/managed-by: kustomize
  name: retina-operator-rolebinding
  # Must be the same namespace as the Role it binds, i.e. the leader-election
  # lease namespace (override, else the release namespace).
  namespace: {{ .Values.operator.leaderElectionNamespace | default .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: retina-operator-role
subjects:
- kind: ServiceAccount
  name: retina-operator
  # The operator's ServiceAccount is always in the release namespace, even
  # when the lease namespace differs.
  namespace: {{ .Release.Namespace }}
{{- end -}}
2 changes: 2 additions & 0 deletions deploy/hubble/manifests/controller/helm/retina/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ os:
# FIXME: remove unnecessary pieces, etc.
operator:
enabled: true
replicas: 1
repository: acndev.azurecr.io/retina-operator
pullPolicy: Always
tag: "latest"
Expand All @@ -32,6 +33,7 @@ operator:
# -- Node tolerations for pod assignment on nodes with taints
# ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
tolerations: []
leaderElectionNamespace: ""

agent:
leaderElection: false
Expand Down
11 changes: 7 additions & 4 deletions operator/cilium-crds/config/config_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,22 @@ import (
)

type Config struct {
EnableTelemetry bool
LeaderElection bool
EnableTelemetry bool
LeaderElection bool
LeaderElectionNamespace string
}

func (c Config) Flags(flags *pflag.FlagSet) {
flags.Bool("enable-telemetry", c.EnableTelemetry, "enable telemetry (send logs and metrics to a remote server)")
flags.Bool("leader-election", c.LeaderElection, "Enable leader election for operator. Ensures there is only one active operator Pod")
flags.String("leader-election-namespace", c.LeaderElectionNamespace, "Namespace for the leader election lease. Auto-detected from the service account token when empty.")
}

var (
DefaultConfig = Config{
EnableTelemetry: false,
LeaderElection: false,
EnableTelemetry: false,
LeaderElection: false,
LeaderElectionNamespace: "",
}

Cell = cell.Module(
Expand Down
25 changes: 24 additions & 1 deletion operator/cmd/cilium-crds/cells_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ package ciliumcrds

import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"strings"
"sync/atomic"

"github.com/microsoft/retina/internal/buildinfo"
Expand Down Expand Up @@ -42,7 +45,27 @@ import (
"github.com/cilium/statedb"
)

const operatorK8sNamespace = "kube-system"
const (
	// In-cluster path where the Pod's service account namespace is mounted.
	serviceAccountNamespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
	errMsgNamespaceFileEmpty    = "--leader-election-namespace not set and " + serviceAccountNamespaceFile + " was empty"
)

// errNamespaceFileEmpty is returned when the namespace file exists but holds
// only whitespace.
var errNamespaceFileEmpty = errors.New(errMsgNamespaceFileEmpty)

// resolveNamespace returns the namespace the operator should use for leader
// election. An explicit override always wins; otherwise the in-cluster
// service account namespace file is read. An error is returned when the file
// cannot be read or is empty.
func resolveNamespace(override string) (string, error) {
	if override != "" {
		return override, nil
	}
	raw, err := os.ReadFile(serviceAccountNamespaceFile)
	if err != nil {
		return "", fmt.Errorf("--leader-election-namespace not set and could not read %s: %w", serviceAccountNamespaceFile, err)
	}
	ns := strings.TrimSpace(string(raw))
	if ns == "" {
		return "", errNamespaceFileEmpty
	}
	return ns, nil
}

var (
Operator = cell.Module(
Expand Down
16 changes: 12 additions & 4 deletions operator/cmd/cilium-crds/root_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/cilium/cilium/pkg/option"
"github.com/cilium/hive/cell"
"github.com/microsoft/retina/internal/buildinfo"
"github.com/microsoft/retina/operator/cilium-crds/config"
"github.com/microsoft/retina/pkg/log"
"github.com/pkg/errors"
"github.com/spf13/viper"
Expand Down Expand Up @@ -64,13 +65,19 @@ func Execute(h *hive.Hive) {
func registerOperatorHooks(
l *slog.Logger, lc cell.Lifecycle, llc *LeaderLifecycle,
clientset k8sClient.Clientset, shutdowner hive.Shutdowner,
) {
cfg config.Config,
) error {
leaderElectionNamespace, err := resolveNamespace(cfg.LeaderElectionNamespace)
if err != nil {
return fmt.Errorf("failed to determine operator namespace: %w", err)
}
l.Info("using namespace for leader election lease", "namespace", leaderElectionNamespace)
var wg sync.WaitGroup
lc.Append(cell.Hook{
OnStart: func(cell.HookContext) error {
wg.Add(1)
go func() {
runOperator(l, llc, clientset, shutdowner)
runOperator(l, llc, clientset, shutdowner, leaderElectionNamespace)
wg.Done()
}()
return nil
Expand All @@ -84,6 +91,7 @@ func registerOperatorHooks(
return nil
},
})
return nil
}

func initEnv(vp *viper.Viper) {
Expand Down Expand Up @@ -134,7 +142,7 @@ func doCleanup() {
// runOperator implements the logic of leader election for cilium-operator using
// built-in leader election capability in kubernetes.
// See: https://github.com/kubernetes/client-go/blob/master/examples/leader-election/main.go
func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner) {
func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner, leaderElectionNamespace string) {
isLeader.Store(false)

leaderElectionCtx, leaderElectionCtxCancel = context.WithCancel(context.Background())
Expand All @@ -152,7 +160,7 @@ func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Client

leResourceLock, err := resourcelock.NewFromKubeconfig(
resourcelock.LeasesResourceLock,
operatorK8sNamespace,
leaderElectionNamespace,
leaderElectionResourceLockName,
resourcelock.ResourceLockConfig{
// Identity name of the lock holder
Expand Down