diff --git a/deploy/standard/manifests/controller/helm/retina/values.yaml b/deploy/standard/manifests/controller/helm/retina/values.yaml index 76e1c7c758..3e993fc7eb 100644 --- a/deploy/standard/manifests/controller/helm/retina/values.yaml +++ b/deploy/standard/manifests/controller/helm/retina/values.yaml @@ -56,6 +56,9 @@ remoteContext: false enableAnnotations: false bypassLookupIPOfInterest: false dataAggregationLevel: "low" +# Static packet sampling for packetparser when using perf event arrays. +# Ignored when packetParserRingBuffer="enabled", because ring buffer back-pressure +# becomes the adaptation mechanism. dataSamplingRate: 1 # Use BPF ring buffers (BPF_MAP_TYPE_RINGBUF) instead of BPF_PERF_EVENT_ARRAY. # Pros: lower per-event overhead at high event rates, simpler variable-sized records, more consistent latency. diff --git a/docs/01-Introduction/01-intro.md b/docs/01-Introduction/01-intro.md index 8ffab06297..e3687bbe04 100644 --- a/docs/01-Introduction/01-intro.md +++ b/docs/01-Introduction/01-intro.md @@ -105,11 +105,12 @@ The following are known system requirements for installing Retina: Community users have reported performance considerations when using **Advanced metrics with the `packetparser` plugin** on nodes with high CPU core counts (32+ cores) under sustained, high-volume network load. -If you plan to deploy Retina in Advanced mode on large node types with network-intensive workloads, consider: - -1. **Start with Basic metrics mode** (does not use `packetparser`) -2. Enable `dataSamplingRate` if you need Advanced metrics -3. Monitor CPU usage and network throughput after deployment -4. See [`packetparser` performance considerations](../03-Metrics/plugins/Linux/packetparser.md#performance-considerations) for more information +If you plan to deploy Retina in Advanced mode on large node types with network-intensive workloads, consider: + +1. **Start with Basic metrics mode** (does not use `packetparser`) +2. Enable `packetParserRingBuffer` if you need Advanced metrics on high-throughput nodes +3. If you stay on perf event arrays, tune `dataSamplingRate` +4. Monitor CPU usage and network throughput after deployment +5. See [`packetparser` performance considerations](../03-Metrics/plugins/Linux/packetparser.md#performance-considerations) for more information The Retina team is evaluating options to address these reported concerns in future releases. diff --git a/docs/02-Installation/03-Config.md b/docs/02-Installation/03-Config.md index 5c9422b206..a1f566bd88 100644 --- a/docs/02-Installation/03-Config.md +++ b/docs/02-Installation/03-Config.md @@ -53,8 +53,8 @@ Apply to both Agent and Operator. * `enableAnnotations`: Enables gathering of metrics for annotated resources. Resources can be annotated with `retina.sh=observe`. Requires the operator and `operator.enableRetinaEndpoint` to be enabled. By enabling annotations, the agent will not use MetricsConfiguration CRD. * `bypassLookupIPOfInterest`: If true, plugins like `packetparser` and `dropreason` will bypass IP lookup, generating an event for each packet regardless. `enableAnnotations` will not work if this is true. * `dataAggregationLevel`: Defines the level of data aggregation for Retina. See [Data Aggregation](../05-Concepts/data-aggregation.md) for more details. -* `dataSamplingRate`: Defines the data sampling rate for `packetparser`. See [Sampling](../03-Metrics/plugins/Linux/packetparser.md#sampling) for more details. -* `packetParserRingBuffer`: Selects the kernel-to-userspace transport for `packetparser`. Accepted values: `enabled` (ring buffer) or `disabled` (perf event array). `auto` is reserved for future use. +* `dataSamplingRate`: Defines the static data sampling rate for `packetparser` when `packetParserRingBuffer=disabled`. See [Sampling](../03-Metrics/plugins/Linux/packetparser.md#sampling) for more details. +* `packetParserRingBuffer`: Selects the kernel-to-userspace transport for `packetparser`. Accepted values: `enabled` (ring buffer) or `disabled` (perf event array). `auto` is reserved for future use. When enabled, `packetparser` relies on ring buffer back-pressure and ignores `dataSamplingRate`. * `packetParserRingBufferSize`: Ring buffer size in bytes when `packetParserRingBuffer=enabled`. Must be a power of two between the kernel page size and 1GiB (inclusive); invalid values cause startup to fail. ## Operator Configuration diff --git a/docs/03-Metrics/plugins/Linux/packetparser.md b/docs/03-Metrics/plugins/Linux/packetparser.md index 3585ccf977..9581dfd7e3 100644 --- a/docs/03-Metrics/plugins/Linux/packetparser.md +++ b/docs/03-Metrics/plugins/Linux/packetparser.md @@ -44,9 +44,10 @@ Alternative data transfer mechanisms like BPF ring buffers (BPF_MAP_TYPE_RINGBUF If you observe performance degradation on high-core-count nodes: 1. **Disable `packetparser`**: Use Basic metrics mode which doesn't require this plugin -2. **Enable Sampling**: Use the `dataSamplingRate` configuration option (see [Sampling](#sampling) section) -3. **Use High Data Aggregation**: Configure `high` [data aggregation](../../../05-Concepts/data-aggregation.md) -4. **Monitor Impact**: Watch for elevated CPU usage, context switches, or throughput changes +2. **Enable Ring Buffers**: Use `packetParserRingBuffer=enabled` and size the shared buffer with `packetParserRingBufferSize` +3. **Enable Sampling**: If you stay on perf event arrays, use `dataSamplingRate` (see [Sampling](#sampling)) +4. **Use High Data Aggregation**: Configure `high` [data aggregation](../../../05-Concepts/data-aggregation.md) +5. **Monitor Impact**: Watch for elevated CPU usage, context switches, or throughput changes **Note:** The Retina team is evaluating options for addressing reported performance concerns, including potential support for alternative data transfer mechanisms. Community feedback and contributions are welcome. @@ -58,6 +59,8 @@ Since `packetparser` produces many enriched `Flow` objects it can be quite expen Keep in mind that there are cases where reporting will happen anyways as to ensure metric accuracy. +When `packetParserRingBuffer=enabled`, `packetparser` ignores `dataSamplingRate`. In that mode the shared BPF ring buffer is the adaptation mechanism: events are emitted normally while capacity exists, and additional events are dropped only when `bpf_ringbuf_reserve()` cannot reserve space. + ### Code locations - Plugin and eBPF code: *pkg/plugin/packetparser/* diff --git a/docs/06-Troubleshooting/performance.md b/docs/06-Troubleshooting/performance.md index 74e247423d..c483af4c8e 100644 --- a/docs/06-Troubleshooting/performance.md +++ b/docs/06-Troubleshooting/performance.md @@ -73,9 +73,27 @@ helm upgrade retina oci://ghcr.io/microsoft/retina/charts/retina \ **Trade-off:** You'll have node-level metrics only, not pod-level metrics. -### Option 2: Enable Data Sampling +### Option 2: Enable Ring Buffer Back-Pressure -Reduce event volume by sampling packets: +Switch `packetparser` to `BPF_MAP_TYPE_RINGBUF` so event dropping only happens when the shared buffer is actually full: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: retina-config + namespace: kube-system +data: + config.yaml: | + packetParserRingBuffer: "enabled" + packetParserRingBufferSize: 8388608 +``` + +**Trade-off:** Uses a fixed amount of locked memory, and burst capacity is bounded by `packetParserRingBufferSize`. + +### Option 3: Enable Data Sampling + +If you stay on perf event arrays, reduce event volume by sampling packets: ```yaml apiVersion: v1 @@ -90,7 +108,9 @@ data: **Trade-off:** Reduced data granularity, but lower overhead. -### Option 3: Use High Data Aggregation Level +**Note:** `dataSamplingRate` is ignored when `packetParserRingBuffer="enabled"`. + +### Option 4: Use High Data Aggregation Level Reduce events at the eBPF level: @@ -107,7 +127,7 @@ data: **Trade-off:** Disables host interface monitoring; API server latency metrics may be less reliable. -### Option 4: Selective Deployment +### Option 5: Selective Deployment Deploy Retina only on nodes where you need detailed observability: @@ -142,7 +162,7 @@ bpftool map list | grep retina bpftool map show name retina_packetparser_events ``` -Currently, `packetparser` uses `BPF_MAP_TYPE_PERF_EVENT_ARRAY`. +By default, `packetparser` uses `BPF_MAP_TYPE_PERF_EVENT_ARRAY`. If `packetParserRingBuffer=enabled`, it uses `BPF_MAP_TYPE_RINGBUF`. ### Monitoring Event Rates (Advanced) diff --git a/pkg/plugin/packetparser/_cprog/packetparser.c b/pkg/plugin/packetparser/_cprog/packetparser.c index 5d63edde0a..4fae0f36f9 100644 --- a/pkg/plugin/packetparser/_cprog/packetparser.c +++ b/pkg/plugin/packetparser/_cprog/packetparser.c @@ -122,7 +122,25 @@ static int parse_tcp_ts(struct tcphdr *tcph, void *data_end, __u32 *tsval, __u32 return -1; } -// Function to parse the packet and send it to the perf buffer. +// Emit a packet to the configured userspace transport. +static __always_inline void emit_packet(struct __sk_buff *skb, struct packet *p) +{ +#ifdef USE_RING_BUFFER + struct packet *event; + + event = bpf_ringbuf_reserve(&retina_packetparser_events, sizeof(*event), 0); + if (!event) { + return; + } + + __builtin_memcpy(event, p, sizeof(*event)); + bpf_ringbuf_submit(event, 0); +#else + bpf_perf_event_output(skb, &retina_packetparser_events, BPF_F_CURRENT_CPU, p, sizeof(*p)); +#endif +} + +// Function to parse the packet and send it to the configured userspace buffer. static void parse(struct __sk_buff *skb, __u8 obs) { struct packet p; @@ -216,20 +234,23 @@ static void parse(struct __sk_buff *skb, __u8 obs) p.conntrack_metadata = conntrack_metadata; #endif // ENABLE_CONNTRACK_METRICS - #ifdef DATA_AGGREGATION_LEVEL + #ifdef DATA_AGGREGATION_LEVEL // Calculate sampling bool sampled __attribute__((unused)); sampled = true; - - #ifdef DATA_SAMPLING_RATE - u32 rand __attribute__((unused)); + + #if defined(TEST_FORCE_UNSAMPLED) && !defined(USE_RING_BUFFER) + // Allow eBPF tests to exercise perf-buffer suppression deterministically. + sampled = false; + #elif defined(DATA_SAMPLING_RATE) && DATA_SAMPLING_RATE > 1 && !defined(USE_RING_BUFFER) + u32 rand __attribute__((unused)); rand = bpf_get_prandom_u32(); if (rand >= UINT32_MAX / DATA_SAMPLING_RATE) { sampled = false; } #endif - + // Process the packet in ct struct packetreport report __attribute__((unused)); report = ct_process_packet(&p, obs, sampled); @@ -239,11 +260,7 @@ static void parse(struct __sk_buff *skb, __u8 obs) p.previously_observed_packets = 0; p.previously_observed_bytes = 0; __builtin_memset(&p.previously_observed_flags, 0, sizeof(struct tcpflagscount)); -#ifdef USE_RING_BUFFER - bpf_ringbuf_output(&retina_packetparser_events, &p, sizeof(p), 0); -#else - bpf_perf_event_output(skb, &retina_packetparser_events, BPF_F_CURRENT_CPU, &p, sizeof(p)); -#endif + emit_packet(skb, &p); return; // If the data aggregation level is high, only send the packet to the perf buffer if it needs to be reported. #elif DATA_AGGREGATION_LEVEL == DATA_AGGREGATION_LEVEL_HIGH @@ -251,11 +268,7 @@ static void parse(struct __sk_buff *skb, __u8 obs) p.previously_observed_packets = report.previously_observed_packets; p.previously_observed_bytes = report.previously_observed_bytes; p.previously_observed_flags = report.previously_observed_flags; -#ifdef USE_RING_BUFFER - bpf_ringbuf_output(&retina_packetparser_events, &p, sizeof(p), 0); -#else - bpf_perf_event_output(skb, &retina_packetparser_events, BPF_F_CURRENT_CPU, &p, sizeof(p)); -#endif + emit_packet(skb, &p); } #endif #endif diff --git a/pkg/plugin/packetparser/packetparser_ebpf_test.go b/pkg/plugin/packetparser/packetparser_ebpf_test.go index d7a6f056e1..4191875d4d 100644 --- a/pkg/plugin/packetparser/packetparser_ebpf_test.go +++ b/pkg/plugin/packetparser/packetparser_ebpf_test.go @@ -614,6 +614,7 @@ type compileOpts struct { aggregationLevel int samplingRate int enableRingBuf bool + forceUnsampled bool } // compileAndLoadVariantBase compiles the packetparser eBPF program with custom @@ -644,7 +645,12 @@ func compileAndLoadVariantBase(t *testing.T, opts compileOpts) (*packetparserObj st += "#define ENABLE_CONNTRACK_METRICS 1\n" } st += fmt.Sprintf("#define DATA_AGGREGATION_LEVEL %d\n", opts.aggregationLevel) - st += fmt.Sprintf("#define DATA_SAMPLING_RATE %d\n", opts.samplingRate) + if !opts.enableRingBuf { + st += fmt.Sprintf("#define DATA_SAMPLING_RATE %d\n", opts.samplingRate) + if opts.forceUnsampled { + st += "#define TEST_FORCE_UNSAMPLED 1\n" + } + } require.NoError(t, os.WriteFile(ppDynamic, []byte(st), 0o644)) // Write conntrack dynamic.h if conntrack metrics enabled. @@ -863,6 +869,67 @@ func TestHighAggregationPreviouslyObserved(t *testing.T) { "expected previously_observed_packets > 0 at HIGH aggregation") } +func TestHighAggregationSamplingSuppressesRepeatedPerfBufferEvents(t *testing.T) { + objs, reader := compileAndLoadVariant(t, compileOpts{ + bypassFilter: 1, + enableConntrack: false, + aggregationLevel: 1, // HIGH + samplingRate: 2147483647, // effectively never sampled + forceUnsampled: true, // keep the test deterministic + }) + + srcIP := net.ParseIP("10.0.16.1") + dstIP := net.ParseIP("10.0.16.2") + + synPkt := ebpftest.BuildTCPPacket(ebpftest.TCPPacketOpts{ + SrcIP: srcIP, DstIP: dstIP, SrcPort: 61000, DstPort: 80, SYN: true, + }) + ebpftest.RunProgram(t, objs.EndpointIngressFilter, synPkt) + ebpftest.AssertNoPerfEvent(t, reader, 100*time.Millisecond) + + ackPkt := ebpftest.BuildTCPPacket(ebpftest.TCPPacketOpts{ + SrcIP: srcIP, DstIP: dstIP, SrcPort: 61000, DstPort: 80, ACK: true, + }) + ebpftest.RunProgram(t, objs.EndpointIngressFilter, ackPkt) + _, ok := ebpftest.ReadPerfEvent[packetparserPacket](t, reader, perfReaderTimeout) + require.True(t, ok, "first ACK should still be reported when the connection has not emitted a prior sample") + + // Once the ACK has established last_report for the connection, repeated + // ACKs should stay suppressed while sampling is forced off. + ebpftest.RunProgram(t, objs.EndpointIngressFilter, ackPkt) + ebpftest.AssertNoPerfEvent(t, reader, 100*time.Millisecond) +} + +func TestHighAggregationRingBufferIgnoresSamplingRate(t *testing.T) { + if err := ensureRingBufKernelSupported(); err != nil { + t.Skipf("ring buffer not supported: %v", err) + } + + objs, reader := compileAndLoadRingBufVariant(t, compileOpts{ + bypassFilter: 1, + enableConntrack: false, + aggregationLevel: 1, // HIGH + samplingRate: 2147483647, // ignored in ring-buffer mode + }) + + srcIP := net.ParseIP("10.0.16.3") + dstIP := net.ParseIP("10.0.16.4") + + synPkt := ebpftest.BuildTCPPacket(ebpftest.TCPPacketOpts{ + SrcIP: srcIP, DstIP: dstIP, SrcPort: 62000, DstPort: 80, SYN: true, + }) + ebpftest.RunProgram(t, objs.EndpointIngressFilter, synPkt) + _, ok := ebpftest.ReadRingBufEvent[packetparserPacket](t, reader, perfReaderTimeout) + require.True(t, ok, "SYN should still be reported at HIGH aggregation") + + ackPkt := ebpftest.BuildTCPPacket(ebpftest.TCPPacketOpts{ + SrcIP: srcIP, DstIP: dstIP, SrcPort: 62000, DstPort: 80, ACK: true, + }) + ebpftest.RunProgram(t, objs.EndpointIngressFilter, ackPkt) + _, ok = ebpftest.ReadRingBufEvent[packetparserPacket](t, reader, perfReaderTimeout) + require.True(t, ok, "ring-buffer mode should ignore dataSamplingRate and report the ACK event") +} + // ============================================================================= // Conntrack map-state verification tests // ============================================================================= diff --git a/pkg/plugin/packetparser/packetparser_linux.go b/pkg/plugin/packetparser/packetparser_linux.go index 005f708542..7cebb6e2e7 100644 --- a/pkg/plugin/packetparser/packetparser_linux.go +++ b/pkg/plugin/packetparser/packetparser_linux.go @@ -127,9 +127,14 @@ func (p *packetParser) Generate(ctx context.Context) error { p.l.Info("data aggregation level", zap.String("level", p.cfg.DataAggregationLevel.String())) st += fmt.Sprintf("#define DATA_AGGREGATION_LEVEL %d\n", p.cfg.DataAggregationLevel) - // Process packetparser sampling rate. - p.l.Info("sampling rate", zap.Uint32("rate", p.cfg.DataSamplingRate)) - st += fmt.Sprintf("#define DATA_SAMPLING_RATE %d\n", p.cfg.DataSamplingRate) + // Perf-buffer mode supports static sampling. Ring-buffer mode relies on + // reserve()/submit() back-pressure instead and therefore ignores the rate. + if p.cfg.PacketParserRingBuffer.IsEnabled() { + p.l.Info("ring buffer back-pressure enabled; ignoring sampling rate", zap.Uint32("rate", p.cfg.DataSamplingRate)) + } else { + p.l.Info("sampling rate", zap.Uint32("rate", p.cfg.DataSamplingRate)) + st += fmt.Sprintf("#define DATA_SAMPLING_RATE %d\n", p.cfg.DataSamplingRate) + } // Generate dynamic header for packetparser. err = loader.WriteFile(ctx, dynamicHeaderPath, st) diff --git a/pkg/plugin/packetparser/packetparser_linux_test.go b/pkg/plugin/packetparser/packetparser_linux_test.go index 4fa4a5394c..94720928f9 100644 --- a/pkg/plugin/packetparser/packetparser_linux_test.go +++ b/pkg/plugin/packetparser/packetparser_linux_test.go @@ -660,6 +660,19 @@ func TestPacketParseGenerate(t *testing.T) { "#define DATA_AGGREGATION_LEVEL 1\n" + "#define DATA_SAMPLING_RATE 0\n", }, + { + name: "RingBufferIgnoresSamplingRate", + cfg: &kcfg.Config{ + EnablePodLevel: true, + BypassLookupIPOfInterest: true, + DataAggregationLevel: kcfg.High, + DataSamplingRate: 99, + PacketParserRingBuffer: kcfg.PacketParserRingBufferEnabled, + PacketParserRingBufferSize: 4096, + }, + expectedContents: "#define BYPASS_LOOKUP_IP_OF_INTEREST 1\n" + + "#define DATA_AGGREGATION_LEVEL 1\n", + }, } for _, tt := range tests {