Skip to content

Commit f894862

Browse files
authored
Support removing keys from HTTP request bodies (#661)
## Summary Add the ability to omit keys from the request body by setting them to `null` in `--backend-kwargs`. ## Details This PR makes it so any value in the request body that is `None` will be omitted from the body before being sent. ## Test Plan `--backend-kwargs '{"extras":{"body":{"stream_options":{"continuous_usage_stats": null}}}}'` ## Related Issues <!-- Link any relevant issues that this PR addresses. --> - Resolves #603 --- - [x] "I certify that all code in this PR is my own, except as noted below." ## Use of AI - [x] Includes AI-assisted code completion - [ ] Includes code generated by an AI application - [x] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`)
2 parents 999b41f + a18bf00 commit f894862

File tree

6 files changed

+661
-2
lines changed

6 files changed

+661
-2
lines changed

src/guidellm/backends/openai/http.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
GenerationResponse,
2727
RequestInfo,
2828
)
29+
from guidellm.utils.dict import deep_filter
2930

3031
__all__ = [
3132
"OpenAIHTTPBackend",
@@ -390,6 +391,8 @@ async def resolve( # type: ignore[override, misc]
390391
if arguments.files
391392
else None
392393
)
394+
# Omit `None` from output JSON
395+
deep_filter(arguments.body or {}, lambda _, v: v is not None)
393396
request_json = arguments.body if not request_files else None
394397
request_data = arguments.body if request_files else None
395398

src/guidellm/schemas/request.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pydantic import Field, computed_field
1616

1717
from guidellm.schemas.base import StandardBaseDict, StandardBaseModel
18+
from guidellm.utils.dict import deep_update
1819

1920
__all__ = [
2021
"GenerationRequest",
@@ -82,7 +83,8 @@ def model_combine(
8283
for combine in ("headers", "params", "body", "files"):
8384
if (val := additional_dict.get(combine)) is not None:
8485
current = getattr(self, combine, None) or {}
85-
setattr(self, combine, {**current, **val})
86+
deep_update(current, val)
87+
setattr(self, combine, current)
8688

8789
return self
8890

src/guidellm/utils/dict.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
"""Utility functions for working with dictionaries."""
2+
3+
from __future__ import annotations
4+
5+
from collections.abc import Callable, Hashable
6+
from typing import Any
7+
8+
19
def recursive_key_update(d, key_update_func):
210
if not isinstance(d, dict) and not isinstance(d, list):
311
return d
@@ -21,3 +29,31 @@ def recursive_key_update(d, key_update_func):
2129
for _, value in d.items():
2230
recursive_key_update(value, key_update_func)
2331
return d
32+
33+
34+
def deep_update(dict1: dict, dict2: dict) -> None:
    """
    Recursively merge ``dict2`` into ``dict1``, mutating ``dict1`` in-place.

    Nested dicts present in both arguments are merged key-by-key; any other
    value in ``dict2`` (including lists) simply overwrites the corresponding
    entry in ``dict1``. Values are assigned by reference, not copied, and
    circular references are not handled.

    :param dict1: Target dictionary, updated in-place.
    :param dict2: Source dictionary whose entries are merged into ``dict1``.
    """
    for key, incoming in dict2.items():
        existing = dict1.get(key)
        # Recurse only when both sides hold a dict; otherwise replace outright.
        if isinstance(incoming, dict) and isinstance(existing, dict):
            deep_update(existing, incoming)
        else:
            dict1[key] = incoming
46+
47+
48+
def deep_filter(d: dict, predicate: Callable[[Hashable, Any], bool]) -> None:
    """
    Recursively drop leaf entries of ``d`` for which ``predicate`` is false.

    Mutates ``d`` in-place. Nested dict values are descended into rather than
    tested themselves (so an emptied nested dict is kept), and lists are left
    untouched. Values are not copied and circular references are not handled.

    :param d: Dictionary to filter in-place.
    :param predicate: Called as ``predicate(key, value)`` for each non-dict
        value; entries returning a falsy result are removed.
    """
    # Snapshot the keys so removal during iteration is safe.
    for key in list(d):
        value = d[key]
        if isinstance(value, dict):
            deep_filter(value, predicate)
            continue
        if not predicate(key, value):
            del d[key]

tests/unit/backends/openai/test_http.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,3 +718,83 @@ async def test_resolve_stream(
718718
with handler_patch:
719719
async for _response, _info in backend.resolve(request, request_info):
720720
pass
721+
722+
@pytest.mark.smoke
723+
@pytest.mark.asyncio
724+
@async_timeout(10.0)
725+
async def test_resolve_filters_none_from_request_body(
726+
self,
727+
httpx_mock: HTTPXMock,
728+
mock_request_handler,
729+
):
730+
"""
731+
Test that None values are filtered from request body.
732+
733+
This is a simple integration test confirming the backend works with
734+
None filtering. Deep testing of None filtering is covered by test_dict.py.
735+
736+
### WRITTEN BY AI ###
737+
"""
738+
# Track the actual request body sent
739+
sent_body = None
740+
741+
def capture_request(request: httpx.Request):
742+
nonlocal sent_body
743+
sent_body = json.loads(request.content)
744+
return httpx.Response(
745+
status_code=200,
746+
json={"choices": [{"message": {"content": "Response"}}]},
747+
)
748+
749+
httpx_mock.add_callback(capture_request, url="http://test/v1/chat/completions")
750+
751+
backend = OpenAIHTTPBackend(
752+
target="http://test",
753+
model="test-model",
754+
request_format="chat_completions",
755+
)
756+
await backend.process_startup()
757+
758+
request = GenerationRequest()
759+
request_info = RequestInfo(
760+
request_id="test-id",
761+
status="pending",
762+
scheduler_node_id=1,
763+
scheduler_process_id=1,
764+
scheduler_start_time=123.0,
765+
timings=RequestTimings(),
766+
)
767+
768+
# Configure mock handler to return body with None values
769+
mock_handler, handler_patch = mock_request_handler
770+
mock_handler.format.return_value = GenerationRequestArguments(
771+
body={
772+
"model": "gpt-4",
773+
"messages": [{"role": "user", "content": "test"}],
774+
"temperature": 0.7,
775+
"max_tokens": None, # Should be filtered out
776+
"top_p": None, # Should be filtered out
777+
"stream": None, # Should be filtered out
778+
}
779+
)
780+
mock_handler.compile_non_streaming.return_value = GenerationResponse(
781+
request_id="test-id", request_args="test args"
782+
)
783+
784+
with handler_patch:
785+
responses = []
786+
async for response, info in backend.resolve(request, request_info):
787+
responses.append((response, info))
788+
789+
# Verify the backend processed the request successfully
790+
assert len(responses) == 1
791+
assert responses[0][0].request_id == "test-id"
792+
793+
# Verify that None values were filtered out from the sent body
794+
assert sent_body is not None
795+
assert "model" in sent_body
796+
assert "messages" in sent_body
797+
assert "temperature" in sent_body
798+
assert "max_tokens" not in sent_body # None value filtered
799+
assert "top_p" not in sent_body # None value filtered
800+
assert "stream" not in sent_body # None value filtered

tests/unit/schemas/test_request.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,133 @@ def test_marshalling(self, valid_instances):
235235
for key, expected_value in constructor_args.items():
236236
assert getattr(reconstructed, key) == expected_value
237237

238+
@pytest.mark.regression
239+
def test_model_combine_deep_merge_nested_dicts(self):
240+
"""
241+
Test that nested dicts in headers and body are merged, not replaced.
242+
243+
This is the PRIMARY REGRESSION TEST for the shallow merge bug fix.
244+
Before the fix, merging nested dicts would completely replace them.
245+
After the fix using deep_update(), nested values are properly merged.
246+
247+
### WRITTEN BY AI ###
248+
"""
249+
base = GenerationRequestArguments(
250+
headers={"Authorization": "Bearer token1"},
251+
body={"model": "gpt-4", "parameters": {"temperature": 0.5, "top_p": 0.9}},
252+
)
253+
additional = GenerationRequestArguments(
254+
headers={"Content-Type": "application/json"},
255+
body={"parameters": {"temperature": 0.7, "max_tokens": 100}},
256+
)
257+
258+
result = base.model_combine(additional)
259+
260+
# Headers should be merged (both keys preserved)
261+
assert result.headers == {
262+
"Authorization": "Bearer token1",
263+
"Content-Type": "application/json",
264+
}
265+
266+
# Body should be merged, with nested parameters also merged
267+
assert result.body == {
268+
"model": "gpt-4",
269+
"parameters": {
270+
"temperature": 0.7, # Overwritten
271+
"top_p": 0.9, # Preserved from base
272+
"max_tokens": 100, # Added from additional
273+
},
274+
}
275+
276+
@pytest.mark.regression
277+
def test_model_combine_deep_merge_params(self):
278+
"""
279+
Test deep merge for params field.
280+
281+
### WRITTEN BY AI ###
282+
"""
283+
base = GenerationRequestArguments(
284+
params={"page": 1, "filters": {"type": "active", "status": "open"}},
285+
)
286+
additional = GenerationRequestArguments(
287+
params={"limit": 10, "filters": {"type": "archived"}},
288+
)
289+
290+
result = base.model_combine(additional)
291+
292+
assert result.params == {
293+
"page": 1,
294+
"limit": 10,
295+
"filters": {
296+
"type": "archived", # Overwritten
297+
"status": "open", # Preserved
298+
},
299+
}
300+
301+
@pytest.mark.regression
302+
def test_model_combine_deep_merge_files(self):
303+
"""
304+
Test deep merge for files field.
305+
306+
### WRITTEN BY AI ###
307+
"""
308+
base = GenerationRequestArguments(
309+
files={"file1": "data1", "config": {"format": "json", "encoding": "utf-8"}},
310+
)
311+
additional = GenerationRequestArguments(
312+
files={"file2": "data2", "config": {"compress": True}},
313+
)
314+
315+
result = base.model_combine(additional)
316+
317+
assert result.files == {
318+
"file1": "data1",
319+
"file2": "data2",
320+
"config": {
321+
"format": "json", # Preserved
322+
"encoding": "utf-8", # Preserved
323+
"compress": True, # Added
324+
},
325+
}
326+
327+
@pytest.mark.regression
328+
def test_model_combine_multiple_levels_nesting(self):
329+
"""
330+
Test deep merge with 3+ levels of nesting.
331+
332+
### WRITTEN BY AI ###
333+
"""
334+
base = GenerationRequestArguments(
335+
body={
336+
"level1": {
337+
"level2": {
338+
"level3": {"a": 1, "b": 2},
339+
"other": "value",
340+
},
341+
},
342+
},
343+
)
344+
additional = GenerationRequestArguments(
345+
body={
346+
"level1": {
347+
"level2": {
348+
"level3": {"b": 99, "c": 3},
349+
},
350+
},
351+
},
352+
)
353+
354+
result = base.model_combine(additional)
355+
356+
assert result.body == {
357+
"level1": {
358+
"level2": {
359+
"level3": {"a": 1, "b": 99, "c": 3},
360+
"other": "value",
361+
},
362+
},
363+
}
364+
238365

239366
class TestUsageMetrics:
240367
"""Test cases for UsageMetrics model."""

0 commit comments

Comments
 (0)