Commit 0fd6338

feat(batch-evaluation): allow passing fields param for efficient trace fetching (#1502)
* feat(batch-evaluation): allow passing fields param for efficient trace fetching
* push
1 parent d11155e commit 0fd6338

File tree

2 files changed: +12 -1 lines changed

2 files changed

+12
-1
lines changed

langfuse/_client/client.py

Lines changed: 4 additions & 1 deletion
@@ -79,12 +79,12 @@
 from langfuse._utils.parse_error import handle_fern_exception
 from langfuse._utils.prompt_cache import PromptCache
 from langfuse.api.resources.commons.errors.error import Error
+from langfuse.api.resources.commons.errors.not_found_error import NotFoundError
 from langfuse.api.resources.commons.types import DatasetRunWithItems
 from langfuse.api.resources.datasets.types import (
     DeleteDatasetRunResponse,
     PaginatedDatasetRuns,
 )
-from langfuse.api.resources.commons.errors.not_found_error import NotFoundError
 from langfuse.api.resources.ingestion.types.score_body import ScoreBody
 from langfuse.api.resources.prompts.types import (
     CreatePromptRequest_Chat,
@@ -3096,6 +3096,7 @@ def run_batched_evaluation(
         mapper: MapperFunction,
         filter: Optional[str] = None,
         fetch_batch_size: int = 50,
+        fetch_trace_fields: Optional[str] = None,
         max_items: Optional[int] = None,
         max_retries: int = 3,
         evaluators: List[EvaluatorFunction],
@@ -3138,6 +3139,7 @@ def run_batched_evaluation(
                 Default: None (fetches all items).
             fetch_batch_size: Number of items to fetch per API call and hold in memory.
                 Larger values may be faster but use more memory. Default: 50.
+            fetch_trace_fields: Comma-separated list of fields to include when fetching traces. Available field groups: 'core' (always included), 'io' (input, output, metadata), 'scores', 'observations', 'metrics'. If not specified, all fields are returned. Example: 'core,scores,metrics'. Note: Excluded 'observations' or 'scores' fields return empty arrays; excluded 'metrics' returns -1 for 'totalCost' and 'latency'. Only relevant if scope is 'traces'.
             max_items: Maximum total number of items to process. If None, processes all
                 items matching the filter. Useful for testing or limiting evaluation runs.
                 Default: None (process all).
@@ -3306,6 +3308,7 @@ def composite_evaluator(*, item, evaluations):
             evaluators=evaluators,
             filter=filter,
             fetch_batch_size=fetch_batch_size,
+            fetch_trace_fields=fetch_trace_fields,
             max_items=max_items,
             max_concurrency=max_concurrency,
             composite_evaluator=composite_evaluator,
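
For context, a minimal usage sketch of the new parameter (not part of this commit): run_batched_evaluation and the keyword arguments shown come from the diff above, while get_client, the mapper, and the evaluator signatures are assumptions used only for illustration.

# Hypothetical sketch: mapper/evaluator names, signatures, and return shapes
# are assumptions; only fetch_trace_fields and the other keyword arguments
# mirror the diff above.
from langfuse import get_client  # assumed entry point for the v3 client

langfuse = get_client()

def my_mapper(*, item, **kwargs):
    # With fetch_trace_fields="core,io", scores/observations arrive as empty
    # arrays and metrics as -1, so only core and io fields are used here.
    return {"input": item.input, "output": item.output}

def my_evaluator(*, item, **kwargs):
    # Placeholder evaluator; the real return type is whatever the SDK expects.
    return {"name": "non_empty_output", "value": 1 if item.output else 0}

langfuse.run_batched_evaluation(
    scope="traces",
    mapper=my_mapper,
    evaluators=[my_evaluator],
    fetch_batch_size=50,
    fetch_trace_fields="core,io",  # skip scores/observations/metrics when not needed
    max_items=100,
)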

langfuse/batch_evaluation.py

Lines changed: 8 additions & 0 deletions
@@ -847,6 +847,7 @@ async def run_async(
         evaluators: List[EvaluatorFunction],
         filter: Optional[str] = None,
         fetch_batch_size: int = 50,
+        fetch_trace_fields: Optional[str] = None,
         max_items: Optional[int] = None,
         max_concurrency: int = 50,
         composite_evaluator: Optional[CompositeEvaluatorFunction] = None,
@@ -867,6 +868,7 @@ async def run_async(
             evaluators: List of evaluation functions to run on each item.
             filter: JSON filter string for querying items.
             fetch_batch_size: Number of items to fetch per API call.
+            fetch_trace_fields: Comma-separated list of fields to include when fetching traces. Available field groups: 'core' (always included), 'io' (input, output, metadata), 'scores', 'observations', 'metrics'. If not specified, all fields are returned. Example: 'core,scores,metrics'. Note: Excluded 'observations' or 'scores' fields return empty arrays; excluded 'metrics' returns -1 for 'totalCost' and 'latency'. Only relevant if scope is 'traces'.
             max_items: Maximum number of items to process (None = all).
             max_concurrency: Maximum number of concurrent evaluations.
             composite_evaluator: Optional function to create composite scores.
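
The exclusion semantics documented above can be made concrete with a small check. This is a hedged sketch: the snake_case attribute names (for example total_cost for totalCost) are assumed to match the SDK's generated trace model.

# Hedged illustration of the documented field-group exclusions on a fetched
# trace (attribute names assumed to mirror the API's totalCost/latency).
def fetched_without_details(trace) -> bool:
    # Excluded 'observations'/'scores' groups come back as empty arrays,
    # excluded 'metrics' as the -1 sentinel for cost and latency.
    return (
        trace.observations == []
        and trace.scores == []
        and trace.total_cost == -1
        and trace.latency == -1
    )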
@@ -913,6 +915,8 @@ async def run_async(

         if verbose:
             self._log.info(f"Starting batch evaluation on {scope}")
+            if scope == "traces" and fetch_trace_fields:
+                self._log.info(f"Fetching trace fields: {fetch_trace_fields}")
         if resume_from:
             self._log.info(
                 f"Resuming from {resume_from.last_processed_timestamp} "
@@ -936,6 +940,7 @@
                     page=page,
                     limit=fetch_batch_size,
                     max_retries=max_retries,
+                    fields=fetch_trace_fields,
                 )
             except Exception as e:
                 # Failed after max_retries - create resume token and return
@@ -1115,6 +1120,7 @@ async def _fetch_batch_with_retry(
         page: int,
         limit: int,
         max_retries: int,
+        fields: Optional[str],
     ) -> List[Union[TraceWithFullDetails, ObservationsView]]:
         """Fetch a batch of items with retry logic.
@@ -1125,6 +1131,7 @@ async def _fetch_batch_with_retry(
             limit: Number of items per page.
             max_retries: Maximum number of retry attempts.
             verbose: Whether to log retry attempts.
+            fields: Trace fields to fetch

         Returns:
             List of items from the API.
@@ -1138,6 +1145,7 @@
                     limit=limit,
                     filter=filter,
                     request_options={"max_retries": max_retries},
+                    fields=fields,
                 )  # type: ignore
                 return list(response.data)  # type: ignore
             elif scope == "observations":
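
For reference, the traces branch above roughly amounts to the following direct call against the public API. This is a sketch under assumptions: the low-level client is presumed reachable as langfuse.api.trace.list (the callee's name is not visible in this hunk), and its fields parameter is presumed to accept the same comma-separated groups that fetch_trace_fields forwards.

# Rough, hypothetical equivalent of the traces branch above.
from langfuse import get_client  # assumed entry point for the v3 client

langfuse = get_client()

page_one = langfuse.api.trace.list(      # assumed low-level call; not shown in this hunk
    page=1,
    limit=50,
    fields="core,scores,metrics",        # same value that fetch_trace_fields forwards
    request_options={"max_retries": 3},  # retry handling mirrored from the diff
)
traces = list(page_one.data)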
