From 5bb5168a8ea1a7666a939f94e4de2877cda075bb Mon Sep 17 00:00:00 2001
From: aerosta <63026763+aerosta@users.noreply.github.com>
Date: Fri, 3 Apr 2026 20:01:06 -0400
Subject: [PATCH] fix: restore test cases after batched upload truncation
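
post_test_run() mutated the TestRun in place while uploading: it
truncated test_run.test_cases (or conversational_test_cases) to the
first BATCH_SIZE cases before the initial POST and set test_run.prompts
to None for serialization, but never put the originals back. Any caller
that touched the TestRun afterwards saw every case beyond the first
batch silently dropped.

The upload path now snapshots the original lists up front and restores
them in a finally block, so the TestRun is left intact whether the run
succeeds or a batch PUT fails midway. The shape of the fix, reduced to
the test_cases field only (the real diff below also restores
conversational_test_cases and prompts; initial_batch and BATCH_SIZE are
the names already used in post_test_run):

    original_test_cases = test_run.test_cases
    try:
        test_run.test_cases = initial_batch  # POST only the first BATCH_SIZE cases
        ...  # then PUT the remaining batches
    finally:
        test_run.test_cases = original_test_cases  # restored on success and failure

Two regression tests drive 45 test cases through a fake Api (one POST of
40, one PUT of 5) and assert that the original list object and prompts
come back on both the success path and the failing-PUT path.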
" + f"detail={detail!r} raw={type(data).__name__}:{repr(data)[:500]}" + ) - ################################################ - ### Send the remaining test cases in batches ### - ################################################ - total_remaining = len(remaining_test_cases_to_process) - num_remaining_batches = ( - (total_remaining + BATCH_SIZE - 1) // BATCH_SIZE - if total_remaining > 0 - else 0 - ) + res = TestRunHttpResponse( + id=data["id"], + ) - for i in range(num_remaining_batches): - start_index = i * BATCH_SIZE - batch = remaining_test_cases_to_process[ - start_index : start_index + BATCH_SIZE - ] + ################################################ + ### Send the remaining test cases in batches ### + ################################################ + total_remaining = len(remaining_test_cases_to_process) + num_remaining_batches = ( + (total_remaining + BATCH_SIZE - 1) // BATCH_SIZE + if total_remaining > 0 + else 0 + ) - if len(batch) == 0: - break # Should not happen with correct num_remaining_batches, but as a safeguard + for i in range(num_remaining_batches): + start_index = i * BATCH_SIZE + batch = remaining_test_cases_to_process[ + start_index : start_index + BATCH_SIZE + ] - # Create RemainingTestRun with the correct list populated - if is_conversational_run: - remaining_test_run = RemainingTestRun( - testRunId=res.id, - testCases=[], # This will be empty - conversationalTestCases=batch, - ) - else: - remaining_test_run = RemainingTestRun( - testRunId=res.id, - testCases=batch, - conversationalTestCases=[], # This will be empty - ) + if len(batch) == 0: + break # Should not happen with correct num_remaining_batches, but as a safeguard - body = None - try: - body = remaining_test_run.model_dump( - by_alias=True, exclude_none=True - ) - except AttributeError: - # Pydantic version below 2.0 - body = remaining_test_run.dict(by_alias=True, exclude_none=True) + # Create RemainingTestRun with the correct list populated + if is_conversational_run: + remaining_test_run = RemainingTestRun( + testRunId=res.id, + testCases=[], # This will be empty + conversationalTestCases=batch, + ) + else: + remaining_test_run = RemainingTestRun( + testRunId=res.id, + testCases=batch, + conversationalTestCases=[], # This will be empty + ) - try: - _, _ = api.send_request( - method=HttpMethods.PUT, - endpoint=Endpoints.TEST_RUN_ENDPOINT, - body=body, - ) - except Exception as e: - message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}" - raise Exception(message) from e + body = None + try: + body = remaining_test_run.model_dump( + by_alias=True, exclude_none=True + ) + except AttributeError: + # Pydantic version below 2.0 + body = remaining_test_run.dict( + by_alias=True, exclude_none=True + ) - console.print( - "[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! View results on " - f"[link={link}]{link}[/link]" - ) - self.save_final_test_run_link(link) - open_browser(link) - return link, res.id + try: + _, _ = api.send_request( + method=HttpMethods.PUT, + endpoint=Endpoints.TEST_RUN_ENDPOINT, + body=body, + ) + except Exception as e: + message = f"Unexpected error when sending some test cases. Incomplete test run available at {link}" + raise Exception(message) from e + + console.print( + "[rgb(5,245,141)]✓[/rgb(5,245,141)] Done 🎉! 
diff --git a/tests/test_core/test_run/test_run_manager.py b/tests/test_core/test_run/test_run_manager.py
index b6498bbc73..3b9a38d280 100644
--- a/tests/test_core/test_run/test_run_manager.py
+++ b/tests/test_core/test_run/test_run_manager.py
@@ -1,12 +1,18 @@
 import os
 
 import portalocker
+import pytest
 
 import deepeval.test_run.test_run as tr_mod
 from types import SimpleNamespace
 
 from deepeval.test_case import LLMTestCase
-from deepeval.test_run.test_run import TestRunManager, LLMApiTestCase
+from deepeval.test_run.test_run import (
+    LLMApiTestCase,
+    PromptData,
+    TestRun,
+    TestRunManager,
+)
 from tests.test_core.helpers import _make_fake_portalocker
 from tests.test_core.stubs import RecordingPortalockerLock
 
@@ -150,3 +156,103 @@ def fake_fsync(fd: int) -> None:
         fsync_calls
     ), "save_test_run(..., save_under_key=...) should call os.fsync(file.fileno())"
     assert fsync_calls[-1] == f.fileno()
+
+
+def _make_api_test_cases(count: int):
+    return [
+        LLMApiTestCase(
+            name=f"tc{i}",
+            input=f"in-{i}",
+            actual_output=f"out-{i}",
+            order=i,
+        )
+        for i in range(count)
+    ]
+
+
+def test_post_test_run_restores_full_test_case_list_after_batched_upload(
+    monkeypatch,
+):
+    trm = TestRunManager()
+    test_cases = _make_api_test_cases(45)
+    prompts = [PromptData(alias="prompt-1")]
+    test_run = TestRun(testCases=test_cases, prompts=prompts)
+    original_test_cases = test_run.test_cases
+    original_prompts = test_run.prompts
+    sent_batches = []
+
+    class FakeApi:
+        def send_request(self, method, endpoint, body):
+            sent_batches.append(
+                (
+                    method,
+                    len(body["testCases"]),
+                    len(body["conversationalTestCases"]),
+                )
+            )
+            if method == tr_mod.HttpMethods.POST:
+                return {"id": "run-id"}, "https://confident.example/run-id"
+            return {"ok": True}, None
+
+    monkeypatch.setattr(tr_mod, "Api", FakeApi)
+    monkeypatch.setattr(tr_mod, "open_browser", lambda link: None)
+    monkeypatch.setattr(tr_mod.console, "print", lambda *args, **kwargs: None)
+    monkeypatch.setattr(trm, "save_final_test_run_link", lambda link: None)
+
+    result = trm.post_test_run(test_run)
+
+    assert result == ("https://confident.example/run-id", "run-id")
+    assert test_run.test_cases is original_test_cases
+    assert len(test_run.test_cases) == 45
+    assert test_run.test_cases[0].name == "tc0"
+    assert test_run.test_cases[-1].name == "tc44"
+    assert test_run.prompts is original_prompts
+    assert sent_batches == [
+        (tr_mod.HttpMethods.POST, 40, 0),
+        (tr_mod.HttpMethods.PUT, 5, 0),
+    ]
+
+
+def test_post_test_run_restores_full_test_case_list_when_batch_upload_fails(
+    monkeypatch,
+):
+    trm = TestRunManager()
+    test_cases = _make_api_test_cases(45)
+    prompts = [PromptData(alias="prompt-1")]
+    test_run = TestRun(testCases=test_cases, prompts=prompts)
+    original_test_cases = test_run.test_cases
+    original_prompts = test_run.prompts
+    sent_batches = []
+
+    class FakeApi:
+        def send_request(self, method, endpoint, body):
+            sent_batches.append(
+                (
+                    method,
+                    len(body["testCases"]),
+                    len(body["conversationalTestCases"]),
+                )
+            )
+            if method == tr_mod.HttpMethods.POST:
+                return {"id": "run-id"}, "https://confident.example/run-id"
+            raise RuntimeError("upload failed")
+
+    monkeypatch.setattr(tr_mod, "Api", FakeApi)
+    monkeypatch.setattr(tr_mod, "open_browser", lambda link: None)
+    monkeypatch.setattr(tr_mod.console, "print", lambda *args, **kwargs: None)
+    monkeypatch.setattr(trm, "save_final_test_run_link", lambda link: None)
+
+    with pytest.raises(
+        Exception, match="Unexpected error when sending some test cases"
+    ):
+        trm.post_test_run(test_run)
+
+    assert test_run.test_cases is original_test_cases
+    assert len(test_run.test_cases) == 45
+    assert test_run.test_cases[0].name == "tc0"
+    assert test_run.test_cases[-1].name == "tc44"
+    assert test_run.prompts is original_prompts
+    assert sent_batches == [
+        (tr_mod.HttpMethods.POST, 40, 0),
+        (tr_mod.HttpMethods.PUT, 5, 0),
+    ]