From 6335b2472e31e783aa41e0192d872c70077574f2 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Fri, 12 Jun 2026 14:56:14 -0700 Subject: [PATCH 1/6] fix(amber): add timeouts and retries to dataset file-service requests DatasetFileDocument made two requests.get() calls (presigned-URL fetch and file download) with no timeout, so a hung or unreachable file-service would block the worker thread indefinitely. Route both calls through a Session with a (10s connect, 60s read) timeout and a urllib3 Retry policy (3 retries, exponential backoff, retrying on connection errors and 5xx). Both calls are idempotent GETs, so retrying is safe. --- .../pytexera/storage/dataset_file_document.py | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/amber/src/main/python/pytexera/storage/dataset_file_document.py b/amber/src/main/python/pytexera/storage/dataset_file_document.py index 31f95d3fc72..25e816b504c 100644 --- a/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -19,6 +19,34 @@ import os import requests import urllib.parse +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# (connect, read) timeout and retry settings for the file-service GETs below. +_CONNECT_TIMEOUT_SECONDS = 10 +_READ_TIMEOUT_SECONDS = 60 +_REQUEST_TIMEOUT = (_CONNECT_TIMEOUT_SECONDS, _READ_TIMEOUT_SECONDS) +_MAX_RETRIES = 3 +_RETRY_BACKOFF_FACTOR = 0.5 +_RETRY_STATUS_FORCELIST = (500, 502, 503, 504) + + +def _build_session() -> requests.Session: + """Returns a Session that retries GETs on connection errors and 5xx.""" + retry = Retry( + total=_MAX_RETRIES, + connect=_MAX_RETRIES, + read=_MAX_RETRIES, + backoff_factor=_RETRY_BACKOFF_FACTOR, + status_forcelist=_RETRY_STATUS_FORCELIST, + allowed_methods=frozenset({"GET"}), + raise_on_status=False, + ) + adapter = HTTPAdapter(max_retries=retry) + session = requests.Session() + session.mount("http://", adapter) + session.mount("https://", adapter) + return session class DatasetFileDocument: @@ -69,7 +97,13 @@ def get_presigned_url(self) -> str: params = {"filePath": encoded_file_path} - response = requests.get(self.presign_endpoint, headers=headers, params=params) + with _build_session() as session: + response = session.get( + self.presign_endpoint, + headers=headers, + params=params, + timeout=_REQUEST_TIMEOUT, + ) if response.status_code != 200: raise RuntimeError( @@ -100,7 +134,8 @@ def read_file(self) -> io.BytesIO: :raises: RuntimeError if the retrieval fails. """ presigned_url = self.get_presigned_url() - response = requests.get(presigned_url) + with _build_session() as session: + response = session.get(presigned_url, timeout=_REQUEST_TIMEOUT) if response.status_code != 200: raise RuntimeError( From bd80e385c4d9c2c765bd04ce4169b4769dc12fc5 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Sat, 13 Jun 2026 22:56:46 -0700 Subject: [PATCH 2/6] fixed test_dataset_file_document.py --- .../storage/test_dataset_file_document.py | 104 +++++++++++++++--- 1 file changed, 90 insertions(+), 14 deletions(-) diff --git a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py index 680f512072b..b5909c43acd 100644 --- a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py +++ b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py @@ -20,7 +20,13 @@ import pytest from unittest.mock import patch, MagicMock -from pytexera.storage.dataset_file_document import DatasetFileDocument +from pytexera.storage.dataset_file_document import ( + DatasetFileDocument, + _build_session, + _REQUEST_TIMEOUT, + _MAX_RETRIES, + _RETRY_STATUS_FORCELIST, +) DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download" @@ -95,7 +101,9 @@ def _make_doc(self, monkeypatch, path="/bob@x.com/ds/v1/file.csv"): def test_returns_presigned_url_field_from_json_body(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response( 200, body={"presignedUrl": "https://signed.test/x"} ) @@ -103,7 +111,9 @@ def test_returns_presigned_url_field_from_json_body(self, monkeypatch): def test_sends_bearer_authorization_header_with_jwt(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(200, body={"presignedUrl": "u"}) doc.get_presigned_url() _, kwargs = mock_get.call_args @@ -113,7 +123,9 @@ def test_url_encodes_filepath_query_parameter(self, monkeypatch): # urllib.parse.quote keeps "/" as safe by default, but encodes "@" # and " " — pin both pieces so the contract is explicit. doc = self._make_doc(monkeypatch, path="/bob@x.com/ds/v1/data file.csv") - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(200, body={"presignedUrl": "u"}) doc.get_presigned_url() _, kwargs = mock_get.call_args @@ -124,7 +136,9 @@ def test_url_encodes_filepath_query_parameter(self, monkeypatch): def test_calls_configured_endpoint(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(200, body={"presignedUrl": "u"}) doc.get_presigned_url() args, _ = mock_get.call_args @@ -132,21 +146,27 @@ def test_calls_configured_endpoint(self, monkeypatch): def test_raises_runtime_error_with_status_and_body_on_failure(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(403, body="forbidden") with pytest.raises(RuntimeError, match=r"403.*forbidden"): doc.get_presigned_url() def test_raises_when_response_body_lacks_presigned_url_key(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(200, body={"other": "value"}) with pytest.raises(RuntimeError, match="'presignedUrl' missing"): doc.get_presigned_url() def test_raises_when_response_body_is_not_valid_json(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: response = MagicMock() response.status_code = 200 response.json.side_effect = ValueError("Expecting value") @@ -157,14 +177,18 @@ def test_raises_when_response_body_is_not_valid_json(self, monkeypatch): def test_raises_when_presigned_url_is_empty_string(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(200, body={"presignedUrl": ""}) with pytest.raises(RuntimeError, match="'presignedUrl' missing"): doc.get_presigned_url() def test_raises_when_presigned_url_is_not_a_string(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(200, body={"presignedUrl": None}) with pytest.raises(RuntimeError, match="'presignedUrl' missing"): doc.get_presigned_url() @@ -178,7 +202,9 @@ def _make_doc(self, monkeypatch): def test_returns_bytesio_with_downloaded_content(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.side_effect = [ make_response(200, body={"presignedUrl": "https://signed.test/x"}), make_response(200, content=b"hello-bytes"), @@ -189,14 +215,18 @@ def test_returns_bytesio_with_downloaded_content(self, monkeypatch): def test_propagates_presigned_url_failure(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.return_value = make_response(500, body="upstream down") with pytest.raises(RuntimeError, match=r"500.*upstream down"): doc.read_file() def test_raises_runtime_error_when_download_fails(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.side_effect = [ make_response(200, body={"presignedUrl": "https://signed.test/x"}), make_response(404, body="missing"), @@ -206,7 +236,9 @@ def test_raises_runtime_error_when_download_fails(self, monkeypatch): def test_downloads_from_presigned_url_returned_by_first_call(self, monkeypatch): doc = self._make_doc(monkeypatch) - with patch("pytexera.storage.dataset_file_document.requests.get") as mock_get: + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: mock_get.side_effect = [ make_response(200, body={"presignedUrl": "https://signed.test/x"}), make_response(200, content=b""), @@ -214,3 +246,47 @@ def test_downloads_from_presigned_url_returned_by_first_call(self, monkeypatch): doc.read_file() second_call_args, _ = mock_get.call_args_list[1] assert second_call_args[0] == "https://signed.test/x" + + +class TestTimeoutsAndRetries: + def _make_doc(self, monkeypatch): + monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token") + monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", CUSTOM_ENDPOINT) + return DatasetFileDocument("/bob@x.com/ds/v1/file.csv") + + def test_presigned_url_request_passes_request_timeout(self, monkeypatch): + doc = self._make_doc(monkeypatch) + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: + mock_get.return_value = make_response(200, body={"presignedUrl": "u"}) + doc.get_presigned_url() + _, kwargs = mock_get.call_args + assert kwargs["timeout"] == _REQUEST_TIMEOUT + + def test_download_request_passes_request_timeout(self, monkeypatch): + doc = self._make_doc(monkeypatch) + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: + mock_get.side_effect = [ + make_response(200, body={"presignedUrl": "https://signed.test/x"}), + make_response(200, content=b"data"), + ] + doc.read_file() + _, download_kwargs = mock_get.call_args_list[1] + assert download_kwargs["timeout"] == _REQUEST_TIMEOUT + + def test_session_mounts_retry_adapter_for_http_and_https(self): + session = _build_session() + try: + for prefix in ("http://", "https://"): + retry = session.get_adapter(prefix).max_retries + assert retry.total == _MAX_RETRIES + assert retry.connect == _MAX_RETRIES + assert retry.read == _MAX_RETRIES + assert set(retry.status_forcelist) == set(_RETRY_STATUS_FORCELIST) + # Only idempotent GETs should be retried. + assert retry.allowed_methods == frozenset({"GET"}) + finally: + session.close() From 206ac02b91c807048b994e21550b81410dd0e929 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Sun, 14 Jun 2026 08:50:05 -0700 Subject: [PATCH 3/6] added error handling exception for recievingh presigned url session --- .../pytexera/storage/dataset_file_document.py | 28 +++++++++++++------ .../storage/test_dataset_file_document.py | 25 ++++++++++++++++- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/amber/src/main/python/pytexera/storage/dataset_file_document.py b/amber/src/main/python/pytexera/storage/dataset_file_document.py index 25e816b504c..8834ffb5cd1 100644 --- a/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -97,13 +97,18 @@ def get_presigned_url(self) -> str: params = {"filePath": encoded_file_path} - with _build_session() as session: - response = session.get( - self.presign_endpoint, - headers=headers, - params=params, - timeout=_REQUEST_TIMEOUT, - ) + try: + with _build_session() as session: + response = session.get( + self.presign_endpoint, + headers=headers, + params=params, + timeout=_REQUEST_TIMEOUT, + ) + except requests.exceptions.RequestException as e: + raise RuntimeError( + f"Failed to get presigned URL: request failed: {e}" + ) from e if response.status_code != 200: raise RuntimeError( @@ -134,8 +139,13 @@ def read_file(self) -> io.BytesIO: :raises: RuntimeError if the retrieval fails. """ presigned_url = self.get_presigned_url() - with _build_session() as session: - response = session.get(presigned_url, timeout=_REQUEST_TIMEOUT) + try: + with _build_session() as session: + response = session.get(presigned_url, timeout=_REQUEST_TIMEOUT) + except requests.exceptions.RequestException as e: + raise RuntimeError( + f"Failed to retrieve file content: request failed: {e}" + ) from e if response.status_code != 200: raise RuntimeError( diff --git a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py index b5909c43acd..9ee8f9773fc 100644 --- a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py +++ b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py @@ -18,6 +18,7 @@ import io import pytest +import requests from unittest.mock import patch, MagicMock from pytexera.storage.dataset_file_document import ( @@ -28,7 +29,6 @@ _RETRY_STATUS_FORCELIST, ) - DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download" CUSTOM_ENDPOINT = "https://example.test/api/presign" @@ -290,3 +290,26 @@ def test_session_mounts_retry_adapter_for_http_and_https(self): assert retry.allowed_methods == frozenset({"GET"}) finally: session.close() + + def test_presigned_url_request_timeout_is_wrapped_in_runtime_error( + self, monkeypatch + ): + doc = self._make_doc(monkeypatch) + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: + mock_get.side_effect = requests.exceptions.ReadTimeout("timed out") + with pytest.raises(RuntimeError, match="request failed"): + doc.get_presigned_url() + + def test_download_request_timeout_is_wrapped_in_runtime_error(self, monkeypatch): + doc = self._make_doc(monkeypatch) + with patch( + "pytexera.storage.dataset_file_document.requests.Session.get" + ) as mock_get: + mock_get.side_effect = [ + make_response(200, body={"presignedUrl": "https://signed.test/x"}), + requests.exceptions.ConnectionError("connection reset"), + ] + with pytest.raises(RuntimeError, match="Failed to retrieve file content"): + doc.read_file() From 681b6868998b218cbfa081278431579996bbd335 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Mon, 15 Jun 2026 03:10:49 -0700 Subject: [PATCH 4/6] moved class from global to singleton --- .../pytexera/storage/dataset_file_document.py | 60 +++++++++---------- .../storage/test_dataset_file_document.py | 24 ++++---- 2 files changed, 40 insertions(+), 44 deletions(-) diff --git a/amber/src/main/python/pytexera/storage/dataset_file_document.py b/amber/src/main/python/pytexera/storage/dataset_file_document.py index 8834ffb5cd1..ee0347aa5bf 100644 --- a/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -22,34 +22,34 @@ from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -# (connect, read) timeout and retry settings for the file-service GETs below. -_CONNECT_TIMEOUT_SECONDS = 10 -_READ_TIMEOUT_SECONDS = 60 -_REQUEST_TIMEOUT = (_CONNECT_TIMEOUT_SECONDS, _READ_TIMEOUT_SECONDS) -_MAX_RETRIES = 3 -_RETRY_BACKOFF_FACTOR = 0.5 -_RETRY_STATUS_FORCELIST = (500, 502, 503, 504) - - -def _build_session() -> requests.Session: - """Returns a Session that retries GETs on connection errors and 5xx.""" - retry = Retry( - total=_MAX_RETRIES, - connect=_MAX_RETRIES, - read=_MAX_RETRIES, - backoff_factor=_RETRY_BACKOFF_FACTOR, - status_forcelist=_RETRY_STATUS_FORCELIST, - allowed_methods=frozenset({"GET"}), - raise_on_status=False, - ) - adapter = HTTPAdapter(max_retries=retry) - session = requests.Session() - session.mount("http://", adapter) - session.mount("https://", adapter) - return session - class DatasetFileDocument: + # (connect, read) timeout and retry settings for the file-service GETs below. + _CONNECT_TIMEOUT_SECONDS = 10 + _READ_TIMEOUT_SECONDS = 60 + _REQUEST_TIMEOUT = (_CONNECT_TIMEOUT_SECONDS, _READ_TIMEOUT_SECONDS) + _MAX_RETRIES = 3 + _RETRY_BACKOFF_FACTOR = 0.5 + _RETRY_STATUS_FORCELIST = (500, 502, 503, 504) + + @classmethod + def _build_session(cls) -> requests.Session: + """Returns a Session that retries GETs on connection errors and 5xx.""" + retry = Retry( + total=cls._MAX_RETRIES, + connect=cls._MAX_RETRIES, + read=cls._MAX_RETRIES, + backoff_factor=cls._RETRY_BACKOFF_FACTOR, + status_forcelist=cls._RETRY_STATUS_FORCELIST, + allowed_methods=frozenset({"GET"}), + raise_on_status=False, + ) + adapter = HTTPAdapter(max_retries=retry) + session = requests.Session() + session.mount("http://", adapter) + session.mount("https://", adapter) + return session + def __init__(self, file_path: str): """ Parses the file path into dataset metadata. @@ -98,12 +98,12 @@ def get_presigned_url(self) -> str: params = {"filePath": encoded_file_path} try: - with _build_session() as session: + with self._build_session() as session: response = session.get( self.presign_endpoint, headers=headers, params=params, - timeout=_REQUEST_TIMEOUT, + timeout=self._REQUEST_TIMEOUT, ) except requests.exceptions.RequestException as e: raise RuntimeError( @@ -140,8 +140,8 @@ def read_file(self) -> io.BytesIO: """ presigned_url = self.get_presigned_url() try: - with _build_session() as session: - response = session.get(presigned_url, timeout=_REQUEST_TIMEOUT) + with self._build_session() as session: + response = session.get(presigned_url, timeout=self._REQUEST_TIMEOUT) except requests.exceptions.RequestException as e: raise RuntimeError( f"Failed to retrieve file content: request failed: {e}" diff --git a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py index 9ee8f9773fc..9a447177dd6 100644 --- a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py +++ b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py @@ -21,13 +21,7 @@ import requests from unittest.mock import patch, MagicMock -from pytexera.storage.dataset_file_document import ( - DatasetFileDocument, - _build_session, - _REQUEST_TIMEOUT, - _MAX_RETRIES, - _RETRY_STATUS_FORCELIST, -) +from pytexera.storage.dataset_file_document import DatasetFileDocument DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download" CUSTOM_ENDPOINT = "https://example.test/api/presign" @@ -262,7 +256,7 @@ def test_presigned_url_request_passes_request_timeout(self, monkeypatch): mock_get.return_value = make_response(200, body={"presignedUrl": "u"}) doc.get_presigned_url() _, kwargs = mock_get.call_args - assert kwargs["timeout"] == _REQUEST_TIMEOUT + assert kwargs["timeout"] == DatasetFileDocument._REQUEST_TIMEOUT def test_download_request_passes_request_timeout(self, monkeypatch): doc = self._make_doc(monkeypatch) @@ -275,17 +269,19 @@ def test_download_request_passes_request_timeout(self, monkeypatch): ] doc.read_file() _, download_kwargs = mock_get.call_args_list[1] - assert download_kwargs["timeout"] == _REQUEST_TIMEOUT + assert download_kwargs["timeout"] == DatasetFileDocument._REQUEST_TIMEOUT def test_session_mounts_retry_adapter_for_http_and_https(self): - session = _build_session() + session = DatasetFileDocument._build_session() try: for prefix in ("http://", "https://"): retry = session.get_adapter(prefix).max_retries - assert retry.total == _MAX_RETRIES - assert retry.connect == _MAX_RETRIES - assert retry.read == _MAX_RETRIES - assert set(retry.status_forcelist) == set(_RETRY_STATUS_FORCELIST) + assert retry.total == DatasetFileDocument._MAX_RETRIES + assert retry.connect == DatasetFileDocument._MAX_RETRIES + assert retry.read == DatasetFileDocument._MAX_RETRIES + assert set(retry.status_forcelist) == set( + DatasetFileDocument._RETRY_STATUS_FORCELIST + ) # Only idempotent GETs should be retried. assert retry.allowed_methods == frozenset({"GET"}) finally: From 896bbfa6c6daf264772438f2ded51cc93b457b61 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Tue, 16 Jun 2026 15:13:49 -0700 Subject: [PATCH 5/6] 1. lowered the timeout time, 2. renamed ...build... to ...retry...., 3. added the two libraries as installs rather than transient installs --- amber/requirements.txt | 2 ++ .../python/pytexera/storage/dataset_file_document.py | 11 ++++++----- .../pytexera/storage/test_dataset_file_document.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/amber/requirements.txt b/amber/requirements.txt index cba984f7a04..e64039c8fef 100644 --- a/amber/requirements.txt +++ b/amber/requirements.txt @@ -43,3 +43,5 @@ SQLAlchemy==2.0.37 pg8000==1.31.5 pympler==1.1 boto3==1.40.53 +requests==2.34.0 +urllib3==2.7.0 diff --git a/amber/src/main/python/pytexera/storage/dataset_file_document.py b/amber/src/main/python/pytexera/storage/dataset_file_document.py index ee0347aa5bf..5a063f60470 100644 --- a/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -25,15 +25,16 @@ class DatasetFileDocument: # (connect, read) timeout and retry settings for the file-service GETs below. - _CONNECT_TIMEOUT_SECONDS = 10 - _READ_TIMEOUT_SECONDS = 60 + # Read timeout bounds inactivity between bytes, not total download time. + _CONNECT_TIMEOUT_SECONDS = 5 + _READ_TIMEOUT_SECONDS = 10 _REQUEST_TIMEOUT = (_CONNECT_TIMEOUT_SECONDS, _READ_TIMEOUT_SECONDS) _MAX_RETRIES = 3 _RETRY_BACKOFF_FACTOR = 0.5 _RETRY_STATUS_FORCELIST = (500, 502, 503, 504) @classmethod - def _build_session(cls) -> requests.Session: + def _retry_session(cls) -> requests.Session: """Returns a Session that retries GETs on connection errors and 5xx.""" retry = Retry( total=cls._MAX_RETRIES, @@ -98,7 +99,7 @@ def get_presigned_url(self) -> str: params = {"filePath": encoded_file_path} try: - with self._build_session() as session: + with self._retry_session() as session: response = session.get( self.presign_endpoint, headers=headers, @@ -140,7 +141,7 @@ def read_file(self) -> io.BytesIO: """ presigned_url = self.get_presigned_url() try: - with self._build_session() as session: + with self._retry_session() as session: response = session.get(presigned_url, timeout=self._REQUEST_TIMEOUT) except requests.exceptions.RequestException as e: raise RuntimeError( diff --git a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py index 9a447177dd6..36882fe27ee 100644 --- a/amber/src/test/python/pytexera/storage/test_dataset_file_document.py +++ b/amber/src/test/python/pytexera/storage/test_dataset_file_document.py @@ -272,7 +272,7 @@ def test_download_request_passes_request_timeout(self, monkeypatch): assert download_kwargs["timeout"] == DatasetFileDocument._REQUEST_TIMEOUT def test_session_mounts_retry_adapter_for_http_and_https(self): - session = DatasetFileDocument._build_session() + session = DatasetFileDocument._retry_session() try: for prefix in ("http://", "https://"): retry = session.get_adapter(prefix).max_retries From ac5992c39662bb949978e11507cda1f0ff96693a Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Tue, 16 Jun 2026 15:36:43 -0700 Subject: [PATCH 6/6] changed transient library verion to avoid needing change in licnense file --- amber/LICENSE-binary-python | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amber/LICENSE-binary-python b/amber/LICENSE-binary-python index 1a3202bf113..bea70481933 100644 --- a/amber/LICENSE-binary-python +++ b/amber/LICENSE-binary-python @@ -228,7 +228,7 @@ Python packages: - pympler==1.1 - python-dateutil==2.8.2 - regex==2026.5.9 - - requests==2.34.2 + - requests==2.34.0 - s3transfer==0.14.0 - safetensors==0.8.0 - tenacity==8.5.0