Skip to content

Commit 865631c

Browse files
committed
Honor Cache-Control max-age / Expires HTTP headers during feed updates. #376
1 parent cd06043 commit 865631c

File tree

4 files changed

+68
-2
lines changed

4 files changed

+68
-2
lines changed

src/reader/_parser/__init__.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535

3636

3737
if TYPE_CHECKING: # pragma: no cover
38+
from werkzeug.datastructures import RequestCacheControl
from werkzeug.datastructures import ResponseCacheControl
39+
3840
from ._lazy import Parser as Parser
3941

4042

@@ -213,6 +215,32 @@ def retry_after(self) -> datetime | timedelta | None:
213215

214216
return timedelta(seconds=seconds)
215217

218+
@property
def cache_control(self) -> ResponseCacheControl | None:
    """Parsed Cache-Control header, or None if missing or empty.

    The header value is handed to ``_http_utils.parse_cache_control_header``,
    which is bound to werkzeug's ``ResponseCacheControl`` container, so that
    is the type returned here (not ``RequestCacheControl``).
    """

    # lazy import: only pulled in when the property is actually read
    from ._http_utils import parse_cache_control_header

    value = self.headers.get('cache-control')
    if not value:
        return None

    return parse_cache_control_header(value)
230+
231+
@property
def expires(self) -> datetime | None:
    """Return the Expires header run through ``parse_date``.

    Returns None when the header is absent or its value is empty.
    """

    # lazy import
    from ._http_utils import parse_date

    raw_expires = self.headers.get('expires')
    return parse_date(raw_expires) if raw_expires else None
243+
216244
def get_update_after(self, now: datetime) -> datetime | None:
217245
"""Select the best "update after" date from available headers."""
218246
rv = []
@@ -225,6 +253,15 @@ def get_update_after(self, now: datetime) -> datetime | None:
225253
retry_after = now + retry_after
226254
rv.append(retry_after)
227255

256+
# https://httpwg.org/specs/rfc9111.html#calculating.freshness.lifetime
257+
if cache_control := self.cache_control:
258+
if max_age := cache_control.max_age:
259+
rv.append(now + timedelta(seconds=max_age))
260+
elif expires := self.expires:
261+
# TODO (#376): technically this is supposed to be against Date
262+
rv.append(expires.astimezone(timezone.utc))
263+
# TODO (#376): what about heuristics?
264+
228265
return max(rv, default=None)
229266

230267

src/reader/_parser/_http_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44
"""
55

66
from collections.abc import Iterable
7+
from functools import partial
78

89
import werkzeug.http
10+
from werkzeug.datastructures import MIMEAccept
11+
from werkzeug.datastructures import ResponseCacheControl
912

1013

1114
parse_options_header = werkzeug.http.parse_options_header
@@ -14,4 +17,9 @@
1417

1518

1619
def unparse_accept_header(values: Iterable[tuple[str, float]]) -> str:
    """Serialize *values* into a header string via werkzeug's ``MIMEAccept``.

    *values* is an iterable of ``(str, float)`` pairs; the result is whatever
    ``MIMEAccept(values).to_header()`` produces.
    """
    accept = MIMEAccept(values)
    return accept.to_header()
21+
22+
23+
# werkzeug's Cache-Control parser, pre-bound to build ResponseCacheControl
# objects instead of the container werkzeug would pick when cls is omitted.
parse_cache_control_header = partial(
    werkzeug.http.parse_cache_control_header,
    cls=ResponseCacheControl,
)

src/reader/_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ def update(
253253

254254
update_after = next_update_after(self.global_now, **self.config)
255255
if result.http_info:
256+
# TODO (#376): technically this is supposed to be against request end
256257
http_update_after = result.http_info.get_update_after(self.global_now)
257258
# also accounts for it being in the past / negative
258259
if http_update_after and http_update_after > update_after:

tests/test_reader.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -930,10 +930,14 @@ def test_update_after_invalid_config(reader, parser, config, config_is_global):
930930
assert feed.update_after == datetime(2010, 1, 1, 1)
931931

932932

933-
def headers(*, ra=None):
933+
def headers(*, ra=None, ma=None, exp=None):
    """Build a headers dict for tests, containing only the values given.

    ra  -> stored as-is under 'retry-after'
    ma  -> stored under 'cache-control' as "max-age=<ma>"
    exp -> stored as-is under 'expires'

    Arguments left as None are omitted; keys keep the order listed above.
    """
    candidates = (
        ('retry-after', ra),
        ('cache-control', None if ma is None else f"max-age={ma}"),
        ('expires', exp),
    )
    return {name: value for name, value in candidates if value is not None}
938942

939943

@@ -979,6 +983,22 @@ def ids(value):
979983
(60, 429, headers(ra='Thu, 31 Dec 2009 23:40:00 GMT'), datetime(2010, 1, 1, 1)),
980984
(60, 429, headers(ra="not a date, not an int"), datetime(2010, 1, 1, 1)),
981985
(60, 200, headers(ra=6000), datetime(2010, 1, 1, 1)),
986+
(60, 200, headers(ma=6000), datetime(2010, 1, 1, 2)),
987+
(60, 200, headers(ma="not an int"), datetime(2010, 1, 1, 1)),
988+
(
989+
60,
990+
200,
991+
headers(exp='Fri, 01 Jan 2010 01:40:00 GMT'),
992+
datetime(2010, 1, 1, 2),
993+
),
994+
(
995+
60,
996+
200,
997+
headers(ma=3000, exp='Fri, 01 Jan 2010 01:40:00 GMT'),
998+
datetime(2010, 1, 1, 1),
999+
),
1000+
(60, 429, headers(ra=3000, ma=6000), datetime(2010, 1, 1, 2)),
1001+
(60, 429, headers(ra=6000, ma=3000), datetime(2010, 1, 1, 2)),
9821002
],
9831003
ids=ids,
9841004
)

0 commit comments

Comments
 (0)