Skip to content

Commit 5cd6c19

Browse files
authored
Improving caching of S3 endpoints by omitting unused parameters (#10165)
1 parent ededd6f commit 5cd6c19

6 files changed

Lines changed: 90 additions & 36 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "enhancement",
3+
"category": "``s3``",
4+
"description": "Improve caching of S3 endpoints, which should improve performance of sync and recursive copy commands that interact with multiple keys in the same bucket"
5+
}

awscli/botocore/endpoint_provider.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@
4747
r"^(?!-)[a-zA-Z\d-]{1,63}(?<!-)$",
4848
)
4949
CACHE_SIZE = 100
50+
# S3 endpoint ruleset parameters that are defined but not currently referenced.
51+
# They are excluded from the cache key to avoid cache thrashing when
52+
# accessing multiple objects in the same bucket.
53+
S3_UNREFERENCED_PARAMS = {'Key', 'Prefix', 'CopySource'}
5054
ARN_PARSER = ArnParser()
5155
STRING_FORMATTER = Formatter()
5256

@@ -700,16 +704,22 @@ def evaluate(self, input_parameters):
700704
class EndpointProvider:
701705
"""Derives endpoints from a RuleSet for given input parameters."""
702706

703-
def __init__(self, ruleset_data, partition_data):
707+
def __init__(self, ruleset_data, partition_data, excluded_params=None):
704708
self.ruleset = RuleSet(**ruleset_data, partitions=partition_data)
709+
self._excluded_params = excluded_params or frozenset()
705710

706-
@lru_cache_weakref(maxsize=CACHE_SIZE)
707711
def resolve_endpoint(self, **input_parameters):
708712
"""Match input parameters to a rule.
709713
710714
:type input_parameters: dict
711715
:rtype: RuleSetEndpoint
712716
"""
717+
for param in self._excluded_params:
718+
input_parameters.pop(param, None)
719+
return self._resolve_endpoint(**input_parameters)
720+
721+
@lru_cache_weakref(maxsize=CACHE_SIZE)
722+
def _resolve_endpoint(self, **input_parameters):
713723
params_for_error = input_parameters.copy()
714724
endpoint = self.ruleset.evaluate(input_parameters)
715725
if endpoint is None:

awscli/botocore/regions.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import jmespath
2626
from botocore import UNSIGNED, xform_name
2727
from botocore.auth import AUTH_TYPE_MAPS, resolve_auth_scheme_preference
28-
from botocore.endpoint_provider import EndpointProvider
28+
from botocore.endpoint_provider import S3_UNREFERENCED_PARAMS, EndpointProvider
2929
from botocore.exceptions import (
3030
EndpointProviderError,
3131
EndpointVariantError,
@@ -476,6 +476,11 @@ def __init__(
476476
self._provider = EndpointProvider(
477477
ruleset_data=endpoint_ruleset_data,
478478
partition_data=partition_data,
479+
excluded_params=(
480+
S3_UNREFERENCED_PARAMS
481+
if service_model.service_name == 's3'
482+
else None
483+
),
479484
)
480485
self._param_definitions = self._provider.ruleset.parameters
481486
self._service_model = service_model
@@ -522,9 +527,9 @@ def construct_endpoint(
522527

523528
# The endpoint provider does not support non-secure transport.
524529
if (
525-
not self._use_ssl
526-
and provider_result.url.startswith('https://')
527-
and 'Endpoint' not in provider_params
530+
not self._use_ssl
531+
and provider_result.url.startswith('https://')
532+
and 'Endpoint' not in provider_params
528533
):
529534
provider_result = provider_result._replace(
530535
url=f'http://{provider_result.url[8:]}'

tests/functional/botocore/test_endpoint_rulesets.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
import pytest
1919
from botocore import xform_name
2020
from botocore.config import Config
21-
from botocore.endpoint_provider import EndpointProvider
21+
from botocore.endpoint_provider import (
22+
S3_UNREFERENCED_PARAMS,
23+
EndpointProvider,
24+
)
2225
from botocore.exceptions import (
2326
BotoCoreError,
2427
ClientError,
@@ -295,3 +298,34 @@ def builtin_overwriter_handler(builtins, **kwargs):
295298
except (ClientError, ResponseParserError):
296299
pass
297300
assert len(http_stubber.requests) == 0
301+
302+
303+
def _collect_refs_from_rules(rules):
304+
"""Recursively collect all {\"ref\": \"...\"} values from rules."""
305+
refs = set()
306+
if isinstance(rules, dict):
307+
if 'ref' in rules:
308+
refs.add(rules['ref'])
309+
for value in rules.values():
310+
refs.update(_collect_refs_from_rules(value))
311+
elif isinstance(rules, list):
312+
for item in rules:
313+
refs.update(_collect_refs_from_rules(item))
314+
return refs
315+
316+
317+
@pytest.mark.validates_model
318+
def test_s3_unreferenced_endpoint_ruleset_params():
319+
"""Assert that the set of known S3 endpoint ruleset parameters that are
320+
not used in the rules hasn't changed. These are set in the
321+
S3_UNREFERENCED_PARAMS constant to prevent them from influencing the cache.
322+
"""
323+
ruleset = LOADER.load_service_model('s3', 'endpoint-rule-set-1')
324+
all_params = set(ruleset['parameters'].keys())
325+
referenced = _collect_refs_from_rules(ruleset['rules'])
326+
unreferenced = all_params - referenced
327+
assert unreferenced == S3_UNREFERENCED_PARAMS, (
328+
"The set of unused S3 endpoints parameters have changed, examine the "
329+
"implication of this on endpoint caching behavior before updating"
330+
"S3_UNREFERENCED_PARAMS."
331+
)

tests/unit/botocore/test_endpoint_provider.py

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,18 @@
1515
import logging
1616
import os
1717
from unittest import mock
18+
from unittest.mock import Mock, patch
1819

1920
import pytest
20-
2121
from botocore.endpoint_provider import (
2222
EndpointProvider,
2323
EndpointRule,
2424
ErrorRule,
2525
RuleCreator,
2626
RuleSet,
27+
RuleSetEndpoint,
2728
RuleSetStandardLibary,
28-
TreeRule, RuleSetEndpoint,
29+
TreeRule,
2930
)
3031
from botocore.exceptions import (
3132
EndpointResolutionError,
@@ -34,8 +35,6 @@
3435
from botocore.loaders import Loader
3536
from botocore.regions import EndpointRulesetResolver
3637

37-
from unittest.mock import Mock, patch
38-
3938
REGION_TEMPLATE = "{Region}"
4039
REGION_REF = {"ref": "Region"}
4140
BUCKET_ARN_REF = {"ref": "bucketArn"}
@@ -89,9 +88,7 @@
8988
"disableDoubleEncoding": True,
9089
"name": "foo",
9190
"signingName": "s3-outposts",
92-
"signingRegionSet": [
93-
"*"
94-
]
91+
"signingRegionSet": ["*"],
9592
},
9693
{
9794
"disableDoubleEncoding": True,
@@ -425,7 +422,7 @@ def empty_resolver():
425422
'rules': [],
426423
},
427424
partition_data={},
428-
service_model=None,
425+
service_model=Mock(service_name='test'),
429426
builtins={},
430427
client_context=None,
431428
event_emitter=None,
@@ -512,6 +509,8 @@ def test_auth_schemes_conversion_first_authtype_unknown(
512509
def test_get_attr(rule_lib, value, path, expected_value):
513510
result = rule_lib.get_attr(value, path)
514511
assert result == expected_value
512+
513+
515514
@pytest.mark.parametrize(
516515
"use_ssl, endpoint_url, provider_params, expected_url",
517516
[
@@ -569,7 +568,7 @@ def test_construct_endpoint_parametrized(
569568
'rules': [],
570569
},
571570
partition_data={},
572-
service_model=None,
571+
service_model=Mock(service_name='test'),
573572
builtins={},
574573
client_context=None,
575574
event_emitter=None,
@@ -606,22 +605,22 @@ def test_construct_endpoint_parametrized(
606605
],
607606
)
608607
def test_auth_scheme_preference(
609-
auth_scheme_preference,
610-
expected_auth_scheme_name,
611-
monkeypatch
608+
auth_scheme_preference, expected_auth_scheme_name, monkeypatch
612609
):
613-
conditions = [
614-
PARSE_ARN_FUNC,
615-
{
616-
"fn": "not",
617-
"argv": [STRING_EQUALS_FUNC],
618-
},
619-
{
620-
"fn": "aws.partition",
621-
"argv": [REGION_REF],
622-
"assign": "PartitionResults",
623-
},
624-
],
610+
conditions = (
611+
[
612+
PARSE_ARN_FUNC,
613+
{
614+
"fn": "not",
615+
"argv": [STRING_EQUALS_FUNC],
616+
},
617+
{
618+
"fn": "aws.partition",
619+
"argv": [REGION_REF],
620+
"assign": "PartitionResults",
621+
},
622+
],
623+
)
625624
resolver = EndpointRulesetResolver(
626625
endpoint_ruleset_data={
627626
'version': '1.0',
@@ -635,7 +634,7 @@ def test_auth_scheme_preference(
635634
],
636635
},
637636
partition_data={},
638-
service_model=None,
637+
service_model=Mock(service_name='test'),
639638
builtins={},
640639
client_context=None,
641640
event_emitter=None,
@@ -651,13 +650,13 @@ def test_auth_scheme_preference(
651650
mock.patch.dict(
652651
'botocore.auth.AUTH_TYPE_MAPS',
653652
{'bar': None, 'foo': None},
654-
clear=True
653+
clear=True,
655654
),
656655
mock.patch.dict(
657656
'botocore.auth.AUTH_PREF_TO_SIGNATURE_VERSION',
658657
{'bar': 'bar', 'foo': 'foo'},
659-
clear=True
660-
)
658+
clear=True,
659+
),
661660
):
662661
name, scheme = resolver.auth_schemes_to_signing_ctx(auth_schemes)
663662
assert name == expected_auth_scheme_name

tests/unit/botocore/test_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import operator
1919
from contextlib import contextmanager
2020
from sys import getrefcount
21+
from unittest.mock import Mock
2122

2223
import botocore
2324
import pytest
@@ -1625,7 +1626,7 @@ def setUp(self):
16251626
'rules': [],
16261627
},
16271628
partition_data={},
1628-
service_model=None,
1629+
service_model=Mock(service_name='test'),
16291630
builtins={},
16301631
client_context=None,
16311632
event_emitter=None,

0 commit comments

Comments
 (0)