Skip to content

Commit f08cdeb

Browse files
Copilotmykaul
authored andcommitted
Fix infinite retry when single host fails with server error
Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
1 parent d37e3e5 commit f08cdeb

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

cassandra/cluster.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4543,7 +4543,7 @@ def _make_query_plan(self):
45434543
# or to the explicit host target if set
45444544
if self._host:
45454545
# returning a single value effectively disables retries
4546-
self.query_plan = [self._host]
4546+
self.query_plan = iter([self._host])
45474547
else:
45484548
# convert the list/generator/etc to an iterator so that subsequent
45494549
# calls to send_request (which retries may do) will resume where

tests/unit/test_response_future.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from cassandra.protocol import (ReadTimeoutErrorMessage, WriteTimeoutErrorMessage,
2525
UnavailableErrorMessage, ResultMessage, QueryMessage,
2626
OverloadedErrorMessage, IsBootstrappingErrorMessage,
27-
PreparedQueryNotFound, PrepareMessage,
27+
PreparedQueryNotFound, PrepareMessage, ServerError,
2828
RESULT_KIND_ROWS, RESULT_KIND_SET_KEYSPACE,
2929
RESULT_KIND_SCHEMA_CHANGE, RESULT_KIND_PREPARED,
3030
ProtocolHandler)
@@ -668,3 +668,58 @@ def test_timeout_does_not_release_stream_id(self):
668668

669669
assert len(connection.request_ids) == 0, \
670670
"Request IDs should be empty but it's not: {}".format(connection.request_ids)
671+
672+
def test_single_host_query_plan_exhausted_after_one_retry(self):
673+
"""
674+
Test that when a specific host is provided, the query plan is properly
675+
exhausted after one attempt and doesn't cause infinite retries.
676+
677+
This test reproduces the issue where providing a single host in the query plan
678+
(via the host parameter) would cause infinite retries on server errors because
679+
the query_plan was a list instead of an iterator.
680+
"""
681+
session = self.make_basic_session()
682+
pool = self.make_pool()
683+
session._pools.get.return_value = pool
684+
685+
# Create a specific host
686+
specific_host = Mock()
687+
688+
connection = Mock(spec=Connection)
689+
pool.borrow_connection.return_value = (connection, 1)
690+
691+
query = SimpleStatement("INSERT INTO foo (a, b) VALUES (1, 2)")
692+
message = QueryMessage(query=query, consistency_level=ConsistencyLevel.ONE)
693+
694+
# Create ResponseFuture with a specific host (this is the key to reproducing the bug)
695+
rf = ResponseFuture(session, message, query, 1, host=specific_host)
696+
rf.send_request()
697+
698+
# Verify initial request was sent
699+
rf.session._pools.get.assert_called_once_with(specific_host)
700+
pool.borrow_connection.assert_called_once_with(timeout=ANY, routing_key=ANY, keyspace=ANY, table=ANY)
701+
connection.send_msg.assert_called_once_with(rf.message, 1, cb=ANY, encoder=ProtocolHandler.encode_message, decoder=ProtocolHandler.decode_message, result_metadata=[])
702+
703+
# Simulate a ServerError response (which triggers RETRY_NEXT_HOST by default)
704+
result = Mock(spec=ServerError, info={})
705+
result.to_exception.return_value = result
706+
rf._set_result(specific_host, None, None, result)
707+
708+
# The retry should be scheduled
709+
rf.session.cluster.scheduler.schedule.assert_called_once_with(ANY, rf._retry_task, False, specific_host)
710+
assert 1 == rf._query_retries
711+
712+
# Reset mocks to track next calls
713+
pool.borrow_connection.reset_mock()
714+
connection.send_msg.reset_mock()
715+
716+
# Now simulate the retry task executing
717+
# The bug would cause this to succeed and retry again infinitely
718+
# The fix ensures the iterator is exhausted after the first try
719+
rf._retry_task(False, specific_host)
720+
721+
# After the retry, send_request should be called but the query_plan iterator
722+
# should be exhausted, so no new request should be sent
723+
# Instead, it should set a NoHostAvailable exception
724+
assert rf._final_exception is not None
725+
assert isinstance(rf._final_exception, NoHostAvailable)

0 commit comments

Comments
 (0)