Skip to content

Commit 0e6a884

Browse files
authored
fix(sentinel): preserve root seeds for outage recovery (#3188)
fixes #3127
1 parent 1d8b12a commit 0e6a884

2 files changed

Lines changed: 99 additions & 4 deletions

File tree

packages/client/lib/sentinel/index.spec.ts

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,84 @@ describe('legacy tests', () => {
979979
assert.notEqual(sentinelNode!.port, newSentinel.port);
980980
});
981981

982+
/**
 * Regression test for the full-outage scenario (commit references issue #3127).
 *
 * Flow:
 *  1. Stop every sentinel except the first, so the client is configured with
 *     (and can reach) exactly one sentinel root node.
 *  2. Connect, write and read back a key to prove the client is healthy.
 *  3. Stop all Redis nodes and the remaining sentinel (full outage) and poll
 *     GET periodically; at least one poll must fail or time out.
 *  4. Restart the Redis nodes and only the configured sentinel, keep polling;
 *     at least one poll must succeed.
 */
it('Should recover after full outage', async function () {
  this.timeout(120000);

  type Phase = 'outage' | 'recovery';
  type Status = 'success' | 'timeout' | 'error';

  // Single source of truth for the message used to recognise our own timeout.
  const TIMEOUT_MESSAGE = '1s Timeout';

  const allSentinelPorts = frame.getAllSentinelsPort();
  const primarySentinelPort = allSentinelPorts[0];
  const extraSentinelPorts = allSentinelPorts.slice(1);

  // Keep only one sentinel reachable for the test.
  await Promise.all(extraSentinelPorts.map(port => frame.stopSentinel(port.toString())));
  await setTimeout(1500);

  sentinel = RedisSentinel.create({
    name: config.sentinelName,
    sentinelRootNodes: [{ host: '127.0.0.1', port: primarySentinelPort }],
    RESP: 3,
    scanInterval: 250
  });
  sentinel.setTracer(tracer);
  // Errors are expected while everything is down; an empty listener is registered for them.
  sentinel.on("error", () => { });
  await sentinel.connect();

  await sentinel.set('some-key', 'value');
  assert.equal(await sentinel.get('some-key'), 'value');

  const allNodePorts = frame.getAllNodesPort();
  // Simulate full outage (all Redis nodes + the single configured sentinel).
  await Promise.all(allNodePorts.map(port => frame.stopNode(port.toString())));
  await frame.stopSentinel(primarySentinelPort.toString());

  // Issues one GET and rejects with TIMEOUT_MESSAGE if it has not settled within 1s.
  const timedGet = async () => {
    const getPromise = sentinel!.get('some-key');
    void getPromise.catch(() => undefined); // Promise.race may timeout first.

    return Promise.race([
      getPromise,
      setTimeout(1000).then(() => {
        throw new Error(TIMEOUT_MESSAGE);
      })
    ]);
  };

  const pollResults: Array<{ phase: Phase; status: Status }> = [];

  // Runs `rounds` timed GETs, 3s apart, recording the outcome of each one.
  const pollLoop = async (phase: Phase, rounds: number) => {
    for (let i = 0; i < rounds; i++) {
      try {
        await timedGet();
        pollResults.push({ phase, status: 'success' });
      } catch (err: unknown) {
        // Narrow before touching `.message`; anything that is not our own
        // timeout error is recorded as a generic error.
        const timedOut = err instanceof Error && err.message === TIMEOUT_MESSAGE;
        pollResults.push({ phase, status: timedOut ? 'timeout' : 'error' });
      }
      await setTimeout(3000);
    }
  };

  // Match the issue's periodic GET calls while outage is active.
  await pollLoop('outage', 3);

  // Bring only the single configured sentinel back; keep extra sentinels down.
  await Promise.all(allNodePorts.map(port => frame.restartNode(port.toString())));
  await frame.restartSentinel(primarySentinelPort.toString());

  // Continue periodic GET loop and assert recovery.
  await pollLoop('recovery', 5);

  const sawOutageFailure = pollResults.some(result =>
    result.phase === 'outage' && result.status !== 'success'
  );
  assert.equal(sawOutageFailure, true, 'expected GET failures during outage');

  const sawRecoverySuccess = pollResults.some(result =>
    result.phase === 'recovery' && result.status === 'success'
  );
  assert.equal(sawRecoverySuccess, true, 'expected periodic GET to recover after restart');
});
1059+
9821060
it('timer works, and updates sentinel list', async function () {
9831061
this.timeout(60000);
9841062

packages/client/lib/sentinel/index.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,7 @@ class RedisSentinelInternal<
652652

653653
#configEpoch: number = 0;
654654

655+
readonly #sentinelSeedNodes: Array<RedisNode>;
655656
#sentinelRootNodes: Array<RedisNode>;
656657
#sentinelClient?: RedisClientType<RedisModules, RedisFunctions, RedisScripts, RespVersions, TypeMapping>;
657658

@@ -696,7 +697,8 @@ class RedisSentinelInternal<
696697
this.#name = options.name;
697698

698699
this.#RESP = options.RESP;
699-
this.#sentinelRootNodes = Array.from(options.sentinelRootNodes);
700+
this.#sentinelSeedNodes = Array.from(options.sentinelRootNodes);
701+
this.#sentinelRootNodes = Array.from(this.#sentinelSeedNodes);
700702
this.#maxCommandRediscovers = options.maxCommandRediscovers ?? 16;
701703
this.#masterPoolSize = options.masterPoolSize ?? 1;
702704
this.#replicaPoolSize = options.replicaPoolSize ?? 0;
@@ -951,13 +953,27 @@ class RedisSentinelInternal<
951953
}
952954
}
953955

956+
/**
 * Builds a comparison key for a list of nodes: each node becomes
 * `host:port`, the entries are sorted, and the result is joined with `|`.
 * Two lists holding the same endpoints in a different order yield the same key.
 * The input array is not modified.
 */
#sentinelNodeListKey(nodes: Array<RedisNode>) {
  const endpoints: Array<string> = [];
  for (const entry of nodes) {
    endpoints.push(`${entry.host}:${entry.port}`);
  }
  endpoints.sort();
  return endpoints.join('|');
}
959+
960+
/**
 * If the working list of sentinel root nodes is empty, replaces it with a
 * fresh copy of the seed nodes and emits a trace message.
 * Does nothing when at least one root node is still present.
 */
#restoreSentinelRootNodesIfEmpty() {
  const rootsExhausted = this.#sentinelRootNodes.length === 0;
  if (rootsExhausted) {
    this.#trace("restoring sentinel roots from seed nodes");
    // Copy so later mutations of the root list never touch the seed list.
    this.#sentinelRootNodes = [...this.#sentinelSeedNodes];
  }
}
968+
954969
/**
 * Handles a failed sentinel node: removes the first root node whose host and
 * port match `node` (if any), re-populates the root list from the seed nodes
 * when that leaves it empty, and then calls `#reset()`.
 *
 * @param node the sentinel node that failed
 */
#handleSentinelFailure(node: RedisNode) {
  const isFailedNode = (candidate: RedisNode) =>
    candidate.host === node.host && candidate.port === node.port;

  const index = this.#sentinelRootNodes.findIndex(isFailedNode);
  if (index >= 0) {
    // Remove in place; only the first matching entry is dropped.
    this.#sentinelRootNodes.splice(index, 1);
  }

  this.#restoreSentinelRootNodesIfEmpty();
  this.#reset();
}
963979

@@ -1104,6 +1120,8 @@ class RedisSentinelInternal<
11041120

11051121
// observe/analyze/transform remediation functions
11061122
async observe() {
1123+
this.#restoreSentinelRootNodesIfEmpty();
1124+
11071125
for (const node of this.#sentinelRootNodes) {
11081126
let client: RedisClientType<typeof RedisSentinelModule, {}, {}, RespVersions, {}> | undefined;
11091127
try {
@@ -1247,8 +1265,7 @@ class RedisSentinelInternal<
12471265
};
12481266
this.emit('client-error', event);
12491267
this.#handleSentinelFailure(node);
1250-
})
1251-
.on('end', () => this.#handleSentinelFailure(node));
1268+
});
12521269
this.#sentinelClient = client;
12531270

12541271
this.#trace(`transform: adding sentinel client connect() to promise list`);
@@ -1383,7 +1400,7 @@ class RedisSentinelInternal<
13831400
}
13841401
}
13851402

1386-
if (analyzed.sentinelList.length != this.#sentinelRootNodes.length) {
1403+
if (this.#sentinelNodeListKey(analyzed.sentinelList) !== this.#sentinelNodeListKey(this.#sentinelRootNodes)) {
13871404
this.#sentinelRootNodes = analyzed.sentinelList;
13881405
const event: RedisSentinelEvent = {
13891406
type: "SENTINE_LIST_CHANGE",

0 commit comments

Comments
 (0)