Skip to content

Commit ee031ae

Browse files
committed
Fix silent hang in flush when nodeShutdown returns non-recoverable error
During physical restore, flush() calls nodeShutdown() to shut down mongod. Previously, if nodeShutdown() returned an error other than ConflictingOperationInProgress (e.g., Unauthorized when not connecting from localhost), the error was silently ignored and the code proceeded to waitMgoShutdown(), which would hang indefinitely. Extract error classification into isNonRecoverableShutdownErr() helper to make it easy to add new non-recoverable error patterns in the future. Signed-off-by: alexgim961101 <alexgim961101@gmail.com>
1 parent 01eb998 commit ee031ae

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

pbm/restore/physical.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,7 @@ func (r *PhysRestore) flush(ctx context.Context) error {
439439
// so we have to shut it down regardless of its role
440440
if !inf.IsPrimary || len(rsStat.Members) == 1 {
441441
err = nodeShutdown(ctx, r.node)
442-
if err != nil &&
443-
strings.Contains(err.Error(), // wait a bit and let the node to stepdown
444-
"(ConflictingOperationInProgress) This node is already in the process of stepping down") {
442+
if err != nil && isNonRecoverableShutdownErr(err) {
445443
return errors.Wrap(err, "shutdown server")
446444
}
447445
break
@@ -571,6 +569,26 @@ func nodeShutdown(ctx context.Context, m *mongo.Client) error {
571569
return err
572570
}
573571

572+
// isNonRecoverableShutdownErr reports whether err, as returned by
// nodeShutdown, carries one of the known markers meaning that mongod
// will not shut down, so waiting for it would be futile.
// A nil error is never considered non-recoverable.
func isNonRecoverableShutdownErr(err error) bool {
	if err == nil {
		return false
	}

	msg := err.Error()
	// Add further non-recoverable error markers as extra cases here.
	switch {
	case strings.Contains(msg, "(Unauthorized)"),
		strings.Contains(msg, "(ConflictingOperationInProgress)"):
		return true
	default:
		return false
	}
}
591+
574592
// waitMgoShutdown waits until mongod releases mongod.lock file within dbpath dir.
575593
// In case of timeout or unexpected error it'll return error.
576594
func waitMgoShutdown(dbpath string) error {

0 commit comments

Comments
 (0)