Skip to content

[FEATURE] Request: Add lsof Support to Identify Open-but-Deleted NFS Files (ESTALE / “Stale file handle”) #343

Description

@x-lugoo

When a file shared over NFS is deleted on the NFS server while a process on the NFS client still has it open, there is currently no reliable way to use lsof on the client to filter for and list the file descriptors that refer to such deleted files, i.e. those whose handle is now stale (ESTALE, “Stale file handle”).

This makes troubleshooting difficult: for example, du -sh on the server and df -h on the client (or across hosts) can diverge more and more over time, and it is hard to identify the root cause because the open-but-deleted NFS files cannot be easily discovered with lsof.

I wrote a patch as an experiment, but I am not an lsof expert and I do not think the implementation quality is good enough for submission. I’m sharing this problem description to ask whether the lsof community can provide a proper solution for detecting and filtering these NFS “deleted but still open” / ESTALE file descriptors on clients.

On the NFS client:

root@virtme-ng:/home/jeffxie/vng-share/nfs# exec 3<./test.txt 

On the NFS server:

# rm ./test.txt

root@virtme-ng:/home/jeffxie/vng-share# lsof +L1
(no output)

root@virtme-ng:/home/jeffxie/vng-share# stat -L /proc/399/fd/3 
stat: cannot statx '/proc/399/fd/3': Stale file handle
root@virtme-ng:/home/jeffxie/vng-share# echo $?
1

patch:

diff --git a/Lsof.8 b/Lsof.8
index b254dd4..1da4e00 100644
--- a/Lsof.8
+++ b/Lsof.8
@@ -1302,6 +1302,10 @@ It is also useful when host name lookup is not working properly.
 .B \-N
 selects the listing of NFS files.
 .TP \w'names'u+4
+.B \-y
+selects the listing of files whose stat(2)/lstat(2) fails with
+ESTALE ("Stale file handle"), e.g., stale NFS file handles.
+.TP \w'names'u+4
 .BI \-o
 directs
 .I lsof
diff --git a/lib/common.h b/lib/common.h
index 1e6c67e..88cd4fa 100644
--- a/lib/common.h
+++ b/lib/common.h
@@ -569,14 +569,15 @@ extern int ZoneColW;
 #    define SELEVTFDINFO                                                       \
         0x200000 /* selected for evetnfd info;                                 \
                   * cleared in link_lfile() */
+#    define SELSTALE 0x400000 /* select files with ESTALE (stale file handle) */
 
 #    define SELALL                                                             \
         (SELCMD | SELCNTX | SELFD | SELNA | SELNET | SELNM | SELNFS | SELPID | \
-         SELUID | SELUNX | SELZONE | SELTASK)
+         SELUID | SELUNX | SELZONE | SELTASK | SELSTALE)
 #    define SELPROC                                                            \
         (SELCMD | SELCNTX | SELPGID | SELPID | SELUID | SELZONE | SELTASK)
 /* process selecters */
-#    define SELFILE (SELFD | SELNFS | SELNLINK | SELNM) /* file selecters */
+#    define SELFILE (SELFD | SELNFS | SELNLINK | SELNM | SELSTALE) /* file selecters */
 #    define SELNW (SELNA | SELNET | SELUNX)             /* network selecters */
 
 /*
diff --git a/lib/dialects/linux/dproc.c b/lib/dialects/linux/dproc.c
index 8dc7c27..38dc7f8 100644
--- a/lib/dialects/linux/dproc.c
+++ b/lib/dialects/linux/dproc.c
@@ -917,6 +917,8 @@ static int process_id(struct lsof_context *ctx, /* context */
     static int pathil = 0;
     char *rest;
     int txts = 0;
+    int enss_fd = 0;
+    int enls_fd = 0;
 
 #if defined(HASSELINUX)
     cntxlist_t *cntxp;
@@ -1206,6 +1208,7 @@ static int process_id(struct lsof_context *ctx, /* context */
             } else {
                 if (HasNFS) {
                     if (lstatsafely(ctx, path, &lsb)) {
+                        enls_fd = errno;
                         (void)statEx(ctx, pbuf, &lsb, &ls);
                         enls = errno;
                     } else {
@@ -1213,6 +1216,7 @@ static int process_id(struct lsof_context *ctx, /* context */
                         ls = SB_ALL;
                     }
                     if (statsafely(ctx, path, &sb)) {
+                        enss_fd = errno;
                         (void)statEx(ctx, pbuf, &sb, &ss);
                         enss = errno;
                     } else {
@@ -1352,7 +1356,11 @@ static int process_id(struct lsof_context *ctx, /* context */
                                 "[pidfd:%d]", fi.pid);
                     enter_nm(ctx, rest);
                 }
-
+                if ((Selflags & SELSTALE) &&
+                    (enss_fd == ESTALE || enls_fd == ESTALE)) {
+                    Lf->sf |= SELSTALE;
+                    (void)add_nma(ctx, " (STALE)", 8);
+                }
                 if (Lf->sf)
                     link_lfile(ctx);
             }
diff --git a/src/main.c b/src/main.c
index 5d91a6e..ec79d8a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -151,7 +151,7 @@ int main(int argc, char *argv[]) {
      * Create option mask.
      */
     (void)snpf(options, sizeof(options),
-               "?a%sbc:%sD:d:%s%sf:F:g:hHi:%s%slL:%s%snNo:Op:QPr:%ss:S:tT:u:"
+               "?a%sbc:%sD:d:%s%sf:F:g:hHi:%s%slL:%s%snNyo:Op:QPr:%ss:S:tT:u:"
                "UvVwx:%s%s%s",
 
 #if defined(HAS_AFS) && defined(HASAOPT)
@@ -700,6 +700,9 @@ int main(int argc, char *argv[]) {
         case 'N':
             Fnfs = 1;
             break;
+        case 'y':
+            Selflags |= SELSTALE;
+            break;
         case 'o':
             if (!GOv || *GOv == '-' || *GOv == '+') {
                 Foffset = 1;
diff --git a/src/usage.c b/src/usage.c
index 546b3ae..df604a8 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -299,7 +299,7 @@ void usage(struct lsof_context *ctx, /* context */
         (void)fprintf(stderr, " latest FAQ: %s\n", LSOF_FAQ_URL);
         (void)fprintf(stderr, " latest (non-formatted) man page: %s\n",
                       LSOF_MAN_URL);
-        (void)fprintf(stderr, " usage: [-?ab%shH%slnNoOP%s%stUvV%s]",
+        (void)fprintf(stderr, " usage: [-?ab%shH%slnNoyOP%s%stUvV%s]",
 
 #if defined(HASNCACHE)
                       "C",
@@ -513,6 +513,7 @@ void usage(struct lsof_context *ctx, /* context */
         col = print_in_col(col, "-l list UID numbers");
         col = print_in_col(col, "-n no host names");
         col = print_in_col(col, "-N select NFS files");
+        col = print_in_col(col, "-y select stale file handles (ESTALE)");
         col = print_in_col(col, "-o list file offset");
         col = print_in_col(col, "-O no overhead *RISKY*");
         col = print_in_col(col, "-P no port names");

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions