Skip to content

Double close of fd causes crash on Android #3541

@wxFlymes

Description

@wxFlymes

libwebsockets version: 4.3.7
mbedtls version: 3.6.4
platform: android arm 64

During the execution of stability tests, a crash occurs occasionally when lws_context_destroy is called.

After initial analysis, it was found that the sockfd number had been reused by Android, and lws_context_destroy could then cause a double close. Because the Android platform has the fdsan mechanism, fdsan eventually crashed the program.

verbose 1 DEBUG[23828]:uid: 1310163
verbose 1 DEBUG[23828]:tagged_addr_ctrl: 0000000000000001 (PR_TAGGED_ADDR_ENABLE)
verbose 1 DEBUG[23828]:signal 6 (SIGABRT), code -1 (SI_QUEUE), fault addr --------
verbose 1 DEBUG[23828]:Abort message: 'fdsan: attempted to close file descriptor 570, expected to be unowned, actually owned by FileInputStream 0xb804729'
verbose 1 DEBUG[23828]:    x0  0000000000000000  x1  0000000000005b7b  x2  0000000000000006  x3  00000072acbfb5a0
verbose 1 DEBUG[23828]:    x4  631f646b68651f64  x5  631f646b68651f64  x6  631f646b68651f64  x7  7f7f7f7f7f7f7f7f
verbose 1 DEBUG[23828]:    x8  00000000000000f0  x9  5ebad600e2cae6c2  x10 00000072acbfaccc  x11 000000003b9ac9ff
verbose 1 DEBUG[23828]:    x12 000000768aa47020  x13 000000007fffffff  x14 000000000007fcd4  x15 000000096b85ada1
verbose 1 DEBUG[23828]:    x16 000000765a385d08  x17 000000765a359e90  x18 0000000000000000  x19 00000000000057ae
verbose 1 DEBUG[23828]:    x20 0000000000005b7b  x21 00000072acbfc000  x22 0000000000000003  x23 00000072acbfb398
verbose 1 DEBUG[23828]:    x24 00000072acbfb690  x25 00000072acbfb310  x26 00000072acbfb2d0  x27 ffffff80ffffffc8
verbose 1 DEBUG[23828]:    x28 0000000000000007  x29 00000072acbfb630
verbose 1 DEBUG[23828]:    lr  000000765a30dde4  sp  00000072acbfb250  pc  000000765a30de08  pst 0000000000001000
verbose 1 DEBUG[23828]:13 total frames
verbose 1 DEBUG[23828]:backtrace:
verbose 1 DEBUG[23828]:      #00 pc 000000000006ae08  /apex/com.android.runtime/lib64/bionic/libc.so (fdsan_error(char const*, ...)+556) (BuildId: a87908b48b368e6282bcc9f34bcfc28c)
verbose 1 DEBUG[23828]:      #01 pc 000000000006ab14  /apex/com.android.runtime/lib64/bionic/libc.so (android_fdsan_close_with_tag+804) (BuildId: a87908b48b368e6282bcc9f34bcfc28c)
verbose 1 DEBUG[23828]:      #02 pc 000000000006b28c  /apex/com.android.runtime/lib64/bionic/libc.so (close+16) (BuildId: a87908b48b368e6282bcc9f34bcfc28c)
verbose 1 DEBUG[23828]:      #03 pc 0000000000057c04  /websockets/source/libwebsockets-4.3.7/lib/core-net/close.c:897 __lws_close_free_wsi_final
verbose 1 DEBUG[23828]:      #04 pc 0000000000057aec  /websockets/source/libwebsockets-4.3.7/lib/core-net/close.c:875 __lws_close_free_wsi
verbose 1 DEBUG[23828]:      #05 pc 0000000000057ff4  /websockets/source/libwebsockets-4.3.7/lib/core-net/close.c:1036 lws_close_free_wsi
verbose 1 DEBUG[23828]:      #06 pc 000000000003fb40  /websockets/source/libwebsockets-4.3.7/lib/core/context.c:1910 lws_context_destroy

in close.c

void
__lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason,
		     const char *caller)
{
	struct lws_context_per_thread *pt;
	const struct lws_protocols *pro;
	struct lws_context *context;
	struct lws *wsi1, *wsi2;
	int n, ccb;

	if (!wsi)
		return;

	lwsl_wsi_info(wsi, "caller: %s", caller);

	lws_access_log(wsi);

	if (!lws_dll2_is_detached(&wsi->dll_buflist))
		lwsl_wsi_info(wsi, "going down with stuff in buflist");

	context = wsi->a.context;
	pt = &context->pt[(int)wsi->tsi];

	if (pt->pipe_wsi == wsi) {
		**lws_plat_pipe_close(pt->pipe_wsi);**
		pt->pipe_wsi = NULL;
	}

lws_plat_pipe_close closes the sockfd the first time.


void
__lws_close_free_wsi_final(struct lws *wsi)
{
	int n;

	if (!wsi->shadow &&
	    lws_socket_is_valid(wsi->desc.sockfd) && !lws_ssl_close(wsi)) {
		lwsl_wsi_debug(wsi, "fd %d", wsi->desc.sockfd);

		/*
		 * if this is the pt pipe, skip the actual close,
		 * go through the motions though so we will reach 0 open wsi
		 * on the pt, and trigger the pt destroy to close the pipe fds
		 */
		if (!lws_plat_pipe_is_fd_assocated(wsi->a.context, wsi->tsi,
						   wsi->desc.sockfd)) {
			**n = compatible_close(wsi->desc.sockfd)**;
			if (n)
				lwsl_wsi_debug(wsi, "closing: close ret %d",
					       LWS_ERRNO);
		}

compatible_close may then close the sockfd a second time.

If the sockfd has been reused by someone else after the first close, compatible_close may cause a crash.

1. When __lws_close_free_wsi detects a pipe wsi, it calls lws_plat_pipe_close, which closes the pipe file descriptor and sets pt->dummy_pipe_fds[0] to -1, but wsi->desc.sockfd still retains its original value.
2. Subsequently, __lws_close_free_wsi_final checks lws_plat_pipe_is_fd_assocated. Since pt->dummy_pipe_fds[0] is already -1, this function returns false.
3. As a result, __lws_close_free_wsi_final attempts to close the file descriptor again, causing a double-close.
4. Android's fdsan detects this and terminates the process.

We tried a fix, but we're not sure if it's reliable.

in unix-pipe.c

void
lws_plat_pipe_close(struct lws *wsi)
{
	struct lws_context_per_thread *pt = &wsi->a.context->pt[(int)wsi->tsi];

	if (pt->dummy_pipe_fds[0] && pt->dummy_pipe_fds[0] != -1) {
		**if (wsi->desc.sockfd == pt->dummy_pipe_fds[0])
			wsi->desc.sockfd = LWS_SOCK_INVALID;**
		close(pt->dummy_pipe_fds[0]);
		pt->dummy_pipe_fds[0] = -1;
	}
	if (pt->dummy_pipe_fds[1] && pt->dummy_pipe_fds[1] != -1) {
		**if (wsi->desc.sockfd == pt->dummy_pipe_fds[1])
			wsi->desc.sockfd = LWS_SOCK_INVALID;**
		close(pt->dummy_pipe_fds[1]);
		pt->dummy_pipe_fds[1] = -1;
	}
}

in close.c

void
__lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason,
		     const char *caller)
{
	struct lws_context_per_thread *pt;
	const struct lws_protocols *pro;
	struct lws_context *context;
	struct lws *wsi1, *wsi2;
	int n, ccb;

	if (!wsi)
		return;

	lwsl_wsi_info(wsi, "caller: %s", caller);

	lws_access_log(wsi);

	if (!lws_dll2_is_detached(&wsi->dll_buflist))
		lwsl_wsi_info(wsi, "going down with stuff in buflist");

	context = wsi->a.context;
	pt = &context->pt[(int)wsi->tsi];

	if (pt->pipe_wsi == wsi) {
		**if (!wsi->shadow && lws_socket_is_valid(wsi->desc.sockfd)) {
			__remove_wsi_socket_from_fds(wsi);
			if (lws_socket_is_valid(wsi->desc.sockfd))
				delete_from_fd(wsi->a.context, wsi->desc.sockfd);
#if !defined(LWS_PLAT_FREERTOS) && !defined(WIN32) && !defined(LWS_PLAT_OPTEE)
			delete_from_fdwsi(wsi->a.context, wsi);
#endif
		}**
		lws_plat_pipe_close(pt->pipe_wsi);
		pt->pipe_wsi = NULL;
	}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions