From a130d07d24b95f5b807dd89b9d2302a873ba3bec Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 21 May 2025 14:45:30 +0000 Subject: [PATCH] UPSTREAM: af_unix: Try not to hold unix_gc_lock during accept(). commit fd86344823b521149bb31d91eba900ba3525efa6 upstream. Commit dcf70df2048d ("af_unix: Fix up unix_edge.successor for embryo socket.") added spin_lock(&unix_gc_lock) in accept() path, and it caused regression in a stress test as reported by kernel test robot. If the embryo socket is not part of the inflight graph, we need not hold the lock. To decide that in O(1) time and avoid the regression in the normal use case, 1. add a new stat unix_sk(sk)->scm_stat.nr_unix_fds 2. count the number of inflight AF_UNIX sockets in the receive queue under unix_state_lock() 3. move unix_update_edges() call under unix_state_lock() 4. avoid locking if nr_unix_fds is 0 in unix_update_edges() Bug: 404256079 Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202404101427.92a08551-oliver.sang@intel.com Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240413021928.20946-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3600729b7fc0564ccb1ceec493fa446858b6d95b) Signed-off-by: Lee Jones Change-Id: Id4a64f2453d757bb947259a106376d30a20218bb --- android/abi_gki_aarch64.stg | 15 +++++++++++---- android/abi_gki_aarch64.stg.allowed_breaks | 8 ++++++++ include/net/af_unix.h | 1 + net/unix/af_unix.c | 2 +- net/unix/garbage.c | 20 ++++++++++++++++---- 5 files changed, 37 insertions(+), 9 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 53d56c595dee..549171e2c3a4 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -144590,6 +144590,12 @@ member { type_id: 0x4585663f offset: 21760 } +member { + id: 0xb206d8fd + name: "nr_unix_fds" + type_id: 0x33756485 + offset: 64 +} member { id: 0xfb6f3a99 name: "nr_user" @@ -148600,10 +148606,10 @@ member { offset: 896 } member { - id: 0x337b4570 + id: 0x337b49fc name: "oob_skb" type_id: 0x054f691a - offset: 9088 + offset: 9152 } member { id: 0x97510416 @@ -267023,8 +267029,9 @@ struct_union { kind: STRUCT name: "scm_stat" definition { - bytesize: 4 + bytesize: 16 member_id: 0xc7d661bf + member_id: 0xb206d8fd } } struct_union { @@ -277492,7 +277499,7 @@ struct_union { member_id: 0x337f3641 member_id: 0x6a6789c8 member_id: 0xac812894 - member_id: 0x337b4570 + member_id: 0x337b49fc } } struct_union { diff --git a/android/abi_gki_aarch64.stg.allowed_breaks b/android/abi_gki_aarch64.stg.allowed_breaks index 2c974d1bf771..13804f4bb984 100644 --- a/android/abi_gki_aarch64.stg.allowed_breaks +++ b/android/abi_gki_aarch64.stg.allowed_breaks @@ -202,3 +202,11 @@ type 'struct unix_sock' changed 4 members ('struct socket_wq peer_wq' .. 'struct sk_buff* oob_skb') changed offset changed by -512 +type 'struct unix_sock' changed + member 'struct sk_buff* oob_skb' changed + offset changed by 64 + +type 'struct scm_stat' changed + byte size changed from 4 to 16 + member 'unsigned long nr_unix_fds' was added + diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 5d17ffacc66f..a320b08230b5 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -67,6 +67,7 @@ struct unix_skb_parms { struct scm_stat { atomic_t nr_fds; + unsigned long nr_unix_fds; }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3ba6fed78ad8..542b4610a95e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1703,12 +1703,12 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, } tsk = skb->sk; - unix_update_edges(unix_sk(tsk)); skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); /* attach accepted sock to socket */ unix_state_lock(tsk); + unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12a4ec27e0d4..95240a59808f 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -209,6 +209,7 @@ void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver) unix_add_edge(fpl, edge); } while (i < fpl->count_unix); + receiver->scm_stat.nr_unix_fds += fpl->count_unix; WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix); out: WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count); @@ -222,6 +223,7 @@ out: void unix_del_edges(struct scm_fp_list *fpl) { + struct unix_sock *receiver; int i = 0; spin_lock(&unix_gc_lock); @@ -235,6 +237,8 @@ void unix_del_edges(struct scm_fp_list *fpl) unix_del_edge(fpl, edge); } while (i < fpl->count_unix); + receiver = fpl->edges[0].successor; + receiver->scm_stat.nr_unix_fds -= fpl->count_unix; WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix); out: WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count); @@ -246,10 +250,18 @@ out: void unix_update_edges(struct unix_sock *receiver) { - spin_lock(&unix_gc_lock); - unix_update_graph(unix_sk(receiver->listener)->vertex); - receiver->listener = NULL; - spin_unlock(&unix_gc_lock); + /* nr_unix_fds is only updated under unix_state_lock(). + * If it's 0 here, the embryo socket is not part of the + * inflight graph, and GC will not see it, so no lock needed. + */ + if (!receiver->scm_stat.nr_unix_fds) { + receiver->listener = NULL; + } else { + spin_lock(&unix_gc_lock); + unix_update_graph(unix_sk(receiver->listener)->vertex); + receiver->listener = NULL; + spin_unlock(&unix_gc_lock); + } } int unix_prepare_fpl(struct scm_fp_list *fpl)