UPSTREAM: af_unix: Replace garbage collection algorithm.
commit 4090fa373f0e763c43610853d2774b5979915959 upstream.
If we find a dead SCC during iteration, we call unix_collect_skb()
to splice all skb in the SCC to the global sk_buff_head, hitlist.
After iterating all SCC, we unlock unix_gc_lock and purge the queue.
Bug: 404256079
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240325202425.60930-15-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Lee Jones <lee@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit de7921631f)
Signed-off-by: Lee Jones <joneslee@google.com>
Change-Id: Ia3698af8bf0d00775e4a18fea6b137c9fac70173
This commit is contained in:
committed by
Lee Jones
parent
67a3a58da1
commit
11d208f893
@@ -100498,12 +100498,6 @@ member {
|
|||||||
type_id: 0x246497bd
|
type_id: 0x246497bd
|
||||||
offset: 128
|
offset: 128
|
||||||
}
|
}
|
||||||
member {
|
|
||||||
id: 0x805723f7
|
|
||||||
name: "gc_flags"
|
|
||||||
type_id: 0x33756485
|
|
||||||
offset: 8256
|
|
||||||
}
|
|
||||||
member {
|
member {
|
||||||
id: 0x8fd944b0
|
id: 0x8fd944b0
|
||||||
name: "gc_interval"
|
name: "gc_interval"
|
||||||
@@ -112771,12 +112765,6 @@ member {
|
|||||||
type_id: 0x6d7f5ff6
|
type_id: 0x6d7f5ff6
|
||||||
offset: 16320
|
offset: 16320
|
||||||
}
|
}
|
||||||
member {
|
|
||||||
id: 0x79c0ce48
|
|
||||||
name: "inflight"
|
|
||||||
type_id: 0x33756485
|
|
||||||
offset: 8128
|
|
||||||
}
|
|
||||||
member {
|
member {
|
||||||
id: 0x79f74293
|
id: 0x79f74293
|
||||||
name: "inflight"
|
name: "inflight"
|
||||||
@@ -123480,12 +123468,6 @@ member {
|
|||||||
name: "link"
|
name: "link"
|
||||||
type_id: 0xd3c80119
|
type_id: 0xd3c80119
|
||||||
}
|
}
|
||||||
member {
|
|
||||||
id: 0x556d5b03
|
|
||||||
name: "link"
|
|
||||||
type_id: 0xd3c80119
|
|
||||||
offset: 8000
|
|
||||||
}
|
|
||||||
member {
|
member {
|
||||||
id: 0x556d5d6f
|
id: 0x556d5d6f
|
||||||
name: "link"
|
name: "link"
|
||||||
@@ -125010,6 +124992,12 @@ member {
|
|||||||
name: "lock"
|
name: "lock"
|
||||||
type_id: 0xf4933b90
|
type_id: 0xf4933b90
|
||||||
}
|
}
|
||||||
|
member {
|
||||||
|
id: 0x2d1fe06a
|
||||||
|
name: "lock"
|
||||||
|
type_id: 0xf313e71a
|
||||||
|
offset: 8000
|
||||||
|
}
|
||||||
member {
|
member {
|
||||||
id: 0x2d1fe165
|
id: 0x2d1fe165
|
||||||
name: "lock"
|
name: "lock"
|
||||||
@@ -125202,12 +125190,6 @@ member {
|
|||||||
type_id: 0xf313e71a
|
type_id: 0xf313e71a
|
||||||
offset: 320
|
offset: 320
|
||||||
}
|
}
|
||||||
member {
|
|
||||||
id: 0x2d1feb84
|
|
||||||
name: "lock"
|
|
||||||
type_id: 0xf313e71a
|
|
||||||
offset: 8192
|
|
||||||
}
|
|
||||||
member {
|
member {
|
||||||
id: 0x2d1feb9a
|
id: 0x2d1feb9a
|
||||||
name: "lock"
|
name: "lock"
|
||||||
@@ -148618,10 +148600,10 @@ member {
|
|||||||
offset: 896
|
offset: 896
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0x337b4b40
|
id: 0x337b4570
|
||||||
name: "oob_skb"
|
name: "oob_skb"
|
||||||
type_id: 0x054f691a
|
type_id: 0x054f691a
|
||||||
offset: 9600
|
offset: 9088
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0x97510416
|
id: 0x97510416
|
||||||
@@ -154605,16 +154587,16 @@ member {
|
|||||||
offset: 304
|
offset: 304
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0x6a6786bf
|
id: 0x6a6789c8
|
||||||
name: "peer_wake"
|
name: "peer_wake"
|
||||||
type_id: 0x347ff86e
|
type_id: 0x347ff86e
|
||||||
offset: 9216
|
offset: 8704
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0x337f3f92
|
id: 0x337f3641
|
||||||
name: "peer_wq"
|
name: "peer_wq"
|
||||||
type_id: 0x5eee2044
|
type_id: 0x5eee2044
|
||||||
offset: 8704
|
offset: 8192
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0xc29a8766
|
id: 0xc29a8766
|
||||||
@@ -180668,10 +180650,10 @@ member {
|
|||||||
type_id: 0xc9082b19
|
type_id: 0xc9082b19
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0xac8121db
|
id: 0xac812894
|
||||||
name: "scm_stat"
|
name: "scm_stat"
|
||||||
type_id: 0xfa80d3fa
|
type_id: 0xfa80d3fa
|
||||||
offset: 9536
|
offset: 9024
|
||||||
}
|
}
|
||||||
member {
|
member {
|
||||||
id: 0xe94f5bce
|
id: 0xe94f5bce
|
||||||
@@ -277497,7 +277479,7 @@ struct_union {
|
|||||||
kind: STRUCT
|
kind: STRUCT
|
||||||
name: "unix_sock"
|
name: "unix_sock"
|
||||||
definition {
|
definition {
|
||||||
bytesize: 1216
|
bytesize: 1152
|
||||||
member_id: 0x82ce9da8
|
member_id: 0x82ce9da8
|
||||||
member_id: 0x24257f11
|
member_id: 0x24257f11
|
||||||
member_id: 0x77353e9c
|
member_id: 0x77353e9c
|
||||||
@@ -277506,14 +277488,11 @@ struct_union {
|
|||||||
member_id: 0xb794a532
|
member_id: 0xb794a532
|
||||||
member_id: 0x5eddc09d
|
member_id: 0x5eddc09d
|
||||||
member_id: 0xe2a2a0b6
|
member_id: 0xe2a2a0b6
|
||||||
member_id: 0x556d5b03
|
member_id: 0x2d1fe06a
|
||||||
member_id: 0x79c0ce48
|
member_id: 0x337f3641
|
||||||
member_id: 0x2d1feb84
|
member_id: 0x6a6789c8
|
||||||
member_id: 0x805723f7
|
member_id: 0xac812894
|
||||||
member_id: 0x337f3f92
|
member_id: 0x337b4570
|
||||||
member_id: 0x6a6786bf
|
|
||||||
member_id: 0xac8121db
|
|
||||||
member_id: 0x337b4b40
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
struct_union {
|
struct_union {
|
||||||
|
@@ -192,3 +192,13 @@ type 'struct unix_vertex' changed
|
|||||||
member 'unsigned long lowlink' was removed
|
member 'unsigned long lowlink' was removed
|
||||||
member 'unsigned long scc_index' was added
|
member 'unsigned long scc_index' was added
|
||||||
|
|
||||||
|
type 'struct unix_sock' changed
|
||||||
|
byte size changed from 1216 to 1152
|
||||||
|
member 'struct list_head link' was removed
|
||||||
|
member 'unsigned long inflight' was removed
|
||||||
|
member 'spinlock_t lock' changed
|
||||||
|
offset changed by -192
|
||||||
|
member 'unsigned long gc_flags' was removed
|
||||||
|
4 members ('struct socket_wq peer_wq' .. 'struct sk_buff* oob_skb') changed
|
||||||
|
offset changed by -512
|
||||||
|
|
||||||
|
@@ -19,9 +19,6 @@ static inline struct unix_sock *unix_get_socket(struct file *filp)
|
|||||||
|
|
||||||
extern spinlock_t unix_gc_lock;
|
extern spinlock_t unix_gc_lock;
|
||||||
extern unsigned int unix_tot_inflight;
|
extern unsigned int unix_tot_inflight;
|
||||||
|
|
||||||
void unix_inflight(struct user_struct *user, struct file *fp);
|
|
||||||
void unix_notinflight(struct user_struct *user, struct file *fp);
|
|
||||||
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
|
void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
|
||||||
void unix_del_edges(struct scm_fp_list *fpl);
|
void unix_del_edges(struct scm_fp_list *fpl);
|
||||||
void unix_update_edges(struct unix_sock *receiver);
|
void unix_update_edges(struct unix_sock *receiver);
|
||||||
@@ -85,12 +82,7 @@ struct unix_sock {
|
|||||||
struct sock *peer;
|
struct sock *peer;
|
||||||
struct unix_vertex *vertex;
|
struct unix_vertex *vertex;
|
||||||
struct sock *listener;
|
struct sock *listener;
|
||||||
struct list_head link;
|
|
||||||
unsigned long inflight;
|
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
unsigned long gc_flags;
|
|
||||||
#define UNIX_GC_CANDIDATE 0
|
|
||||||
#define UNIX_GC_MAYBE_CYCLE 1
|
|
||||||
struct socket_wq peer_wq;
|
struct socket_wq peer_wq;
|
||||||
wait_queue_entry_t peer_wake;
|
wait_queue_entry_t peer_wake;
|
||||||
struct scm_stat scm_stat;
|
struct scm_stat scm_stat;
|
||||||
|
@@ -979,12 +979,10 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
|
|||||||
sk->sk_destruct = unix_sock_destructor;
|
sk->sk_destruct = unix_sock_destructor;
|
||||||
u = unix_sk(sk);
|
u = unix_sk(sk);
|
||||||
u->listener = NULL;
|
u->listener = NULL;
|
||||||
u->inflight = 0;
|
|
||||||
u->vertex = NULL;
|
u->vertex = NULL;
|
||||||
u->path.dentry = NULL;
|
u->path.dentry = NULL;
|
||||||
u->path.mnt = NULL;
|
u->path.mnt = NULL;
|
||||||
spin_lock_init(&u->lock);
|
spin_lock_init(&u->lock);
|
||||||
INIT_LIST_HEAD(&u->link);
|
|
||||||
mutex_init(&u->iolock); /* single task reading lock */
|
mutex_init(&u->iolock); /* single task reading lock */
|
||||||
mutex_init(&u->bindlock); /* single task binding lock */
|
mutex_init(&u->bindlock); /* single task binding lock */
|
||||||
init_waitqueue_head(&u->peer_wait);
|
init_waitqueue_head(&u->peer_wait);
|
||||||
@@ -1770,8 +1768,6 @@ static inline bool too_many_unix_fds(struct task_struct *p)
|
|||||||
|
|
||||||
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
|
|
||||||
if (too_many_unix_fds(current))
|
if (too_many_unix_fds(current))
|
||||||
return -ETOOMANYREFS;
|
return -ETOOMANYREFS;
|
||||||
|
|
||||||
@@ -1783,9 +1779,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
|||||||
if (!UNIXCB(skb).fp)
|
if (!UNIXCB(skb).fp)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
for (i = scm->fp->count - 1; i >= 0; i--)
|
|
||||||
unix_inflight(scm->fp->user, scm->fp->fp[i]);
|
|
||||||
|
|
||||||
if (unix_prepare_fpl(UNIXCB(skb).fp))
|
if (unix_prepare_fpl(UNIXCB(skb).fp))
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
@@ -1794,15 +1787,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
|||||||
|
|
||||||
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
|
|
||||||
scm->fp = UNIXCB(skb).fp;
|
scm->fp = UNIXCB(skb).fp;
|
||||||
UNIXCB(skb).fp = NULL;
|
UNIXCB(skb).fp = NULL;
|
||||||
|
|
||||||
unix_destroy_fpl(scm->fp);
|
unix_destroy_fpl(scm->fp);
|
||||||
|
|
||||||
for (i = scm->fp->count - 1; i >= 0; i--)
|
|
||||||
unix_notinflight(scm->fp->user, scm->fp->fp[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
||||||
|
@@ -322,6 +322,52 @@ static bool unix_vertex_dead(struct unix_vertex *vertex)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum unix_recv_queue_lock_class {
|
||||||
|
U_RECVQ_LOCK_NORMAL,
|
||||||
|
U_RECVQ_LOCK_EMBRYO,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
|
||||||
|
{
|
||||||
|
struct unix_vertex *vertex;
|
||||||
|
|
||||||
|
list_for_each_entry_reverse(vertex, scc, scc_entry) {
|
||||||
|
struct sk_buff_head *queue;
|
||||||
|
struct unix_edge *edge;
|
||||||
|
struct unix_sock *u;
|
||||||
|
|
||||||
|
edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
|
||||||
|
u = edge->predecessor;
|
||||||
|
queue = &u->sk.sk_receive_queue;
|
||||||
|
|
||||||
|
spin_lock(&queue->lock);
|
||||||
|
|
||||||
|
if (u->sk.sk_state == TCP_LISTEN) {
|
||||||
|
struct sk_buff *skb;
|
||||||
|
|
||||||
|
skb_queue_walk(queue, skb) {
|
||||||
|
struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
|
||||||
|
|
||||||
|
/* listener -> embryo order, the inversion never happens. */
|
||||||
|
spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
|
||||||
|
skb_queue_splice_init(embryo_queue, hitlist);
|
||||||
|
spin_unlock(&embryo_queue->lock);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
skb_queue_splice_init(queue, hitlist);
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
|
||||||
|
if (u->oob_skb) {
|
||||||
|
kfree_skb(u->oob_skb);
|
||||||
|
u->oob_skb = NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(&queue->lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool unix_scc_cyclic(struct list_head *scc)
|
static bool unix_scc_cyclic(struct list_head *scc)
|
||||||
{
|
{
|
||||||
struct unix_vertex *vertex;
|
struct unix_vertex *vertex;
|
||||||
@@ -345,7 +391,8 @@ static bool unix_scc_cyclic(struct list_head *scc)
|
|||||||
static LIST_HEAD(unix_visited_vertices);
|
static LIST_HEAD(unix_visited_vertices);
|
||||||
static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
|
static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
|
||||||
|
|
||||||
static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index)
|
static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
|
||||||
|
struct sk_buff_head *hitlist)
|
||||||
{
|
{
|
||||||
LIST_HEAD(vertex_stack);
|
LIST_HEAD(vertex_stack);
|
||||||
struct unix_edge *edge;
|
struct unix_edge *edge;
|
||||||
@@ -430,7 +477,9 @@ prev_vertex:
|
|||||||
scc_dead = unix_vertex_dead(vertex);
|
scc_dead = unix_vertex_dead(vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!unix_graph_maybe_cyclic)
|
if (scc_dead)
|
||||||
|
unix_collect_skb(&scc, hitlist);
|
||||||
|
else if (!unix_graph_maybe_cyclic)
|
||||||
unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
|
unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
|
||||||
|
|
||||||
list_del(&scc);
|
list_del(&scc);
|
||||||
@@ -441,7 +490,7 @@ prev_vertex:
|
|||||||
goto prev_vertex;
|
goto prev_vertex;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void unix_walk_scc(void)
|
static void unix_walk_scc(struct sk_buff_head *hitlist)
|
||||||
{
|
{
|
||||||
unsigned long last_index = UNIX_VERTEX_INDEX_START;
|
unsigned long last_index = UNIX_VERTEX_INDEX_START;
|
||||||
|
|
||||||
@@ -454,7 +503,7 @@ static void unix_walk_scc(void)
|
|||||||
struct unix_vertex *vertex;
|
struct unix_vertex *vertex;
|
||||||
|
|
||||||
vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
|
vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
|
||||||
__unix_walk_scc(vertex, &last_index);
|
__unix_walk_scc(vertex, &last_index, hitlist);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
|
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
|
||||||
@@ -463,7 +512,7 @@ static void unix_walk_scc(void)
|
|||||||
unix_graph_grouped = true;
|
unix_graph_grouped = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void unix_walk_scc_fast(void)
|
static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
|
||||||
{
|
{
|
||||||
while (!list_empty(&unix_unvisited_vertices)) {
|
while (!list_empty(&unix_unvisited_vertices)) {
|
||||||
struct unix_vertex *vertex;
|
struct unix_vertex *vertex;
|
||||||
@@ -480,279 +529,40 @@ static void unix_walk_scc_fast(void)
|
|||||||
scc_dead = unix_vertex_dead(vertex);
|
scc_dead = unix_vertex_dead(vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (scc_dead)
|
||||||
|
unix_collect_skb(&scc, hitlist);
|
||||||
|
|
||||||
list_del(&scc);
|
list_del(&scc);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
|
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
static LIST_HEAD(gc_candidates);
|
|
||||||
static LIST_HEAD(gc_inflight_list);
|
|
||||||
|
|
||||||
/* Keep the number of times in flight count for the file
|
|
||||||
* descriptor if it is for an AF_UNIX socket.
|
|
||||||
*/
|
|
||||||
void unix_inflight(struct user_struct *user, struct file *filp)
|
|
||||||
{
|
|
||||||
struct unix_sock *u = unix_get_socket(filp);
|
|
||||||
|
|
||||||
spin_lock(&unix_gc_lock);
|
|
||||||
|
|
||||||
if (u) {
|
|
||||||
if (!u->inflight) {
|
|
||||||
WARN_ON_ONCE(!list_empty(&u->link));
|
|
||||||
list_add_tail(&u->link, &gc_inflight_list);
|
|
||||||
} else {
|
|
||||||
WARN_ON_ONCE(list_empty(&u->link));
|
|
||||||
}
|
|
||||||
u->inflight++;
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock(&unix_gc_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void unix_notinflight(struct user_struct *user, struct file *filp)
|
|
||||||
{
|
|
||||||
struct unix_sock *u = unix_get_socket(filp);
|
|
||||||
|
|
||||||
spin_lock(&unix_gc_lock);
|
|
||||||
|
|
||||||
if (u) {
|
|
||||||
WARN_ON_ONCE(!u->inflight);
|
|
||||||
WARN_ON_ONCE(list_empty(&u->link));
|
|
||||||
|
|
||||||
u->inflight--;
|
|
||||||
if (!u->inflight)
|
|
||||||
list_del_init(&u->link);
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock(&unix_gc_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
|
|
||||||
struct sk_buff_head *hitlist)
|
|
||||||
{
|
|
||||||
struct sk_buff *skb;
|
|
||||||
struct sk_buff *next;
|
|
||||||
|
|
||||||
spin_lock(&x->sk_receive_queue.lock);
|
|
||||||
skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
|
|
||||||
/* Do we have file descriptors ? */
|
|
||||||
if (UNIXCB(skb).fp) {
|
|
||||||
bool hit = false;
|
|
||||||
/* Process the descriptors of this socket */
|
|
||||||
int nfd = UNIXCB(skb).fp->count;
|
|
||||||
struct file **fp = UNIXCB(skb).fp->fp;
|
|
||||||
|
|
||||||
while (nfd--) {
|
|
||||||
/* Get the socket the fd matches if it indeed does so */
|
|
||||||
struct unix_sock *u = unix_get_socket(*fp++);
|
|
||||||
|
|
||||||
/* Ignore non-candidates, they could have been added
|
|
||||||
* to the queues after starting the garbage collection
|
|
||||||
*/
|
|
||||||
if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
|
|
||||||
hit = true;
|
|
||||||
|
|
||||||
func(u);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (hit && hitlist != NULL) {
|
|
||||||
__skb_unlink(skb, &x->sk_receive_queue);
|
|
||||||
__skb_queue_tail(hitlist, skb);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
spin_unlock(&x->sk_receive_queue.lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
|
|
||||||
struct sk_buff_head *hitlist)
|
|
||||||
{
|
|
||||||
if (x->sk_state != TCP_LISTEN) {
|
|
||||||
scan_inflight(x, func, hitlist);
|
|
||||||
} else {
|
|
||||||
struct sk_buff *skb;
|
|
||||||
struct sk_buff *next;
|
|
||||||
struct unix_sock *u;
|
|
||||||
LIST_HEAD(embryos);
|
|
||||||
|
|
||||||
/* For a listening socket collect the queued embryos
|
|
||||||
* and perform a scan on them as well.
|
|
||||||
*/
|
|
||||||
spin_lock(&x->sk_receive_queue.lock);
|
|
||||||
skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
|
|
||||||
u = unix_sk(skb->sk);
|
|
||||||
|
|
||||||
/* An embryo cannot be in-flight, so it's safe
|
|
||||||
* to use the list link.
|
|
||||||
*/
|
|
||||||
WARN_ON_ONCE(!list_empty(&u->link));
|
|
||||||
list_add_tail(&u->link, &embryos);
|
|
||||||
}
|
|
||||||
spin_unlock(&x->sk_receive_queue.lock);
|
|
||||||
|
|
||||||
while (!list_empty(&embryos)) {
|
|
||||||
u = list_entry(embryos.next, struct unix_sock, link);
|
|
||||||
scan_inflight(&u->sk, func, hitlist);
|
|
||||||
list_del_init(&u->link);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dec_inflight(struct unix_sock *usk)
|
|
||||||
{
|
|
||||||
usk->inflight--;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void inc_inflight(struct unix_sock *usk)
|
|
||||||
{
|
|
||||||
usk->inflight++;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void inc_inflight_move_tail(struct unix_sock *u)
|
|
||||||
{
|
|
||||||
u->inflight++;
|
|
||||||
|
|
||||||
/* If this still might be part of a cycle, move it to the end
|
|
||||||
* of the list, so that it's checked even if it was already
|
|
||||||
* passed over
|
|
||||||
*/
|
|
||||||
if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
|
|
||||||
list_move_tail(&u->link, &gc_candidates);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool gc_in_progress;
|
static bool gc_in_progress;
|
||||||
|
|
||||||
static void __unix_gc(struct work_struct *work)
|
static void __unix_gc(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct sk_buff_head hitlist;
|
struct sk_buff_head hitlist;
|
||||||
struct unix_sock *u, *next;
|
|
||||||
LIST_HEAD(not_cycle_list);
|
|
||||||
struct list_head cursor;
|
|
||||||
|
|
||||||
spin_lock(&unix_gc_lock);
|
spin_lock(&unix_gc_lock);
|
||||||
|
|
||||||
if (!unix_graph_maybe_cyclic)
|
if (!unix_graph_maybe_cyclic) {
|
||||||
|
spin_unlock(&unix_gc_lock);
|
||||||
goto skip_gc;
|
goto skip_gc;
|
||||||
|
}
|
||||||
|
|
||||||
|
__skb_queue_head_init(&hitlist);
|
||||||
|
|
||||||
if (unix_graph_grouped)
|
if (unix_graph_grouped)
|
||||||
unix_walk_scc_fast();
|
unix_walk_scc_fast(&hitlist);
|
||||||
else
|
else
|
||||||
unix_walk_scc();
|
unix_walk_scc(&hitlist);
|
||||||
|
|
||||||
/* First, select candidates for garbage collection. Only
|
|
||||||
* in-flight sockets are considered, and from those only ones
|
|
||||||
* which don't have any external reference.
|
|
||||||
*
|
|
||||||
* Holding unix_gc_lock will protect these candidates from
|
|
||||||
* being detached, and hence from gaining an external
|
|
||||||
* reference. Since there are no possible receivers, all
|
|
||||||
* buffers currently on the candidates' queues stay there
|
|
||||||
* during the garbage collection.
|
|
||||||
*
|
|
||||||
* We also know that no new candidate can be added onto the
|
|
||||||
* receive queues. Other, non candidate sockets _can_ be
|
|
||||||
* added to queue, so we must make sure only to touch
|
|
||||||
* candidates.
|
|
||||||
*
|
|
||||||
* Embryos, though never candidates themselves, affect which
|
|
||||||
* candidates are reachable by the garbage collector. Before
|
|
||||||
* being added to a listener's queue, an embryo may already
|
|
||||||
* receive data carrying SCM_RIGHTS, potentially making the
|
|
||||||
* passed socket a candidate that is not yet reachable by the
|
|
||||||
* collector. It becomes reachable once the embryo is
|
|
||||||
* enqueued. Therefore, we must ensure that no SCM-laden
|
|
||||||
* embryo appears in a (candidate) listener's queue between
|
|
||||||
* consecutive scan_children() calls.
|
|
||||||
*/
|
|
||||||
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
|
|
||||||
struct sock *sk = &u->sk;
|
|
||||||
long total_refs;
|
|
||||||
|
|
||||||
total_refs = file_count(sk->sk_socket->file);
|
|
||||||
|
|
||||||
WARN_ON_ONCE(!u->inflight);
|
|
||||||
WARN_ON_ONCE(total_refs < u->inflight);
|
|
||||||
if (total_refs == u->inflight) {
|
|
||||||
list_move_tail(&u->link, &gc_candidates);
|
|
||||||
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
|
|
||||||
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
|
|
||||||
|
|
||||||
if (sk->sk_state == TCP_LISTEN) {
|
|
||||||
unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
|
|
||||||
unix_state_unlock(sk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Now remove all internal in-flight reference to children of
|
|
||||||
* the candidates.
|
|
||||||
*/
|
|
||||||
list_for_each_entry(u, &gc_candidates, link)
|
|
||||||
scan_children(&u->sk, dec_inflight, NULL);
|
|
||||||
|
|
||||||
/* Restore the references for children of all candidates,
|
|
||||||
* which have remaining references. Do this recursively, so
|
|
||||||
* only those remain, which form cyclic references.
|
|
||||||
*
|
|
||||||
* Use a "cursor" link, to make the list traversal safe, even
|
|
||||||
* though elements might be moved about.
|
|
||||||
*/
|
|
||||||
list_add(&cursor, &gc_candidates);
|
|
||||||
while (cursor.next != &gc_candidates) {
|
|
||||||
u = list_entry(cursor.next, struct unix_sock, link);
|
|
||||||
|
|
||||||
/* Move cursor to after the current position. */
|
|
||||||
list_move(&cursor, &u->link);
|
|
||||||
|
|
||||||
if (u->inflight) {
|
|
||||||
list_move_tail(&u->link, ¬_cycle_list);
|
|
||||||
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
|
|
||||||
scan_children(&u->sk, inc_inflight_move_tail, NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
list_del(&cursor);
|
|
||||||
|
|
||||||
/* Now gc_candidates contains only garbage. Restore original
|
|
||||||
* inflight counters for these as well, and remove the skbuffs
|
|
||||||
* which are creating the cycle(s).
|
|
||||||
*/
|
|
||||||
skb_queue_head_init(&hitlist);
|
|
||||||
list_for_each_entry(u, &gc_candidates, link) {
|
|
||||||
scan_children(&u->sk, inc_inflight, &hitlist);
|
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
|
|
||||||
if (u->oob_skb) {
|
|
||||||
kfree_skb(u->oob_skb);
|
|
||||||
u->oob_skb = NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* not_cycle_list contains those sockets which do not make up a
|
|
||||||
* cycle. Restore these to the inflight list.
|
|
||||||
*/
|
|
||||||
while (!list_empty(¬_cycle_list)) {
|
|
||||||
u = list_entry(not_cycle_list.next, struct unix_sock, link);
|
|
||||||
__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
|
|
||||||
list_move_tail(&u->link, &gc_inflight_list);
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock(&unix_gc_lock);
|
spin_unlock(&unix_gc_lock);
|
||||||
|
|
||||||
/* Here we are. Hitlist is filled. Die. */
|
|
||||||
__skb_queue_purge(&hitlist);
|
__skb_queue_purge(&hitlist);
|
||||||
|
|
||||||
spin_lock(&unix_gc_lock);
|
|
||||||
|
|
||||||
/* All candidates should have been detached by now. */
|
|
||||||
WARN_ON_ONCE(!list_empty(&gc_candidates));
|
|
||||||
skip_gc:
|
skip_gc:
|
||||||
/* Paired with READ_ONCE() in wait_for_unix_gc(). */
|
|
||||||
WRITE_ONCE(gc_in_progress, false);
|
WRITE_ONCE(gc_in_progress, false);
|
||||||
|
|
||||||
spin_unlock(&unix_gc_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static DECLARE_WORK(unix_gc_work, __unix_gc);
|
static DECLARE_WORK(unix_gc_work, __unix_gc);
|
||||||
|
Reference in New Issue
Block a user