fs, proc: introduce /proc/<pid>/task/<tid>/children entry
When we checkpoint a task we need to know the list of children the task has, but there is no easy and fast way to generate the reverse parent->children chain from an arbitrary <pid> (while the parent pid is provided in the "PPid" field of /proc/<pid>/status). So instead of walking over all pids in the system (creating one big process tree in memory, just to figure out which children a task has) -- we add an explicit /proc/<pid>/task/<tid>/children entry, because the kernel already has this kind of information but it is not yet exported. This entry lists first-level children only, not the whole process tree. Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Kees Cook <keescook@chromium.org> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Serge Hallyn <serge.hallyn@canonical.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
98ed57eef9
commit
818411616b
123
fs/proc/array.c
123
fs/proc/array.c
@@ -565,3 +565,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CHECKPOINT_RESTORE
|
||||
static struct pid *
|
||||
get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
|
||||
{
|
||||
struct task_struct *start, *task;
|
||||
struct pid *pid = NULL;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
start = pid_task(proc_pid(inode), PIDTYPE_PID);
|
||||
if (!start)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Lets try to continue searching first, this gives
|
||||
* us significant speedup on children-rich processes.
|
||||
*/
|
||||
if (pid_prev) {
|
||||
task = pid_task(pid_prev, PIDTYPE_PID);
|
||||
if (task && task->real_parent == start &&
|
||||
!(list_empty(&task->sibling))) {
|
||||
if (list_is_last(&task->sibling, &start->children))
|
||||
goto out;
|
||||
task = list_first_entry(&task->sibling,
|
||||
struct task_struct, sibling);
|
||||
pid = get_pid(task_pid(task));
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Slow search case.
|
||||
*
|
||||
* We might miss some children here if children
|
||||
* are exited while we were not holding the lock,
|
||||
* but it was never promised to be accurate that
|
||||
* much.
|
||||
*
|
||||
* "Just suppose that the parent sleeps, but N children
|
||||
* exit after we printed their tids. Now the slow paths
|
||||
* skips N extra children, we miss N tasks." (c)
|
||||
*
|
||||
* So one need to stop or freeze the leader and all
|
||||
* its children to get a precise result.
|
||||
*/
|
||||
list_for_each_entry(task, &start->children, sibling) {
|
||||
if (pos-- == 0) {
|
||||
pid = get_pid(task_pid(task));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
read_unlock(&tasklist_lock);
|
||||
return pid;
|
||||
}
|
||||
|
||||
static int children_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct inode *inode = seq->private;
|
||||
pid_t pid;
|
||||
|
||||
pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
|
||||
return seq_printf(seq, "%d ", pid);
|
||||
}
|
||||
|
||||
/*
 * seq_file ->start() callback: fetch the child at index *pos.
 *
 * No previous pid is available at this point, so the lookup is done
 * purely by position. Returns the referenced struct pid or NULL.
 */
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct inode *inode = seq->private;
	loff_t index = *pos;

	return get_children_pid(inode, NULL, index);
}
|
||||
|
||||
/*
 * seq_file ->next() callback: advance to the child after @v.
 *
 * The current pid @v is passed along as a hint for the lookup and its
 * reference is dropped only after the lookup has finished. *pos is
 * incremented regardless of whether another child was found.
 *
 * Returns the next referenced struct pid, or NULL when there is none.
 */
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct pid *next;
	loff_t next_pos = *pos + 1;

	next = get_children_pid(seq->private, v, next_pos);

	/* Done with the previous entry; release the reference we held. */
	put_pid(v);

	*pos = next_pos;
	return next;
}
|
||||
|
||||
/*
 * seq_file ->stop() callback: drop the reference on the pid that the
 * iteration was positioned at (@v) via put_pid().
 */
static void children_seq_stop(struct seq_file *seq, void *v)
{
	struct pid *current_pid = v;

	put_pid(current_pid);
}
|
||||
|
||||
static const struct seq_operations children_seq_ops = {
|
||||
.start = children_seq_start,
|
||||
.next = children_seq_next,
|
||||
.stop = children_seq_stop,
|
||||
.show = children_seq_show,
|
||||
};
|
||||
|
||||
static int children_seq_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *m;
|
||||
int ret;
|
||||
|
||||
ret = seq_open(file, &children_seq_ops);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
m = file->private_data;
|
||||
m->private = inode;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * ->release() handler for the "children" proc entry.
 *
 * Tears down the seq_file via seq_release(). The value returned by
 * seq_release() is discarded; this handler always reports 0.
 */
int children_seq_release(struct inode *inode, struct file *file)
{
	(void)seq_release(inode, file);

	return 0;
}
|
||||
|
||||
const struct file_operations proc_tid_children_operations = {
|
||||
.open = children_seq_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = children_seq_release,
|
||||
};
|
||||
#endif /* CONFIG_CHECKPOINT_RESTORE */
|
||||
|
||||
Reference in New Issue
Block a user