fs, proc: introduce /proc/<pid>/task/<tid>/children entry

When we do checkpoint of a task we need to know the list of children the
task, has but there is no easy and fast way to generate reverse
parent->children chain from arbitrary <pid> (while a parent pid is
provided in "PPid" field of /proc/<pid>/status).

So instead of walking over all pids in the system (creating one big
process tree in memory, just to figure out which children a task has) --
we add explicit /proc/<pid>/task/<tid>/children entry, because the kernel
already has this kind of information but it is not yet exported.

This is a first level children, not the whole process tree.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Cyrill Gorcunov
2012-05-31 16:26:43 -07:00
committed by Linus Torvalds
parent 98ed57eef9
commit 818411616b
4 changed files with 145 additions and 0 deletions

View File

@@ -565,3 +565,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
static struct pid *
get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
{
struct task_struct *start, *task;
struct pid *pid = NULL;
read_lock(&tasklist_lock);
start = pid_task(proc_pid(inode), PIDTYPE_PID);
if (!start)
goto out;
/*
* Lets try to continue searching first, this gives
* us significant speedup on children-rich processes.
*/
if (pid_prev) {
task = pid_task(pid_prev, PIDTYPE_PID);
if (task && task->real_parent == start &&
!(list_empty(&task->sibling))) {
if (list_is_last(&task->sibling, &start->children))
goto out;
task = list_first_entry(&task->sibling,
struct task_struct, sibling);
pid = get_pid(task_pid(task));
goto out;
}
}
/*
* Slow search case.
*
* We might miss some children here if children
* are exited while we were not holding the lock,
* but it was never promised to be accurate that
* much.
*
* "Just suppose that the parent sleeps, but N children
* exit after we printed their tids. Now the slow paths
* skips N extra children, we miss N tasks." (c)
*
* So one need to stop or freeze the leader and all
* its children to get a precise result.
*/
list_for_each_entry(task, &start->children, sibling) {
if (pos-- == 0) {
pid = get_pid(task_pid(task));
break;
}
}
out:
read_unlock(&tasklist_lock);
return pid;
}
static int children_seq_show(struct seq_file *seq, void *v)
{
struct inode *inode = seq->private;
pid_t pid;
pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
return seq_printf(seq, "%d ", pid);
}
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
return get_children_pid(seq->private, NULL, *pos);
}
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct pid *pid;
pid = get_children_pid(seq->private, v, *pos + 1);
put_pid(v);
++*pos;
return pid;
}
static void children_seq_stop(struct seq_file *seq, void *v)
{
put_pid(v);
}
static const struct seq_operations children_seq_ops = {
.start = children_seq_start,
.next = children_seq_next,
.stop = children_seq_stop,
.show = children_seq_show,
};
static int children_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *m;
int ret;
ret = seq_open(file, &children_seq_ops);
if (ret)
return ret;
m = file->private_data;
m->private = inode;
return ret;
}
int children_seq_release(struct inode *inode, struct file *file)
{
seq_release(inode, file);
return 0;
}
const struct file_operations proc_tid_children_operations = {
.open = children_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = children_seq_release,
};
#endif /* CONFIG_CHECKPOINT_RESTORE */