Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton:
 - the "misc" tree - stuff from all over the map
 - checkpatch updates
 - fatfs
 - kmod changes
 - procfs
 - cpumask
 - UML
 - kexec
 - mqueue
 - rapidio
 - pidns
 - some checkpoint-restore feature work.  Reluctantly.  Most of it
   delayed a release.  I'm still rather worried that we don't have a
   clear roadmap to completion for this work.

* emailed from Andrew Morton <akpm@linux-foundation.org>: (78 patches)
  kconfig: update compression algorithm info
  c/r: prctl: add ability to set new mm_struct::exe_file
  c/r: prctl: extend PR_SET_MM to set up more mm_struct entries
  c/r: procfs: add arg_start/end, env_start/end and exit_code members to /proc/$pid/stat
  syscalls, x86: add __NR_kcmp syscall
  fs, proc: introduce /proc/<pid>/task/<tid>/children entry
  sysctl: make kernel.ns_last_pid control dependent on CHECKPOINT_RESTORE
  aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector()
  eventfd: change int to __u64 in eventfd_signal()
  fs/nls: add Apple NLS
  pidns: make killed children autoreap
  pidns: use task_active_pid_ns in do_notify_parent
  rapidio/tsi721: add DMA engine support
  rapidio: add DMA engine support for RIO data transfers
  ipc/mqueue: add rbtree node caching support
  tools/selftests: add mq_perf_tests
  ipc/mqueue: strengthen checks on mqueue creation
  ipc/mqueue: correct mq_attr_ok test
  ipc/mqueue: improve performance of send/recv
  selftests: add mq_open_tests
  ...
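Editor's note: the headline user-visible addition in this merge is the kcmp() syscall (new file kernel/kcmp.c, in the diff below), which lets a checkpoint/restore tool test whether two processes share a resource without exposing kernel pointers. A minimal sketch of calling it from userspace follows; this is a hedged example, not part of the merge. It assumes a kernel built from this tree, and the __NR_kcmp fallback value (312) is the x86-64 number added by this series; there is no glibc wrapper, so the raw syscall(2) interface is used.

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_kcmp
#define __NR_kcmp 312			/* x86-64 value from this series */
#endif

/* Values mirror the new include/linux/kcmp.h (enum kcmp_type). */
enum { KCMP_FILE, KCMP_VM, KCMP_FILES, KCMP_FS,
       KCMP_SIGHAND, KCMP_IO, KCMP_SYSVSEM, KCMP_TYPES };

static long kcmp(pid_t pid1, pid_t pid2, int type,
                 unsigned long idx1, unsigned long idx2)
{
        /* Returns 0 equal, 1 less, 2 greater (see kcmp_ptr() below);
         * -1/ENOSYS on kernels that predate this merge. */
        return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
}

int main(void)
{
        pid_t self = getpid();

        /* fd 0 compared with itself: same struct file, expect 0. */
        printf("fd0 vs fd0: %ld\n", kcmp(self, self, KCMP_FILE, 0, 0));

        /* A process trivially shares its own mm: also expect 0. */
        printf("vm  vs vm : %ld\n", kcmp(self, self, KCMP_VM, 0, 0));
        return 0;
}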
kernel/Makefile
@@ -25,6 +25,9 @@ endif
 obj-y += sched/
 obj-y += power/
 
+ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
+obj-$(CONFIG_X86) += kcmp.o
+endif
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
kernel/cpu.c (44 lines changed)
@@ -10,7 +10,10 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+/**
+ * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
+ * @cpu: a CPU id
+ *
+ * This function walks all processes, finds a valid mm struct for each one and
+ * then clears a corresponding bit in mm's cpumask. While this all sounds
+ * trivial, there are various non-obvious corner cases, which this function
+ * tries to solve in a safe manner.
+ *
+ * Also note that the function uses a somewhat relaxed locking scheme, so it may
+ * be called only for an already offlined CPU.
+ */
+void clear_tasks_mm_cpumask(int cpu)
+{
+        struct task_struct *p;
+
+        /*
+         * This function is called after the cpu is taken down and marked
+         * offline, so its not like new tasks will ever get this cpu set in
+         * their mm mask. -- Peter Zijlstra
+         * Thus, we may use rcu_read_lock() here, instead of grabbing
+         * full-fledged tasklist_lock.
+         */
+        WARN_ON(cpu_online(cpu));
+        rcu_read_lock();
+        for_each_process(p) {
+                struct task_struct *t;
+
+                /*
+                 * Main thread might exit, but other threads may still have
+                 * a valid mm. Find one.
+                 */
+                t = find_lock_task_mm(p);
+                if (!t)
+                        continue;
+                cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+                task_unlock(t);
+        }
+        rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
         struct task_struct *p;
kernel/cpu_pm.c
@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
 /**
- * cpm_pm_enter - CPU low power entry notifier
+ * cpu_pm_enter - CPU low power entry notifier
  *
  * Notifies listeners that a single CPU is entering a low power state that may
  * cause some blocks in the same power domain as the cpu to reset.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  * Must be called on the affected CPU with interrupts disabled. Platform is
  * responsible for ensuring that cpu_pm_enter is not called twice on the same
  * CPU before cpu_pm_exit is called. Notified drivers can include VFP
- * co-processor, interrupt controller and it's PM extensions, local CPU
+ * co-processor, interrupt controller and its PM extensions, local CPU
  * timers context save/restore which shouldn't be interrupted. Hence it
  * must be called with interrupts disabled.
  *
@@ -115,13 +115,13 @@ int cpu_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_pm_enter);
 
 /**
- * cpm_pm_exit - CPU low power exit notifier
+ * cpu_pm_exit - CPU low power exit notifier
  *
  * Notifies listeners that a single CPU is exiting a low power state that may
  * have caused some blocks in the same power domain as the cpu to reset.
 *
 * Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
 *
 * Return conditions are same as __raw_notifier_call_chain.
@@ -139,7 +139,7 @@ int cpu_pm_exit(void)
 EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
 /**
- * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ * cpu_cluster_pm_enter - CPU cluster low power entry notifier
  *
  * Notifies listeners that all cpus in a power domain are entering a low power
  * state that may cause some blocks in the same power domain to reset.
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit);
  * Must be called after cpu_pm_enter has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Must be called with interrupts disabled.
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
 
 /**
- * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ * cpu_cluster_pm_exit - CPU cluster low power exit notifier
  *
  * Notifies listeners that all cpus in a power domain are exiting form a
  * low power state that may have caused some blocks in the same power domain
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  * Must be called after cpu_pm_exit has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
kernel/exit.c
@@ -884,9 +884,9 @@ static void check_stack_usage(void)
 
         spin_lock(&low_water_lock);
         if (free < lowest_to_date) {
-                printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
-                                "left\n",
-                                current->comm, free);
+                printk(KERN_WARNING "%s (%d) used greatest stack depth: "
+                                "%lu bytes left\n",
+                                current->comm, task_pid_nr(current), free);
                 lowest_to_date = free;
         }
         spin_unlock(&low_water_lock);
@@ -1214,7 +1214,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
         unsigned long state;
         int retval, status, traced;
         pid_t pid = task_pid_vnr(p);
-        uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
+        uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
         struct siginfo __user *infop;
 
         if (!likely(wo->wo_flags & WEXITED))
kernel/fork.c
@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
         /* Get rid of any cached register state */
         deactivate_mm(tsk, mm);
 
-        if (tsk->vfork_done)
-                complete_vfork_done(tsk);
-
         /*
          * If we're exiting normally, clear a user-space tid field if
          * requested. We leave this alone when dying by signal, to leave
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
                 }
                 tsk->clear_child_tid = NULL;
         }
+
+        /*
+         * All done, finally we can wake up parent and return this mm to him.
+         * Also kthread_stop() uses this completion for synchronization.
+         */
+        if (tsk->vfork_done)
+                complete_vfork_done(tsk);
 }
 
 /*
kernel/irq/manage.c
@@ -7,6 +7,8 @@
  * This file contains driver APIs to the irq subsystem.
  */
 
+#define pr_fmt(fmt) "genirq: " fmt
+
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
@@ -565,7 +567,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
                  * IRQF_TRIGGER_* but the PIC does not support multiple
                  * flow-types?
                  */
-                pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq,
+                pr_debug("No set_type function for IRQ %d (%s)\n", irq,
                          chip ? (chip->name ? : "unknown") : "unknown");
                 return 0;
         }
@@ -600,7 +602,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
                 ret = 0;
                 break;
         default:
-                pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n",
+                pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
                        flags, irq, chip->irq_set_type);
         }
         if (unmask)
@@ -837,7 +839,7 @@ void exit_irq_thread(void)
 
         action = kthread_data(tsk);
 
-        pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+        pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
                tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
 
         desc = irq_to_desc(action->irq);
@@ -1044,7 +1046,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                          * has. The type flags are unreliable as the
                          * underlying chip implementation can override them.
                          */
-                        pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
+                        pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
                                irq);
                         ret = -EINVAL;
                         goto out_mask;
@@ -1095,7 +1097,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
                 if (nmsk != omsk)
                         /* hope the handler works with current trigger mode */
-                        pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n",
+                        pr_warning("irq %d uses trigger mode %u; requested %u\n",
                                    irq, nmsk, omsk);
         }
 
@@ -1133,7 +1135,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 mismatch:
         if (!(new->flags & IRQF_PROBE_SHARED)) {
-                pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
+                pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
                        irq, new->flags, new->name, old->flags, old->name);
 #ifdef CONFIG_DEBUG_SHIRQ
                 dump_stack();
kernel/kcmp.c (new file, 196 lines)
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointers values and compare the production instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+        return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+        long ret;
+
+        ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+
+        return (ret < 0) | ((ret > 0) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+        struct file *file = NULL;
+
+        task_lock(task);
+        rcu_read_lock();
+
+        if (task->files)
+                file = fcheck_files(task->files, idx);
+
+        rcu_read_unlock();
+        task_unlock(task);
+
+        return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+        if (likely(m2 != m1))
+                mutex_unlock(m2);
+        mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+        int err;
+
+        if (m2 > m1)
+                swap(m1, m2);
+
+        err = mutex_lock_killable(m1);
+        if (!err && likely(m1 != m2)) {
+                err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+                if (err)
+                        mutex_unlock(m1);
+        }
+
+        return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+                unsigned long, idx1, unsigned long, idx2)
+{
+        struct task_struct *task1, *task2;
+        int ret;
+
+        rcu_read_lock();
+
+        /*
+         * Tasks are looked up in caller's PID namespace only.
+         */
+        task1 = find_task_by_vpid(pid1);
+        task2 = find_task_by_vpid(pid2);
+        if (!task1 || !task2)
+                goto err_no_task;
+
+        get_task_struct(task1);
+        get_task_struct(task2);
+
+        rcu_read_unlock();
+
+        /*
+         * One should have enough rights to inspect task details.
+         */
+        ret = kcmp_lock(&task1->signal->cred_guard_mutex,
+                        &task2->signal->cred_guard_mutex);
+        if (ret)
+                goto err;
+        if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+            !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+                ret = -EPERM;
+                goto err_unlock;
+        }
+
+        switch (type) {
+        case KCMP_FILE: {
+                struct file *filp1, *filp2;
+
+                filp1 = get_file_raw_ptr(task1, idx1);
+                filp2 = get_file_raw_ptr(task2, idx2);
+
+                if (filp1 && filp2)
+                        ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
+                else
+                        ret = -EBADF;
+                break;
+        }
+        case KCMP_VM:
+                ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+                break;
+        case KCMP_FILES:
+                ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+                break;
+        case KCMP_FS:
+                ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+                break;
+        case KCMP_SIGHAND:
+                ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+                break;
+        case KCMP_IO:
+                ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+                break;
+        case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+                ret = kcmp_ptr(task1->sysvsem.undo_list,
+                               task2->sysvsem.undo_list,
+                               KCMP_SYSVSEM);
+#else
+                ret = -EOPNOTSUPP;
+#endif
+                break;
+        default:
+                ret = -EINVAL;
+                break;
+        }
+
+err_unlock:
+        kcmp_unlock(&task1->signal->cred_guard_mutex,
+                    &task2->signal->cred_guard_mutex);
+err:
+        put_task_struct(task1);
+        put_task_struct(task2);
+
+        return ret;
+
+err_no_task:
+        rcu_read_unlock();
+        return -ESRCH;
+}
+
+static __init int kcmp_cookies_init(void)
+{
+        int i;
+
+        get_random_bytes(cookies, sizeof(cookies));
+
+        for (i = 0; i < KCMP_TYPES; i++)
+                cookies[i][1] |= (~(~0UL >> 1) | 1);
+
+        return 0;
+}
+arch_initcall(kcmp_cookies_init);
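Editor's note on the cookie setup above: `cookies[i][1] |= (~(~0UL >> 1) | 1)` forces the multiplier to be odd (bit 0) and large (top bit). Because any odd number is coprime to 2^n, multiplying by it is a bijection on n-bit values, so distinct pointers stay distinct after obfuscation and equality is preserved while the relative order is scrambled. A small user-space sketch of the same two-step scheme, with hypothetical constants standing in for the kernel's random cookies:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's per-type random cookies. */
static const unsigned long cookie_xor = 0x9e3779b97f4a7c15UL;
static const unsigned long cookie_mul = 0xc2b2ae3d27d4eb4fUL;   /* odd, top bit set */

/* Same two steps as kptr_obfuscate(): xor, then multiply by a large odd
 * constant. Unsigned arithmetic gives well-defined wrap-around in C. */
static unsigned long obfuscate(unsigned long v)
{
        return (v ^ cookie_xor) * cookie_mul;
}

int main(void)
{
        unsigned long a = 0xffff880012345678UL;
        unsigned long b = 0xffff8800deadbeefUL;

        /* Equal inputs still compare equal after obfuscation... */
        printf("equal stays equal: %d\n", obfuscate(a) == obfuscate(a));

        /* ...but the ordering of distinct values is permuted. */
        printf("a < b: %d, obf(a) < obf(b): %d\n",
               a < b, obfuscate(a) < obfuscate(b));
        return 0;
}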
kernel/kmod.c
@@ -221,13 +221,12 @@ fail:
         return 0;
 }
 
-void call_usermodehelper_freeinfo(struct subprocess_info *info)
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
         if (info->cleanup)
                 (*info->cleanup)(info);
         kfree(info);
 }
-EXPORT_SYMBOL(call_usermodehelper_freeinfo);
 
 static void umh_complete(struct subprocess_info *sub_info)
 {
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
 
 /**
  * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
- * depth: New value to assign to usermodehelper_disabled.
+ * @depth: New value to assign to usermodehelper_disabled.
  *
  * Change the value of usermodehelper_disabled (under umhelper_sem locked for
  * writing) and wakeup tasks waiting for it to change.
@@ -479,6 +478,7 @@ static void helper_unlock(void)
  * structure. This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  */
+static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
                                                   char **envp, gfp_t gfp_mask)
 {
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 out:
         return sub_info;
 }
-EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_setfns - set a cleanup/init function
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
  * Function must be runnable in either a process context or the
  * context in which call_usermodehelper_exec is called.
  */
+static
 void call_usermodehelper_setfns(struct subprocess_info *info,
                 int (*init)(struct subprocess_info *info, struct cred *new),
                 void (*cleanup)(struct subprocess_info *info),
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
         info->init = init;
         info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setfns);
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
  * asynchronously if wait is not set, and runs as a child of keventd.
  * (ie. it runs with full root capabilities).
  */
+static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
         DECLARE_COMPLETION_ONSTACK(done);
@@ -576,7 +576,25 @@ unlock:
         helper_unlock();
         return retval;
 }
-EXPORT_SYMBOL(call_usermodehelper_exec);
+
+int call_usermodehelper_fns(
+        char *path, char **argv, char **envp, int wait,
+        int (*init)(struct subprocess_info *info, struct cred *new),
+        void (*cleanup)(struct subprocess_info *), void *data)
+{
+        struct subprocess_info *info;
+        gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+
+        info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+        if (info == NULL)
+                return -ENOMEM;
+
+        call_usermodehelper_setfns(info, init, cleanup, data);
+
+        return call_usermodehelper_exec(info, wait);
+}
+EXPORT_SYMBOL(call_usermodehelper_fns);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
                         void __user *buffer, size_t *lenp, loff_t *ppos)
kernel/pid_namespace.c
@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
         int nr;
         int rc;
-        struct task_struct *task;
+        struct task_struct *task, *me = current;
+
+        /* Ignore SIGCHLD causing any terminated children to autoreap */
+        spin_lock_irq(&me->sighand->siglock);
+        me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
+        spin_unlock_irq(&me->sighand->siglock);
 
         /*
          * The last thread in the cgroup-init thread group is terminating.
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
         return;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = {
         },
         { }
 };
-
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+#endif  /* CONFIG_CHECKPOINT_RESTORE */
 
 int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 {
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 static __init int pid_namespaces_init(void)
 {
         pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
         register_sysctl_paths(kern_path, pid_ns_ctl_table);
+#endif
         return 0;
 }
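Editor's note: the SIG_IGN trick in zap_pid_ns_processes() above leans on standard POSIX semantics; a parent whose SIGCHLD disposition is SIG_IGN has its terminated children reaped automatically rather than left as zombies. A minimal user-space demonstration of that behavior:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        /* With SIGCHLD ignored, exiting children autoreap: no zombies. */
        signal(SIGCHLD, SIG_IGN);

        if (fork() == 0)
                _exit(0);               /* child exits immediately */

        sleep(1);                       /* give the child time to die */

        /* Nothing to reap: the kernel already discarded the child. */
        if (waitpid(-1, NULL, 0) < 0 && errno == ECHILD)
                printf("child was autoreaped\n");
        return 0;
}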
kernel/resource.c
@@ -515,8 +515,8 @@ out:
  * @root: root resource descriptor
  * @new: resource descriptor desired by caller
  * @size: requested resource region size
- * @min: minimum size to allocate
- * @max: maximum size to allocate
+ * @min: minimum boundary to allocate
+ * @max: maximum boundary to allocate
  * @align: alignment requested, in bytes
  * @alignf: alignment function, optional, called if not NULL
  * @alignf_data: arbitrary data to pass to the @alignf function
kernel/signal.c
@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
         info.si_signo = sig;
         info.si_errno = 0;
         /*
-         * we are under tasklist_lock here so our parent is tied to
-         * us and cannot exit and release its namespace.
+         * We are under tasklist_lock here so our parent is tied to
+         * us and cannot change.
          *
-         * the only it can is to switch its nsproxy with sys_unshare,
-         * bu uncharing pid namespaces is not allowed, so we'll always
-         * see relevant namespace
+         * task_active_pid_ns will always return the same pid namespace
+         * until a task passes through release_task.
          *
          * write_lock() currently calls preempt_disable() which is the
         * same as rcu_read_lock(), but according to Oleg, this is not
          * correct to rely on this
          */
         rcu_read_lock();
-        info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+        info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent));
         info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns),
                                        task_uid(tsk));
         rcu_read_unlock();
kernel/sys.c (213 lines changed)
@@ -36,6 +36,8 @@
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/fs_struct.h>
+#include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
                 memcpy(u->nodename, tmp, len);
                 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
                 errno = 0;
-                uts_proc_notify(UTS_PROC_HOSTNAME);
         }
+        uts_proc_notify(UTS_PROC_HOSTNAME);
         up_write(&uts_sem);
         return errno;
 }
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
                 memcpy(u->domainname, tmp, len);
                 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
                 errno = 0;
-                uts_proc_notify(UTS_PROC_DOMAINNAME);
         }
+        uts_proc_notify(UTS_PROC_DOMAINNAME);
         up_write(&uts_sem);
         return errno;
 }
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
+static bool vma_flags_mismatch(struct vm_area_struct *vma,
+                               unsigned long required,
+                               unsigned long banned)
+{
+        return (vma->vm_flags & required) != required ||
+               (vma->vm_flags & banned);
+}
+
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
+{
+        struct file *exe_file;
+        struct dentry *dentry;
+        int err;
+
+        /*
+         * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
+         * remain. So perform a quick test first.
+         */
+        if (mm->num_exe_file_vmas)
+                return -EBUSY;
+
+        exe_file = fget(fd);
+        if (!exe_file)
+                return -EBADF;
+
+        dentry = exe_file->f_path.dentry;
+
+        /*
+         * Because the original mm->exe_file points to executable file, make
+         * sure that this one is executable as well, to avoid breaking an
+         * overall picture.
+         */
+        err = -EACCES;
+        if (!S_ISREG(dentry->d_inode->i_mode) ||
+            exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+                goto exit;
+
+        err = inode_permission(dentry->d_inode, MAY_EXEC);
+        if (err)
+                goto exit;
+
+        /*
+         * The symlink can be changed only once, just to disallow arbitrary
+         * transitions malicious software might bring in. This means one
+         * could make a snapshot over all processes running and monitor
+         * /proc/pid/exe changes to notice unusual activity if needed.
+         */
+        down_write(&mm->mmap_sem);
+        if (likely(!mm->exe_file))
+                set_mm_exe_file(mm, exe_file);
+        else
+                err = -EBUSY;
+        up_write(&mm->mmap_sem);
+
+exit:
+        fput(exe_file);
+        return err;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
                         unsigned long arg4, unsigned long arg5)
 {
         unsigned long rlim = rlimit(RLIMIT_DATA);
-        unsigned long vm_req_flags;
-        unsigned long vm_bad_flags;
-        struct vm_area_struct *vma;
-        int error = 0;
         struct mm_struct *mm = current->mm;
+        struct vm_area_struct *vma;
+        int error;
 
-        if (arg4 | arg5)
+        if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
                 return -EINVAL;
 
         if (!capable(CAP_SYS_RESOURCE))
                 return -EPERM;
 
+        if (opt == PR_SET_MM_EXE_FILE)
+                return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+
         if (addr >= TASK_SIZE)
                 return -EINVAL;
 
+        error = -EINVAL;
+
         down_read(&mm->mmap_sem);
         vma = find_vma(mm, addr);
 
-        if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
-                /* It must be existing VMA */
-                if (!vma || vma->vm_start > addr)
-                        goto out;
-        }
-
-        error = -EINVAL;
         switch (opt) {
         case PR_SET_MM_START_CODE:
+                mm->start_code = addr;
+                break;
         case PR_SET_MM_END_CODE:
-                vm_req_flags = VM_READ | VM_EXEC;
-                vm_bad_flags = VM_WRITE | VM_MAYSHARE;
-
-                if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-                    (vma->vm_flags & vm_bad_flags))
-                        goto out;
-
-                if (opt == PR_SET_MM_START_CODE)
-                        mm->start_code = addr;
-                else
-                        mm->end_code = addr;
+                mm->end_code = addr;
                 break;
-
         case PR_SET_MM_START_DATA:
-        case PR_SET_MM_END_DATA:
-                vm_req_flags = VM_READ | VM_WRITE;
-                vm_bad_flags = VM_EXEC | VM_MAYSHARE;
-
-                if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-                    (vma->vm_flags & vm_bad_flags))
-                        goto out;
-
-                if (opt == PR_SET_MM_START_DATA)
-                        mm->start_data = addr;
-                else
-                        mm->end_data = addr;
+                mm->start_data = addr;
                 break;
-
-        case PR_SET_MM_START_STACK:
-
-#ifdef CONFIG_STACK_GROWSUP
-                vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
-#else
-                vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
-#endif
-                if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
-                        goto out;
-
-                mm->start_stack = addr;
+        case PR_SET_MM_END_DATA:
+                mm->end_data = addr;
                 break;
 
         case PR_SET_MM_START_BRK:
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
                 mm->brk = addr;
                 break;
 
+        /*
+         * If command line arguments and environment
+         * are placed somewhere else on stack, we can
+         * set them up here, ARG_START/END to setup
+         * command line argumets and ENV_START/END
+         * for environment.
+         */
+        case PR_SET_MM_START_STACK:
+        case PR_SET_MM_ARG_START:
+        case PR_SET_MM_ARG_END:
+        case PR_SET_MM_ENV_START:
+        case PR_SET_MM_ENV_END:
+                if (!vma) {
+                        error = -EFAULT;
+                        goto out;
+                }
+#ifdef CONFIG_STACK_GROWSUP
+                if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
+#else
+                if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
+#endif
+                        goto out;
+                if (opt == PR_SET_MM_START_STACK)
+                        mm->start_stack = addr;
+                else if (opt == PR_SET_MM_ARG_START)
+                        mm->arg_start = addr;
+                else if (opt == PR_SET_MM_ARG_END)
+                        mm->arg_end = addr;
+                else if (opt == PR_SET_MM_ENV_START)
+                        mm->env_start = addr;
+                else if (opt == PR_SET_MM_ENV_END)
+                        mm->env_end = addr;
+                break;
+
+        /*
+         * This doesn't move auxiliary vector itself
+         * since it's pinned to mm_struct, but allow
+         * to fill vector with new values. It's up
+         * to a caller to provide sane values here
+         * otherwise user space tools which use this
+         * vector might be unhappy.
+         */
+        case PR_SET_MM_AUXV: {
+                unsigned long user_auxv[AT_VECTOR_SIZE];
+
+                if (arg4 > sizeof(user_auxv))
+                        goto out;
+                up_read(&mm->mmap_sem);
+
+                if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
+                        return -EFAULT;
+
+                /* Make sure the last entry is always AT_NULL */
+                user_auxv[AT_VECTOR_SIZE - 2] = 0;
+                user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+                BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+                task_lock(current);
+                memcpy(mm->saved_auxv, user_auxv, arg4);
+                task_unlock(current);
+
+                return 0;
+        }
         default:
                 error = -EINVAL;
                 goto out;
         }
 
         error = 0;
 out:
         up_read(&mm->mmap_sem);
 
         return error;
 }
 #else /* CONFIG_CHECKPOINT_RESTORE */
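Editor's note: a restore tool would drive this interface roughly as sketched below. This is a hedged example, assuming a kernel built with CONFIG_CHECKPOINT_RESTORE, a caller holding CAP_SYS_RESOURCE, and addresses that fall inside an existing stack VMA (per the vma_flags_mismatch() checks above); the fallback constants are the values this series adds to include/linux/prctl.h, for builds whose libc headers predate it.

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_MM                       /* older headers lack these */
#define PR_SET_MM               35
#define PR_SET_MM_ARG_START     8
#define PR_SET_MM_ARG_END       9
#endif

/* saved_arg_start/end would come from a checkpoint image in a real tool. */
static int restore_arg_range(unsigned long saved_arg_start,
                             unsigned long saved_arg_end)
{
        /* arg4 and arg5 must be 0 for everything except PR_SET_MM_AUXV. */
        if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, saved_arg_start, 0, 0) ||
            prctl(PR_SET_MM, PR_SET_MM_ARG_END, saved_arg_end, 0, 0)) {
                perror("prctl(PR_SET_MM)");
                return -1;
        }
        return 0;
}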
kernel/sys.c (continued)
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force)
                 NULL
         };
         int ret = -ENOMEM;
-        struct subprocess_info *info;
 
         if (argv == NULL) {
                 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
@@ -2122,18 +2209,16 @@ int orderly_poweroff(bool force)
                 goto out;
         }
 
-        info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
-        if (info == NULL) {
+        ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
+                                      NULL, argv_cleanup, NULL);
+out:
+        if (likely(!ret))
+                return 0;
+
+        if (ret == -ENOMEM)
                 argv_free(argv);
-                goto out;
-        }
-
-        call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
-
-        ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
-
-out:
-        if (ret && force) {
+        if (force) {
                 printk(KERN_WARNING "Failed to start orderly shutdown: "
                        "forcing the issue\n");
kernel/sys_ni.c
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
 cond_syscall(sys_name_to_handle_at);
 cond_syscall(sys_open_by_handle_at);
 cond_syscall(compat_sys_open_by_handle_at);
+
+/* compare kernel pointers */
+cond_syscall(sys_kcmp);