Merge branch 'akpm' (Andrew's patch-bomb)

Merge misc patches from Andrew Morton:

 - the "misc" tree - stuff from all over the map

 - checkpatch updates

 - fatfs

 - kmod changes

 - procfs

 - cpumask

 - UML

 - kexec

 - mqueue

 - rapidio

 - pidns

 - some checkpoint-restore feature work.  Reluctantly.  Most of it
   delayed a release.  I'm still rather worried that we don't have a
   clear roadmap to completion for this work.

* emailed from Andrew Morton <akpm@linux-foundation.org>: (78 patches)
  kconfig: update compression algorithm info
  c/r: prctl: add ability to set new mm_struct::exe_file
  c/r: prctl: extend PR_SET_MM to set up more mm_struct entries
  c/r: procfs: add arg_start/end, env_start/end and exit_code members to /proc/$pid/stat
  syscalls, x86: add __NR_kcmp syscall
  fs, proc: introduce /proc/<pid>/task/<tid>/children entry
  sysctl: make kernel.ns_last_pid control dependent on CHECKPOINT_RESTORE
  aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector()
  eventfd: change int to __u64 in eventfd_signal()
  fs/nls: add Apple NLS
  pidns: make killed children autoreap
  pidns: use task_active_pid_ns in do_notify_parent
  rapidio/tsi721: add DMA engine support
  rapidio: add DMA engine support for RIO data transfers
  ipc/mqueue: add rbtree node caching support
  tools/selftests: add mq_perf_tests
  ipc/mqueue: strengthen checks on mqueue creation
  ipc/mqueue: correct mq_attr_ok test
  ipc/mqueue: improve performance of send/recv
  selftests: add mq_open_tests
  ...
This commit is contained in:
Linus Torvalds
2012-05-31 18:10:18 -07:00
108 changed files with 10621 additions and 700 deletions

View File

@@ -25,6 +25,9 @@ endif
obj-y += sched/
obj-y += power/
ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
obj-$(CONFIG_X86) += kcmp.o
endif
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o

View File

@@ -10,7 +10,10 @@
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_cpu_notifier);
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * For every process, look for a thread that still has an mm and clear the
 * bit for @cpu in that mm's cpumask.
 *
 * The walk is protected by rcu_read_lock() only, which is a relaxed locking
 * scheme: call this only for a CPU that has already been offlined.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *proc;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so its not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));

	rcu_read_lock();
	for_each_process(proc) {
		/*
		 * The main thread might have exited while other threads
		 * still hold a valid mm, so ask for any such thread.
		 */
		struct task_struct *owner = find_lock_task_mm(proc);

		if (owner) {
			cpumask_clear_cpu(cpu, mm_cpumask(owner->mm));
			/* Pairs with the lock taken by find_lock_task_mm(). */
			task_unlock(owner);
		}
	}
	rcu_read_unlock();
}
static inline void check_for_tasks(int cpu)
{
struct task_struct *p;

View File

@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
/**
* cpm_pm_enter - CPU low power entry notifier
* cpu_pm_enter - CPU low power entry notifier
*
* Notifies listeners that a single CPU is entering a low power state that may
* cause some blocks in the same power domain as the cpu to reset.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
* Must be called on the affected CPU with interrupts disabled. Platform is
* responsible for ensuring that cpu_pm_enter is not called twice on the same
* CPU before cpu_pm_exit is called. Notified drivers can include VFP
* co-processor, interrupt controller and it's PM extensions, local CPU
* co-processor, interrupt controller and its PM extensions, local CPU
* timers context save/restore which shouldn't be interrupted. Hence it
* must be called with interrupts disabled.
*
@@ -115,13 +115,13 @@ int cpu_pm_enter(void)
EXPORT_SYMBOL_GPL(cpu_pm_enter);
/**
* cpm_pm_exit - CPU low power exit notifier
* cpu_pm_exit - CPU low power exit notifier
*
* Notifies listeners that a single CPU is exiting a low power state that may
* have caused some blocks in the same power domain as the cpu to reset.
*
* Notified drivers can include VFP co-processor, interrupt controller
* and it's PM extensions, local CPU timers context save/restore which
* and its PM extensions, local CPU timers context save/restore which
* shouldn't be interrupted. Hence it must be called with interrupts disabled.
*
* Return conditions are same as __raw_notifier_call_chain.
@@ -139,7 +139,7 @@ int cpu_pm_exit(void)
EXPORT_SYMBOL_GPL(cpu_pm_exit);
/**
* cpm_cluster_pm_enter - CPU cluster low power entry notifier
* cpu_cluster_pm_enter - CPU cluster low power entry notifier
*
* Notifies listeners that all cpus in a power domain are entering a low power
* state that may cause some blocks in the same power domain to reset.
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit);
* Must be called after cpu_pm_enter has been called on all cpus in the power
* domain, and before cpu_pm_exit has been called on any cpu in the power
* domain. Notified drivers can include VFP co-processor, interrupt controller
* and it's PM extensions, local CPU timers context save/restore which
* and its PM extensions, local CPU timers context save/restore which
* shouldn't be interrupted. Hence it must be called with interrupts disabled.
*
* Must be called with interrupts disabled.
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void)
EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
/**
* cpm_cluster_pm_exit - CPU cluster low power exit notifier
* cpu_cluster_pm_exit - CPU cluster low power exit notifier
*
* Notifies listeners that all cpus in a power domain are exiting from a
* low power state that may have caused some blocks in the same power domain
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
* Must be called after cpu_pm_exit has been called on all cpus in the power
* domain, and before cpu_pm_exit has been called on any cpu in the power
* domain. Notified drivers can include VFP co-processor, interrupt controller
* and it's PM extensions, local CPU timers context save/restore which
* and its PM extensions, local CPU timers context save/restore which
* shouldn't be interrupted. Hence it must be called with interrupts disabled.
*
* Return conditions are same as __raw_notifier_call_chain.

View File

@@ -884,9 +884,9 @@ static void check_stack_usage(void)
spin_lock(&low_water_lock);
if (free < lowest_to_date) {
printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
"left\n",
current->comm, free);
printk(KERN_WARNING "%s (%d) used greatest stack depth: "
"%lu bytes left\n",
current->comm, task_pid_nr(current), free);
lowest_to_date = free;
}
spin_unlock(&low_water_lock);
@@ -1214,7 +1214,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
unsigned long state;
int retval, status, traced;
pid_t pid = task_pid_vnr(p);
uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
struct siginfo __user *infop;
if (!likely(wo->wo_flags & WEXITED))

View File

@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);
if (tsk->vfork_done)
complete_vfork_done(tsk);
/*
* If we're exiting normally, clear a user-space tid field if
* requested. We leave this alone when dying by signal, to leave
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
}
tsk->clear_child_tid = NULL;
}
/*
* All done, finally we can wake up parent and return this mm to him.
* Also kthread_stop() uses this completion for synchronization.
*/
if (tsk->vfork_done)
complete_vfork_done(tsk);
}
/*

View File

@@ -7,6 +7,8 @@
* This file contains driver APIs to the irq subsystem.
*/
#define pr_fmt(fmt) "genirq: " fmt
#include <linux/irq.h>
#include <linux/kthread.h>
#include <linux/module.h>
@@ -565,7 +567,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
* IRQF_TRIGGER_* but the PIC does not support multiple
* flow-types?
*/
pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq,
pr_debug("No set_type function for IRQ %d (%s)\n", irq,
chip ? (chip->name ? : "unknown") : "unknown");
return 0;
}
@@ -600,7 +602,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
ret = 0;
break;
default:
pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n",
pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
flags, irq, chip->irq_set_type);
}
if (unmask)
@@ -837,7 +839,7 @@ void exit_irq_thread(void)
action = kthread_data(tsk);
pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
desc = irq_to_desc(action->irq);
@@ -1044,7 +1046,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* has. The type flags are unreliable as the
* underlying chip implementation can override them.
*/
pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
irq);
ret = -EINVAL;
goto out_mask;
@@ -1095,7 +1097,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (nmsk != omsk)
/* hope the handler works with current trigger mode */
pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n",
pr_warning("irq %d uses trigger mode %u; requested %u\n",
irq, nmsk, omsk);
}
@@ -1133,7 +1135,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
mismatch:
if (!(new->flags & IRQF_PROBE_SHARED)) {
pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
irq, new->flags, new->name, old->flags, old->name);
#ifdef CONFIG_DEBUG_SHIRQ
dump_stack();

196
kernel/kcmp.c Normal file
View File

@@ -0,0 +1,196 @@
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/fdtable.h>
#include <linux/string.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/cache.h>
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/kcmp.h>
#include <asm/unistd.h>
/*
* We don't expose the real in-memory order of objects for security reasons.
* But still the comparison results should be suitable for sorting. So we
* obfuscate kernel pointer values and compare the results instead.
*
* The obfuscation is done in two steps. First we xor the kernel pointer with
* a random value, which puts the pointer into a new position in a reordered
* space. Secondly we multiply the xor result by a large odd random number to
* permute its bits even more (the odd multiplier guarantees that the product
* is unique even after the high bits are truncated, since any odd number is
* relatively prime to 2^n).
*
* Note also that the obfuscation itself is invisible to userspace and if needed
* it can be changed to an alternate scheme.
*/
static unsigned long cookies[KCMP_TYPES][2] __read_mostly;

/*
 * Scramble the pointer value @v with the pair of cookies that belongs to
 * @type: xor with the first cookie, then multiply by the second one.
 * The arithmetic is carried out in unsigned long and converted back to long
 * on return.
 */
static long kptr_obfuscate(long v, int type)
{
	unsigned long mixed = v ^ cookies[type][0];

	return mixed * cookies[type][1];
}
/*
 * Compare two kernel pointers of the same @type by their obfuscated values.
 *
 * Return value:
 * 0 - equal, i.e. v1 = v2
 * 1 - less than, i.e. v1 < v2
 * 2 - greater than, i.e. v1 > v2
 * 3 - not equal but ordering unavailable (reserved for future)
 *
 * The obfuscated values are compared directly rather than by looking at the
 * sign of their difference: the difference of two arbitrary longs can
 * overflow, which would make the resulting order inconsistent and therefore
 * useless for sorting.
 */
static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
{
	long t1, t2;

	t1 = kptr_obfuscate((long)v1, type);
	t2 = kptr_obfuscate((long)v2, type);

	return (t1 < t2) | ((t1 > t2) << 1);
}
/*
 * Look up descriptor @idx in @task's file table while holding task_lock()
 * and rcu_read_lock(). Yields NULL when the task has no file table,
 * otherwise whatever fcheck_files() reports for that slot.
 *
 * The caller must have pinned the task.
 */
static struct file *
get_file_raw_ptr(struct task_struct *task, unsigned int idx)
{
	struct files_struct *table;
	struct file *found;

	task_lock(task);
	rcu_read_lock();

	table = task->files;
	found = table ? fcheck_files(table, idx) : NULL;

	rcu_read_unlock();
	task_unlock(task);

	return found;
}
/*
 * Release the mutex pair. When both arguments name the same mutex it is
 * unlocked once only.
 */
static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
{
	if (!likely(m2 != m1)) {
		mutex_unlock(m1);
		return;
	}

	mutex_unlock(m2);
	mutex_unlock(m1);
}
/*
 * Take both mutexes with killable waits, always acquiring them in the same
 * address order regardless of argument order. If both arguments are the same
 * mutex it is taken once. Returns 0 on success; on failure nothing is left
 * locked and the error from the mutex call is returned.
 */
static int kcmp_lock(struct mutex *m1, struct mutex *m2)
{
	int err;

	/* Normalise the order so that m1 is the higher address. */
	if (m2 > m1)
		swap(m1, m2);

	err = mutex_lock_killable(m1);
	if (err)
		return err;

	if (!likely(m1 != m2))
		return 0;

	err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
	if (err)
		mutex_unlock(m1);

	return err;
}
/*
 * kcmp - compare one kernel resource of two tasks.
 * @pid1, @pid2: the tasks to compare, looked up in the caller's PID namespace.
 * @type: which resource to compare; one of the KCMP_* cases handled below.
 * @idx1, @idx2: descriptor numbers in task1 and task2; used for KCMP_FILE only.
 *
 * Returns the kcmp_ptr() result for the selected pair of pointers, or a
 * negative errno:
 *   -ESRCH       either pid was not found,
 *   -EPERM       ptrace_may_access() denied PTRACE_MODE_READ on either task,
 *   -EBADF       KCMP_FILE and one of the descriptors yielded no file,
 *   -EOPNOTSUPP  KCMP_SYSVSEM without CONFIG_SYSVIPC,
 *   -EINVAL      unknown @type,
 *   or the error returned by kcmp_lock().
 */
SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
unsigned long, idx1, unsigned long, idx2)
{
struct task_struct *task1, *task2;
int ret;
rcu_read_lock();
/*
* Tasks are looked up in caller's PID namespace only.
*/
task1 = find_task_by_vpid(pid1);
task2 = find_task_by_vpid(pid2);
if (!task1 || !task2)
goto err_no_task;
/* Pin both tasks before leaving the RCU read-side section. */
get_task_struct(task1);
get_task_struct(task2);
rcu_read_unlock();
/*
* One should have enough rights to inspect task details.
*/
/* Both cred_guard_mutex locks are held across the access check and the comparison. */
ret = kcmp_lock(&task1->signal->cred_guard_mutex,
&task2->signal->cred_guard_mutex);
if (ret)
goto err;
if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
!ptrace_may_access(task2, PTRACE_MODE_READ)) {
ret = -EPERM;
goto err_unlock;
}
/* Every case leaves its result in ret and falls out to err_unlock. */
switch (type) {
case KCMP_FILE: {
struct file *filp1, *filp2;
/* idx1/idx2 are passed as the unsigned int descriptor index. */
filp1 = get_file_raw_ptr(task1, idx1);
filp2 = get_file_raw_ptr(task2, idx2);
if (filp1 && filp2)
ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
else
ret = -EBADF;
break;
}
case KCMP_VM:
ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
break;
case KCMP_FILES:
ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
break;
case KCMP_FS:
ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
break;
case KCMP_SIGHAND:
ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
break;
case KCMP_IO:
ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
break;
case KCMP_SYSVSEM:
#ifdef CONFIG_SYSVIPC
ret = kcmp_ptr(task1->sysvsem.undo_list,
task2->sysvsem.undo_list,
KCMP_SYSVSEM);
#else
ret = -EOPNOTSUPP;
#endif
break;
default:
ret = -EINVAL;
break;
}
/* Reached with both mutexes held: release them. */
err_unlock:
kcmp_unlock(&task1->signal->cred_guard_mutex,
&task2->signal->cred_guard_mutex);
/* Reached with no mutex held: drop the task references taken above. */
err:
put_task_struct(task1);
put_task_struct(task2);
return ret;
/* Lookup failed while still inside the RCU read-side section; no references were taken. */
err_no_task:
rcu_read_unlock();
return -ESRCH;
}
/*
 * Fill the cookie table with random bytes, then force the highest and the
 * lowest bit of every multiplier cookie so that it is both large and odd.
 */
static __init int kcmp_cookies_init(void)
{
	const unsigned long forced_bits = ~(~0UL >> 1) | 1;
	int type = 0;

	get_random_bytes(cookies, sizeof(cookies));

	while (type < KCMP_TYPES) {
		cookies[type][1] |= forced_bits;
		type++;
	}

	return 0;
}
arch_initcall(kcmp_cookies_init);

View File

@@ -221,13 +221,12 @@ fail:
return 0;
}
void call_usermodehelper_freeinfo(struct subprocess_info *info)
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
(*info->cleanup)(info);
kfree(info);
}
EXPORT_SYMBOL(call_usermodehelper_freeinfo);
static void umh_complete(struct subprocess_info *sub_info)
{
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
/**
* __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
* depth: New value to assign to usermodehelper_disabled.
* @depth: New value to assign to usermodehelper_disabled.
*
* Change the value of usermodehelper_disabled (under umhelper_sem locked for
* writing) and wakeup tasks waiting for it to change.
@@ -479,6 +478,7 @@ static void helper_unlock(void)
* structure. This should be passed to call_usermodehelper_exec to
* exec the process and free the structure.
*/
static
struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
char **envp, gfp_t gfp_mask)
{
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
out:
return sub_info;
}
EXPORT_SYMBOL(call_usermodehelper_setup);
/**
* call_usermodehelper_setfns - set a cleanup/init function
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
* Function must be runnable in either a process context or the
* context in which call_usermodehelper_exec is called.
*/
static
void call_usermodehelper_setfns(struct subprocess_info *info,
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *info),
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
info->init = init;
info->data = data;
}
EXPORT_SYMBOL(call_usermodehelper_setfns);
/**
* call_usermodehelper_exec - start a usermode application
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
* asynchronously if wait is not set, and runs as a child of keventd.
* (ie. it runs with full root capabilities).
*/
static
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{
DECLARE_COMPLETION_ONSTACK(done);
@@ -576,7 +576,25 @@ unlock:
helper_unlock();
return retval;
}
EXPORT_SYMBOL(call_usermodehelper_exec);
/*
 * Convenience wrapper: set up a subprocess_info for @path/@argv/@envp,
 * attach the @init and @cleanup callbacks together with @data, and execute it
 * with the requested @wait mode.
 *
 * The allocation uses GFP_ATOMIC when @wait is UMH_NO_WAIT and GFP_KERNEL
 * otherwise. Returns -ENOMEM if the setup allocation fails, else the result
 * of call_usermodehelper_exec().
 */
int call_usermodehelper_fns(
	char *path, char **argv, char **envp, int wait,
	int (*init)(struct subprocess_info *info, struct cred *new),
	void (*cleanup)(struct subprocess_info *), void *data)
{
	struct subprocess_info *sub_info;
	gfp_t gfp_mask;

	if (wait == UMH_NO_WAIT)
		gfp_mask = GFP_ATOMIC;
	else
		gfp_mask = GFP_KERNEL;

	sub_info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
	if (!sub_info)
		return -ENOMEM;

	call_usermodehelper_setfns(sub_info, init, cleanup, data);

	return call_usermodehelper_exec(sub_info, wait);
}
EXPORT_SYMBOL(call_usermodehelper_fns);
static int proc_cap_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)

View File

@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
{
int nr;
int rc;
struct task_struct *task;
struct task_struct *task, *me = current;
/* Ignore SIGCHLD causing any terminated children to autoreap */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
spin_unlock_irq(&me->sighand->siglock);
/*
* The last thread in the cgroup-init thread group is terminating.
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
return;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
static int pid_ns_ctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = {
},
{ }
};
static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
#endif /* CONFIG_CHECKPOINT_RESTORE */
int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
{
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
#ifdef CONFIG_CHECKPOINT_RESTORE
register_sysctl_paths(kern_path, pid_ns_ctl_table);
#endif
return 0;
}

View File

@@ -515,8 +515,8 @@ out:
* @root: root resource descriptor
* @new: resource descriptor desired by caller
* @size: requested resource region size
* @min: minimum size to allocate
* @max: maximum size to allocate
* @min: minimum boundary to allocate
* @max: maximum boundary to allocate
* @align: alignment requested, in bytes
* @alignf: alignment function, optional, called if not NULL
* @alignf_data: arbitrary data to pass to the @alignf function

View File

@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
info.si_signo = sig;
info.si_errno = 0;
/*
* we are under tasklist_lock here so our parent is tied to
* us and cannot exit and release its namespace.
* We are under tasklist_lock here so our parent is tied to
* us and cannot change.
*
* the only it can is to switch its nsproxy with sys_unshare,
* bu uncharing pid namespaces is not allowed, so we'll always
* see relevant namespace
* task_active_pid_ns will always return the same pid namespace
* until a task passes through release_task.
*
* write_lock() currently calls preempt_disable() which is the
* same as rcu_read_lock(), but according to Oleg, this is not
* correct to rely on this
*/
rcu_read_lock();
info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent));
info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns),
task_uid(tsk));
rcu_read_unlock();

View File

@@ -36,6 +36,8 @@
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
memcpy(u->nodename, tmp, len);
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
errno = 0;
uts_proc_notify(UTS_PROC_HOSTNAME);
}
uts_proc_notify(UTS_PROC_HOSTNAME);
up_write(&uts_sem);
return errno;
}
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
memcpy(u->domainname, tmp, len);
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
errno = 0;
uts_proc_notify(UTS_PROC_DOMAINNAME);
}
uts_proc_notify(UTS_PROC_DOMAINNAME);
up_write(&uts_sem);
return errno;
}
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask)
}
#ifdef CONFIG_CHECKPOINT_RESTORE
static bool vma_flags_mismatch(struct vm_area_struct *vma,
unsigned long required,
unsigned long banned)
{
return (vma->vm_flags & required) != required ||
(vma->vm_flags & banned);
}
/*
 * Install the file behind descriptor @fd as @mm's exe_file.
 *
 * Returns 0 on success, or:
 *   -EBUSY   @mm still has VM_EXECUTABLE vmas, or an exe_file is already set,
 *   -EBADF   @fd does not resolve to a file,
 *   -EACCES  the file is not a regular file or sits on a MNT_NOEXEC mount,
 *   or the error reported by inode_permission(..., MAY_EXEC).
 */
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct file *candidate;
	struct dentry *target;
	int ret;

	/*
	 * A new mm::exe_file may only be set once no VM_EXECUTABLE vma is
	 * left, so bail out early if any remain.
	 */
	if (mm->num_exe_file_vmas)
		return -EBUSY;

	candidate = fget(fd);
	if (!candidate)
		return -EBADF;

	target = candidate->f_path.dentry;

	/*
	 * The original mm->exe_file refers to an executable file; require
	 * the replacement to be executable too so the overall picture stays
	 * consistent.
	 */
	if (!S_ISREG(target->d_inode->i_mode) ||
	    candidate->f_path.mnt->mnt_flags & MNT_NOEXEC) {
		ret = -EACCES;
		goto drop_file;
	}

	ret = inode_permission(target->d_inode, MAY_EXEC);
	if (ret)
		goto drop_file;

	/*
	 * Only a single change of the symlink is permitted, which rules out
	 * arbitrary transitions by malicious software. A monitor can thus
	 * snapshot all running processes and watch /proc/pid/exe for
	 * unusual changes.
	 */
	down_write(&mm->mmap_sem);
	if (likely(!mm->exe_file))
		set_mm_exe_file(mm, candidate);
	else
		ret = -EBUSY;
	up_write(&mm->mmap_sem);

drop_file:
	/* Drop the reference obtained by fget() on every path. */
	fput(candidate);
	return ret;
}
static int prctl_set_mm(int opt, unsigned long addr,
unsigned long arg4, unsigned long arg5)
{
unsigned long rlim = rlimit(RLIMIT_DATA);
unsigned long vm_req_flags;
unsigned long vm_bad_flags;
struct vm_area_struct *vma;
int error = 0;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int error;
if (arg4 | arg5)
if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
return -EINVAL;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (opt == PR_SET_MM_EXE_FILE)
return prctl_set_mm_exe_file(mm, (unsigned int)addr);
if (addr >= TASK_SIZE)
return -EINVAL;
error = -EINVAL;
down_read(&mm->mmap_sem);
vma = find_vma(mm, addr);
if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
/* It must be existing VMA */
if (!vma || vma->vm_start > addr)
goto out;
}
error = -EINVAL;
switch (opt) {
case PR_SET_MM_START_CODE:
mm->start_code = addr;
break;
case PR_SET_MM_END_CODE:
vm_req_flags = VM_READ | VM_EXEC;
vm_bad_flags = VM_WRITE | VM_MAYSHARE;
if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
(vma->vm_flags & vm_bad_flags))
goto out;
if (opt == PR_SET_MM_START_CODE)
mm->start_code = addr;
else
mm->end_code = addr;
mm->end_code = addr;
break;
case PR_SET_MM_START_DATA:
case PR_SET_MM_END_DATA:
vm_req_flags = VM_READ | VM_WRITE;
vm_bad_flags = VM_EXEC | VM_MAYSHARE;
if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
(vma->vm_flags & vm_bad_flags))
goto out;
if (opt == PR_SET_MM_START_DATA)
mm->start_data = addr;
else
mm->end_data = addr;
mm->start_data = addr;
break;
case PR_SET_MM_START_STACK:
#ifdef CONFIG_STACK_GROWSUP
vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
#else
vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
#endif
if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
goto out;
mm->start_stack = addr;
case PR_SET_MM_END_DATA:
mm->end_data = addr;
break;
case PR_SET_MM_START_BRK:
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
mm->brk = addr;
break;
/*
* If command line arguments and environment
* are placed somewhere else on stack, we can
* set them up here, ARG_START/END to setup
* command line arguments and ENV_START/END
* for environment.
*/
case PR_SET_MM_START_STACK:
case PR_SET_MM_ARG_START:
case PR_SET_MM_ARG_END:
case PR_SET_MM_ENV_START:
case PR_SET_MM_ENV_END:
if (!vma) {
error = -EFAULT;
goto out;
}
#ifdef CONFIG_STACK_GROWSUP
if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
#else
if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
#endif
goto out;
if (opt == PR_SET_MM_START_STACK)
mm->start_stack = addr;
else if (opt == PR_SET_MM_ARG_START)
mm->arg_start = addr;
else if (opt == PR_SET_MM_ARG_END)
mm->arg_end = addr;
else if (opt == PR_SET_MM_ENV_START)
mm->env_start = addr;
else if (opt == PR_SET_MM_ENV_END)
mm->env_end = addr;
break;
/*
* This doesn't move auxiliary vector itself
* since it's pinned to mm_struct, but allow
* to fill vector with new values. It's up
* to a caller to provide sane values here
* otherwise user space tools which use this
* vector might be unhappy.
*/
case PR_SET_MM_AUXV: {
unsigned long user_auxv[AT_VECTOR_SIZE];
if (arg4 > sizeof(user_auxv))
goto out;
up_read(&mm->mmap_sem);
if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
return -EFAULT;
/* Make sure the last entry is always AT_NULL */
user_auxv[AT_VECTOR_SIZE - 2] = 0;
user_auxv[AT_VECTOR_SIZE - 1] = 0;
BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
task_lock(current);
memcpy(mm->saved_auxv, user_auxv, arg4);
task_unlock(current);
return 0;
}
default:
error = -EINVAL;
goto out;
}
error = 0;
out:
up_read(&mm->mmap_sem);
return error;
}
#else /* CONFIG_CHECKPOINT_RESTORE */
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force)
NULL
};
int ret = -ENOMEM;
struct subprocess_info *info;
if (argv == NULL) {
printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
@@ -2122,18 +2209,16 @@ int orderly_poweroff(bool force)
goto out;
}
info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
if (info == NULL) {
ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
NULL, argv_cleanup, NULL);
out:
if (likely(!ret))
return 0;
if (ret == -ENOMEM)
argv_free(argv);
goto out;
}
call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
out:
if (ret && force) {
if (force) {
printk(KERN_WARNING "Failed to start orderly shutdown: "
"forcing the issue\n");

View File

@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
cond_syscall(sys_name_to_handle_at);
cond_syscall(sys_open_by_handle_at);
cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */
cond_syscall(sys_kcmp);