diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 0b02ced1eb33..c8c05d2e112a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -162,9 +162,121 @@ static struct file_system_type dma_buf_fs_type = {
 	.kill_sb = kill_anon_super,
 };
 
+static struct task_dma_buf_record *find_task_dmabuf_record(
+		struct task_struct *task, struct dma_buf *dmabuf)
+{
+	struct task_dma_buf_record *rec;
+
+	lockdep_assert_held(&task->dmabuf_info->lock);
+
+	list_for_each_entry(rec, &task->dmabuf_info->dmabufs, node)
+		if (dmabuf == rec->dmabuf)
+			return rec;
+
+	return NULL;
+}
+
+static int new_task_dmabuf_record(struct task_struct *task, struct dma_buf *dmabuf)
+{
+	struct task_dma_buf_record *rec;
+
+	lockdep_assert_held(&task->dmabuf_info->lock);
+
+	rec = kmalloc(sizeof(*rec), GFP_ATOMIC);
+	if (!rec)
+		return -ENOMEM;
+
+	task->dmabuf_info->rss += dmabuf->size;
+	rec->dmabuf = dmabuf;
+	rec->refcnt = 1;
+	list_add(&rec->node, &task->dmabuf_info->dmabufs);
+
+	return 0;
+}
+
+/**
+ * dma_buf_account_task - Account a dmabuf to a task
+ * @dmabuf:	[in]	pointer to dma_buf
+ * @task:	[in]	pointer to task_struct
+ *
+ * When a process obtains a dmabuf file descriptor, or maps a dmabuf, this
+ * function attributes the provided @dmabuf to the @task. The first time @dmabuf
+ * is attributed to @task, the buffer's size is added to the @task's dmabuf RSS.
+ *
+ * Return:
+ * * 0 on success
+ * * A negative error code upon error
+ */
+int dma_buf_account_task(struct dma_buf *dmabuf, struct task_struct *task)
+{
+	struct task_dma_buf_record *rec;
+	int ret = 0;
+
+	if (!dmabuf || !task)
+		return -EINVAL;
+
+	if (!task->dmabuf_info) {
+		pr_err("%s dmabuf accounting record was not allocated\n", __func__);
+		return -ENOMEM;
+	}
+
+	spin_lock(&task->dmabuf_info->lock);
+	rec = find_task_dmabuf_record(task, dmabuf);
+	if (!rec)
+		ret = new_task_dmabuf_record(task, dmabuf);
+	else
+		++rec->refcnt;
+	spin_unlock(&task->dmabuf_info->lock);
+
+	return ret;
+}
+
+/**
+ * dma_buf_unaccount_task - Unaccount a dmabuf from a task
+ * @dmabuf:	[in]	pointer to dma_buf
+ * @task:	[in]	pointer to task_struct
+ *
+ * When a process closes a dmabuf file descriptor, or unmaps a dmabuf, this
+ * function removes the provided @dmabuf attribution from the @task. When all
+ * references to @dmabuf are removed from @task, the buffer's size is removed
+ * from the task's dmabuf RSS.
+ *
+ * Unlike dma_buf_account_task(), this function returns no value. A missing
+ * accounting record (e.g. after a failed fd_install()) is reported with
+ * pr_err() and otherwise ignored.
+ */
+void dma_buf_unaccount_task(struct dma_buf *dmabuf, struct task_struct *task)
+{
+	struct task_dma_buf_record *rec;
+
+	if (!dmabuf || !task)
+		return;
+
+	if (!task->dmabuf_info) {
+		pr_err("%s dmabuf accounting record was not allocated\n", __func__);
+		return;
+	}
+
+	spin_lock(&task->dmabuf_info->lock);
+	rec = find_task_dmabuf_record(task, dmabuf);
+	if (!rec) { /* Failed fd_install? */
+		pr_err("dmabuf not found in task list\n");
+		goto err;
+	}
+
+	if (--rec->refcnt == 0) {
+		list_del(&rec->node);
+		kfree(rec);
+		task->dmabuf_info->rss -= dmabuf->size;
+	}
+err:
+	spin_unlock(&task->dmabuf_info->lock);
+}
+
 static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
 {
 	struct dma_buf *dmabuf;
+	int ret;
 
 	if (!is_dma_buf_file(file))
 		return -EINVAL;
@@ -180,7 +292,15 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
 	    dmabuf->size >> PAGE_SHIFT)
 		return -EINVAL;
 
-	return dmabuf->ops->mmap(dmabuf, vma);
+	ret = dma_buf_account_task(dmabuf, current);
+	if (ret)
+		return ret;
+
+	ret = dmabuf->ops->mmap(dmabuf, vma);
+	if (ret)
+		dma_buf_unaccount_task(dmabuf, current);
+
+	return ret;
 }
 
 static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
@@ -557,6 +677,12 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
 	spin_unlock(&dmabuf->name_lock);
 }
 
+static int dma_buf_flush(struct file *file, fl_owner_t id)
+{
+	dma_buf_unaccount_task(file->private_data, current);
+	return 0;
+}
+
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_file_release,
 	.mmap		= dma_buf_mmap_internal,
@@ -565,6 +691,7 @@ static const struct file_operations dma_buf_fops = {
 	.unlocked_ioctl	= dma_buf_ioctl,
 	.compat_ioctl	= compat_ptr_ioctl,
 	.show_fdinfo	= dma_buf_show_fdinfo,
+	.flush		= dma_buf_flush,
 };
 
 /*
@@ -1555,6 +1682,8 @@ EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access_partial);
 int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 		 unsigned long pgoff)
 {
+	int ret;
+
 	if (WARN_ON(!dmabuf || !vma))
 		return -EINVAL;
 
@@ -1575,7 +1704,15 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 	vma_set_file(vma, dmabuf->file);
 	vma->vm_pgoff = pgoff;
 
-	return dmabuf->ops->mmap(dmabuf, vma);
+	ret = dma_buf_account_task(dmabuf, current);
+	if (ret)
+		return ret;
+
+	ret = dmabuf->ops->mmap(dmabuf, vma);
+	if (ret)
+		dma_buf_unaccount_task(dmabuf, current);
+
+	return ret;
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF);
 
diff --git a/fs/file.c b/fs/file.c
index 1f1181b189bf..e924929ac366 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <linux/dma-buf.h>
 #include
 
 #include "internal.h"
@@ -593,6 +594,9 @@ void fd_install(unsigned int fd, struct file *file)
 	struct files_struct *files = current->files;
 	struct fdtable *fdt;
 
+	if (is_dma_buf_file(file) && dma_buf_account_task(file->private_data, current))
+		pr_err("FD dmabuf accounting failed\n");
+
 	rcu_read_lock_sched();
 
 	if (unlikely(files->resize_in_progress)) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7cff02bc816e..f7d8188b0ccf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -100,6 +100,7 @@
 #include
 #include
 #include
+#include <linux/dma-buf.h>
 #include
 #include
 #include "internal.h"
@@ -3304,6 +3305,24 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
 }
 #endif /* CONFIG_STACKLEAK_METRICS */
 
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int proc_dmabuf_rss_show(struct seq_file *m, struct pid_namespace *ns,
+				struct pid *pid, struct task_struct *task)
+{
+	if (!task->dmabuf_info) {
+		pr_err("%s dmabuf accounting record was not allocated\n", __func__);
+		return -ENOMEM;
+	}
+
+	if (!(task->flags & PF_KTHREAD))
+		seq_printf(m, "%lld\n", READ_ONCE(task->dmabuf_info->rss));
+	else
+		seq_puts(m, "0\n");
+
+	return 0;
+}
+#endif
+
 /*
  * Thread groups
  */
@@ -3427,6 +3446,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif +#ifdef CONFIG_DMA_SHARED_BUFFER + ONE("dmabuf_rss", S_IRUGO, proc_dmabuf_rss_show), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 64d67293d76b..1647fb38fe80 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -24,6 +24,9 @@ #include #include #include +#ifndef __GENKSYMS__ +#include +#endif struct device; struct dma_buf; @@ -639,6 +642,43 @@ struct dma_buf_export_info { ANDROID_KABI_RESERVE(2); }; +/** + * struct task_dma_buf_record - Holds the number of (VMA and FD) references to a + * dmabuf by a collection of tasks that share both mm_struct and files_struct. + * This is the list entry type for @task_dma_buf_info dmabufs list. + * + * @node: Stores the list this record is on. + * @dmabuf: The dmabuf this record is for. + * @refcnt: The number of VMAs and FDs that reference @dmabuf by the tasks that + * share this record. + */ +struct task_dma_buf_record { + struct list_head node; + struct dma_buf *dmabuf; + unsigned long refcnt; +}; + +/** + * struct task_dma_buf_info - Holds a RSS counter, and a list of dmabufs for all + * tasks that share both mm_struct and files_struct. + * + * @rss: The sum of all dmabuf memory referenced by the tasks via memory + * mappings or file descriptors in bytes. Buffers referenced more than + * once by the process (multiple mmaps, multiple FDs, or any combination + * of both mmaps and FDs) only cause the buffer to be accounted to the + * process once. Partial mappings cause the full size of the buffer to be + * accounted, regardless of the size of the mapping. + * @refcnt: The number of tasks sharing this struct. + * @lock: Lock protecting writes for @rss, and reads/writes for @dmabufs. + * @dmabufs: List of all dmabufs referenced by the tasks. 
+ */
+struct task_dma_buf_info {
+	s64 rss;
+	refcount_t refcnt;
+	spinlock_t lock;
+	struct list_head dmabufs;
+};
+
 /**
  * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
  * @name: export-info name
@@ -741,4 +781,7 @@ int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
 void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
 long dma_buf_set_name(struct dma_buf *dmabuf, const char *name);
 int dma_buf_get_flags(struct dma_buf *dmabuf, unsigned long *flags);
+
+int dma_buf_account_task(struct dma_buf *dmabuf, struct task_struct *task);
+void dma_buf_unaccount_task(struct dma_buf *dmabuf, struct task_struct *task);
 #endif /* __DMA_BUF_H__ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1299b4497d87..68ba96bde447 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -70,6 +70,7 @@
 struct seq_file;
 struct sighand_struct;
 struct signal_struct;
 struct task_delay_info;
+struct task_dma_buf_info;
 struct task_group;
 struct user_event_mm;
@@ -1516,6 +1517,9 @@ struct task_struct {
 	 */
 	struct callback_head		l1d_flush_kill;
 #endif
+
+	struct task_dma_buf_info	*dmabuf_info;
+
 	ANDROID_KABI_RESERVE(1);
 	ANDROID_KABI_RESERVE(2);
 	ANDROID_KABI_RESERVE(3);
diff --git a/init/init_task.c b/init/init_task.c
index 31ceb0e469f7..d80c007ab59b 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -214,6 +214,7 @@ struct task_struct init_task
 	.android_vendor_data1 = {0, },
 	.android_oem_data1 = {0, },
 #endif
+	.dmabuf_info = NULL,
 };
 EXPORT_SYMBOL(init_task);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 75b1a4458a7e..66636a979911 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,7 @@
 #include
 #include
 #include
+#include <linux/dma-buf.h>
 #include
 #include
 
@@ -994,12 +995,32 @@ static inline void put_signal_struct(struct signal_struct *sig)
 		free_signal_struct(sig);
 }
 
+static void put_dmabuf_info(struct task_struct *tsk)
+{
+	if (!tsk->dmabuf_info) {
+		pr_err("%s dmabuf accounting record was not allocated\n", __func__);
+		return;
+	}
+
+	if (!refcount_dec_and_test(&tsk->dmabuf_info->refcnt))
+		return;
+
+	if (READ_ONCE(tsk->dmabuf_info->rss))
+		pr_err("%s destroying task with non-zero dmabuf rss\n", __func__);
+
+	if (!list_empty(&tsk->dmabuf_info->dmabufs))
+		pr_err("%s destroying task with non-empty dmabuf list\n", __func__);
+
+	kfree(tsk->dmabuf_info);
+}
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	put_dmabuf_info(tsk);
 	io_uring_free(tsk);
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
@@ -2268,6 +2289,58 @@ static void rv_task_fork(struct task_struct *p)
 #define rv_task_fork(p) do {} while (0)
 #endif
 
+static int copy_dmabuf_info(u64 clone_flags, struct task_struct *p)
+{
+	struct task_dma_buf_record *rec, *copy;
+
+	if (current->dmabuf_info && (clone_flags & (CLONE_VM | CLONE_FILES))
+			== (CLONE_VM | CLONE_FILES)) {
+		/*
+		 * Both MM and FD references to dmabufs are shared with the parent, so
+		 * we can share an RSS counter with the parent.
+ */ + refcount_inc(¤t->dmabuf_info->refcnt); + p->dmabuf_info = current->dmabuf_info; + return 0; + } + + p->dmabuf_info = kmalloc(sizeof(*p->dmabuf_info), GFP_KERNEL); + if (!p->dmabuf_info) + return -ENOMEM; + + refcount_set(&p->dmabuf_info->refcnt, 1); + spin_lock_init(&p->dmabuf_info->lock); + INIT_LIST_HEAD(&p->dmabuf_info->dmabufs); + if (current->dmabuf_info) { + spin_lock(¤t->dmabuf_info->lock); + p->dmabuf_info->rss = current->dmabuf_info->rss; + list_for_each_entry(rec, ¤t->dmabuf_info->dmabufs, node) { + copy = kmalloc(sizeof(*copy), GFP_KERNEL); + if (!copy) { + spin_unlock(¤t->dmabuf_info->lock); + goto err_list_copy; + } + + copy->dmabuf = rec->dmabuf; + copy->refcnt = rec->refcnt; + list_add(©->node, &p->dmabuf_info->dmabufs); + } + spin_unlock(¤t->dmabuf_info->lock); + } else { + p->dmabuf_info->rss = 0; + } + + return 0; + +err_list_copy: + list_for_each_entry_safe(rec, copy, &p->dmabuf_info->dmabufs, node) { + list_del(&rec->node); + kfree(rec); + } + kfree(p->dmabuf_info); + return -ENOMEM; +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2509,14 +2582,18 @@ __latent_entropy struct task_struct *copy_process( p->bpf_ctx = NULL; #endif - /* Perform scheduler related setup. Assign this task to a CPU. */ - retval = sched_fork(clone_flags, p); + retval = copy_dmabuf_info(clone_flags, p); if (retval) goto bad_fork_cleanup_policy; + /* Perform scheduler related setup. Assign this task to a CPU. */ + retval = sched_fork(clone_flags, p); + if (retval) + goto bad_fork_cleanup_dmabuf; + retval = perf_event_init_task(p, clone_flags); if (retval) - goto bad_fork_cleanup_policy; + goto bad_fork_cleanup_dmabuf; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_perf; @@ -2819,6 +2896,8 @@ bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_perf: perf_event_free_task(p); +bad_fork_cleanup_dmabuf: + put_dmabuf_info(p); bad_fork_cleanup_policy: lockdep_free_task(p); #ifdef CONFIG_NUMA diff --git a/mm/mmap.c b/mm/mmap.c index 4c74fb3d7a94..6da684ab9f98 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -144,8 +145,11 @@ static void remove_vma(struct vm_area_struct *vma, bool unreachable) { might_sleep(); vma_close(vma); - if (vma->vm_file) + if (vma->vm_file) { + if (is_dma_buf_file(vma->vm_file)) + dma_buf_unaccount_task(vma->vm_file->private_data, current); fput(vma->vm_file); + } mpol_put(vma_policy(vma)); if (unreachable) __vm_area_free(vma); @@ -2417,8 +2421,14 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, if (err) goto out_free_mpol; - if (new->vm_file) + if (new->vm_file) { get_file(new->vm_file); + if (is_dma_buf_file(new->vm_file)) { + /* Should never fail since this task already references the buffer */ + if (dma_buf_account_task(new->vm_file->private_data, current)) + pr_err("%s failed to account dmabuf\n", __func__); + } + } if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new);