From bddab7cf5de4a43346bc8e6803b20738b6d9e1cb Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 25 Jun 2025 21:15:34 +0000 Subject: [PATCH] ANDROID: Track per-process dmabuf RSS HWM A per-process high watermark counter for dmabuf memory is useful for detecting bursty / transient allocations causing memory pressure spikes that don't appear in the dmabuf RSS counter when userspace reacts to memory pressure and reads RSS after buffers have already been freed. The /proc/<pid>/dmabuf_rss_hwm file in procfs now reports the maximum value of /proc/<pid>/dmabuf_rss during the lifetime of the process. The value of /proc/<pid>/dmabuf_rss_hwm can be reset to the current value of /proc/<pid>/dmabuf_rss by writing "0" to the file. Bug: 424648392 Change-Id: I184d83d48ec63b805b712f19e121199a63095965 Signed-off-by: T.J. Mercier --- drivers/dma-buf/dma-buf.c | 8 ++++ fs/proc/base.c | 77 +++++++++++++++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++- kernel/fork.c | 2 + 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index c8c05d2e112a..7c9ac163d115 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -187,6 +187,14 @@ static int new_task_dmabuf_record(struct task_struct *task, struct dma_buf *dmab return -ENOMEM; task->dmabuf_info->rss += dmabuf->size; + /* + * task->dmabuf_info->lock protects against concurrent writers, so no + * worries about stale rss_hwm between the read and write, and we don't + * need to cmpxchg here.
+ */ + if (task->dmabuf_info->rss > task->dmabuf_info->rss_hwm) + task->dmabuf_info->rss_hwm = task->dmabuf_info->rss; + rec->dmabuf = dmabuf; rec->refcnt = 1; list_add(&rec->node, &task->dmabuf_info->dmabufs); diff --git a/fs/proc/base.c b/fs/proc/base.c index f7d8188b0ccf..6b91ddcab7e2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3321,6 +3321,82 @@ static int proc_dmabuf_rss_show(struct seq_file *m, struct pid_namespace *ns, return 0; } + +static int proc_dmabuf_rss_hwm_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *task; + int ret = 0; + + task = get_proc_task(inode); + if (!task) + return -ESRCH; + + if (!task->dmabuf_info) { + pr_err("%s dmabuf accounting record was not allocated\n", __func__); + ret = -ENOMEM; + goto out; + } + + if (!(task->flags & PF_KTHREAD)) + seq_printf(m, "%lld\n", READ_ONCE(task->dmabuf_info->rss_hwm)); + else + seq_puts(m, "0\n"); + +out: + put_task_struct(task); + + return ret; +} + +static int proc_dmabuf_rss_hwm_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, proc_dmabuf_rss_hwm_show, inode); +} + +static ssize_t +proc_dmabuf_rss_hwm_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *task; + unsigned long long val; + int ret; + + ret = kstrtoull_from_user(buf, count, 10, &val); + if (ret) + return ret; + + if (val != 0) + return -EINVAL; + + task = get_proc_task(inode); + if (!task) + return -ESRCH; + + if (!task->dmabuf_info) { + pr_err("%s dmabuf accounting record was not allocated\n", __func__); + ret = -ENOMEM; + goto out; + } + + spin_lock(&task->dmabuf_info->lock); + task->dmabuf_info->rss_hwm = task->dmabuf_info->rss; + spin_unlock(&task->dmabuf_info->lock); + +out: + put_task_struct(task); + + return ret < 0 ? 
ret : count; +} + +static const struct file_operations proc_dmabuf_rss_hwm_operations = { + .open = proc_dmabuf_rss_hwm_open, + .write = proc_dmabuf_rss_hwm_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* @@ -3448,6 +3524,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif #ifdef CONFIG_DMA_SHARED_BUFFER ONE("dmabuf_rss", S_IRUGO, proc_dmabuf_rss_show), + REG("dmabuf_rss_hwm", S_IRUGO|S_IWUSR, proc_dmabuf_rss_hwm_operations), #endif }; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 1647fb38fe80..a362c8ba7a21 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -659,8 +659,8 @@ struct task_dma_buf_record { }; /** - * struct task_dma_buf_info - Holds a RSS counter, and a list of dmabufs for all - * tasks that share both mm_struct and files_struct. + * struct task_dma_buf_info - Holds RSS and RSS HWM counters, and a list of + * dmabufs for all tasks that share both mm_struct and files_struct. * * @rss: The sum of all dmabuf memory referenced by the tasks via memory * mappings or file descriptors in bytes. Buffers referenced more than @@ -668,12 +668,15 @@ struct task_dma_buf_record { * of both mmaps and FDs) only cause the buffer to be accounted to the * process once. Partial mappings cause the full size of the buffer to be * accounted, regardless of the size of the mapping. + * @rss_hwm: The maximum value of @rss over the lifetime of this struct, or + * since the last reset by userspace. * @refcnt: The number of tasks sharing this struct. * @lock: Lock protecting writes for @rss, and reads/writes for @dmabufs. * @dmabufs: List of all dmabufs referenced by the tasks.
*/ struct task_dma_buf_info { s64 rss; + s64 rss_hwm; refcount_t refcnt; spinlock_t lock; struct list_head dmabufs; diff --git a/kernel/fork.c b/kernel/fork.c index 66636a979911..e1d7d244d43a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2314,6 +2314,7 @@ static int copy_dmabuf_info(u64 clone_flags, struct task_struct *p) if (current->dmabuf_info) { spin_lock(&current->dmabuf_info->lock); p->dmabuf_info->rss = current->dmabuf_info->rss; + p->dmabuf_info->rss_hwm = current->dmabuf_info->rss; list_for_each_entry(rec, &current->dmabuf_info->dmabufs, node) { copy = kmalloc(sizeof(*copy), GFP_KERNEL); if (!copy) { @@ -2328,6 +2329,7 @@ static int copy_dmabuf_info(u64 clone_flags, struct task_struct *p) spin_unlock(&current->dmabuf_info->lock); } else { p->dmabuf_info->rss = 0; + p->dmabuf_info->rss_hwm = 0; } return 0;