// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #define pr_fmt(fmt) "qti_virtio_mem: %s: " fmt, __func__ #include #include #include #include #include #include #include #include #include #include #include #include #include "qti_virtio_mem.h" struct qti_virtio_mem_hint { struct list_head list; struct list_head kernel_plugged_list; struct kref kref; struct file *filp; s64 size; char name[QTI_VIRTIO_MEM_IOC_MAX_NAME_LEN]; }; #define QTI_VIRTIO_MEM_MAX_DEVS 1 static dev_t qvm_dev_no; static struct class *qvm_class; static struct cdev qvm_char_dev; /* Protects qvm_hint_total and qvm_list */ static DEFINE_MUTEX(qvm_lock); static LIST_HEAD(qvm_list); static LIST_HEAD(qvm_kernel_plugged); static DEFINE_MUTEX(qvm_kernel_plugged_lock); /* Sum of all hints */ static s64 qvm_hint_total; static uint16_t kernel_plugged; #define QVM_OOM_NOTIFY_PRIORITY 90 /* Caller holds qvm_lock */ static int virtio_mem_update_config_size(s64 size, bool sync) { struct virtio_mem *vm = virtio_mem_dev; /* In future, may support multiple virtio_mem_devices for different zones */ if (!vm) return -EINVAL; /* Round up if request not properly aligned. */ if (vm->in_sbm) size = ALIGN(size, vm->sbm.sb_size); else size = ALIGN(size, vm->bbm.bb_size); if (size < 0 || size > vm->max_pluggable_size) return -EINVAL; vm->requested_size = size; virtio_mem_config_changed(vm->vdev); if (sync) { flush_work(&vm->wq); if (vm->requested_size != vm->plugged_size) { dev_err(&vm->vdev->dev, "Request failed: 0x%llx, plugged: 0x%llx\n", vm->requested_size, vm->plugged_size); return -ENOMEM; } } return 0; } static int qti_virtio_mem_hint_update(struct qti_virtio_mem_hint *hint, s64 new_size, bool sync) { int ret; s64 total = 0; lockdep_assert_held(&qvm_lock); total = qvm_hint_total + new_size - hint->size; ret = virtio_mem_update_config_size(total, sync); if (ret) { pr_debug("Hint %s: Invalid request %llx would result in %llx\n", hint->name, new_size, total); return ret; } hint->size = new_size; qvm_hint_total = total; pr_debug("Hint %s: Updated size %llx, new_requested_size %llx\n", hint->name, hint->size, qvm_hint_total); return ret; } static void qti_virtio_mem_hint_kref_release(struct kref *kref) { struct qti_virtio_mem_hint *hint; int rc; mutex_lock(&qvm_lock); hint = container_of(kref, struct qti_virtio_mem_hint, kref); rc = qti_virtio_mem_hint_update(hint, 0, true); if (rc) pr_err("Possible permanent plug of memory to vm\n"); list_del(&hint->list); mutex_unlock(&qvm_lock); kfree(hint); } static void *qti_virtio_mem_hint_create(char *name, s64 size) { struct qti_virtio_mem_hint *hint; lockdep_assert_held(&qvm_lock); hint = kzalloc(sizeof(*hint), GFP_KERNEL); if (!hint) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&hint->list); kref_init(&hint->kref); hint->size = 0; if (!name || !strlen(name)) name = "(none)"; strscpy(hint->name, name, ARRAY_SIZE(hint->name)); if (qti_virtio_mem_hint_update(hint, size, true)) { kfree(hint); return ERR_PTR(-EINVAL); } list_add(&hint->list, &qvm_list); return hint; } static void qti_virtio_mem_hint_release(void *handle) { struct qti_virtio_mem_hint *hint = handle; kref_put(&hint->kref, qti_virtio_mem_hint_kref_release); } static int qti_virtio_mem_hint_file_release(struct inode *inode, struct file *filp) { qti_virtio_mem_hint_release(filp->private_data); return 0; } static const struct file_operations qti_virtio_mem_hint_fops = { .release = qti_virtio_mem_hint_file_release, }; static int qti_virtio_mem_hint_create_fd(char *name, u64 size) { struct qti_virtio_mem_hint *hint; int fd; mutex_lock(&qvm_lock); hint = qti_virtio_mem_hint_create(name, size); mutex_unlock(&qvm_lock); if (IS_ERR(hint)) return PTR_ERR(hint); hint->filp = anon_inode_getfile("virtio_mem_hint", &qti_virtio_mem_hint_fops, hint, O_RDWR); if (IS_ERR(hint->filp)) { int ret = PTR_ERR(hint->filp); qti_virtio_mem_hint_release(hint); return ret; } fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { fput(hint->filp); return fd; } fd_install(fd, hint->filp); return fd; } union qti_virtio_mem_ioc_arg { struct qti_virtio_mem_ioc_hint_create_arg hint_create; }; static int qti_virtio_mem_ioc_hint_create(struct qti_virtio_mem_ioc_hint_create_arg *arg) { int fd; /* Validate arguments */ if (arg->size <= 0 || arg->reserved0 || arg->reserved1) return -EINVAL; /* ensure name is null-terminated */ arg->name[QTI_VIRTIO_MEM_IOC_MAX_NAME_LEN - 1] = '\0'; fd = qti_virtio_mem_hint_create_fd(arg->name, arg->size); if (fd < 0) return fd; arg->fd = fd; return 0; } static long qti_virtio_mem_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int ret; unsigned int dir = _IOC_DIR(cmd); union qti_virtio_mem_ioc_arg ioctl_arg; if (_IOC_SIZE(cmd) > sizeof(ioctl_arg)) return -EINVAL; if (copy_from_user(&ioctl_arg, (void __user *)arg, _IOC_SIZE(cmd))) return -EFAULT; if (!(dir & _IOC_WRITE)) memset(&ioctl_arg, 0, sizeof(ioctl_arg)); switch (cmd) { case QTI_VIRTIO_MEM_IOC_HINT_CREATE: { ret = qti_virtio_mem_ioc_hint_create(&ioctl_arg.hint_create); if (ret) return ret; break; } default: return -ENOTTY; } if (dir & _IOC_READ) { if (copy_to_user((void __user *)arg, &ioctl_arg, _IOC_SIZE(cmd))) return -EFAULT; } return 0; } static const struct file_operations qti_virtio_mem_dev_fops = { .unlocked_ioctl = qti_virtio_mem_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static ssize_t device_block_size_show(struct device *dev, struct device_attribute *attr, char *buf) { if (!virtio_mem_dev) return -ENODEV; return scnprintf(buf, PAGE_SIZE, "%lld\n", virtio_mem_dev->device_block_size); } static ssize_t max_plugin_threshold_show(struct device *dev, struct device_attribute *attr, char *buf) { if (!virtio_mem_dev) return -ENODEV; return scnprintf(buf, PAGE_SIZE, "%lld\n", virtio_mem_dev->max_pluggable_size); } static ssize_t device_block_plugged_show(struct device *dev, struct device_attribute *attr, char *buf) { uint16_t device_block_plugged; device_block_plugged = ALIGN(qvm_hint_total, virtio_mem_dev->device_block_size) / virtio_mem_dev->device_block_size; return scnprintf(buf, PAGE_SIZE, "%d\n", device_block_plugged); } static ssize_t kernel_plugged_show(struct device *dev, struct device_attribute *attr, char *buf) { return scnprintf(buf, PAGE_SIZE, "%d\n", kernel_plugged); } static ssize_t kernel_unplug_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { int val = 0, ret; uint16_t plugged_out; struct qti_virtio_mem_hint *hint; ret = kstrtoint(buf, 0, &val); if (ret < 0) return ret; mutex_lock(&qvm_kernel_plugged_lock); val = min_t(uint16_t, val, kernel_plugged); for (plugged_out = 0; plugged_out < val && !list_empty(&qvm_kernel_plugged); plugged_out++) { hint = list_first_entry(&qvm_kernel_plugged, struct qti_virtio_mem_hint, kernel_plugged_list); list_del(&hint->kernel_plugged_list); qti_virtio_mem_hint_release(hint); kernel_plugged--; } mutex_unlock(&qvm_kernel_plugged_lock); return size; } static DEVICE_ATTR_RO(device_block_size); static DEVICE_ATTR_RO(max_plugin_threshold); static DEVICE_ATTR_RO(device_block_plugged); static DEVICE_ATTR_RO(kernel_plugged); static DEVICE_ATTR_WO(kernel_unplug); static struct attribute *dev_attrs[] = { &dev_attr_device_block_size.attr, &dev_attr_max_plugin_threshold.attr, &dev_attr_device_block_plugged.attr, &dev_attr_kernel_plugged.attr, &dev_attr_kernel_unplug.attr, NULL, }; static struct attribute_group dev_group = { .attrs = dev_attrs, }; static inline unsigned long get_zone_free_pages(enum zone_type zone_class) { return zone_page_state(&NODE_DATA(numa_node_id())->node_zones[zone_class], NR_FREE_PAGES); } static int qvm_oom_notify(struct notifier_block *self, unsigned long dummy, void *parm) { unsigned long *freed = parm; struct qti_virtio_mem_hint *hint; uint64_t device_block_size = virtio_mem_dev->device_block_size; unsigned long free_pages; struct zone *z; z = &NODE_DATA(numa_node_id())->node_zones[ZONE_MOVABLE]; free_pages = get_zone_free_pages(ZONE_MOVABLE); /* add a block only if movable zone is exhausted */ if (free_pages > high_wmark_pages(z) + device_block_size / PAGE_SIZE) return NOTIFY_OK; if (qvm_hint_total >= virtio_mem_dev->max_pluggable_size) { pr_err_ratelimited("Out of pluggable memory\n"); return NOTIFY_OK; } pr_info_ratelimited("comm: %s totalram_pages: %lu Normal free_pages: %lu Movable free_pages: %lu\n", current->comm, totalram_pages(), get_zone_free_pages(ZONE_NORMAL), get_zone_free_pages(ZONE_MOVABLE)); if (!mutex_trylock(&qvm_lock)) { *freed = 1; return NOTIFY_OK; } hint = qti_virtio_mem_hint_create("qvm_oom_notifier", device_block_size); mutex_unlock(&qvm_lock); if (IS_ERR(hint)) { pr_err("failed to add memory\n"); return NOTIFY_OK; } mutex_lock(&qvm_kernel_plugged_lock); list_add(&hint->kernel_plugged_list, &qvm_kernel_plugged); *freed += device_block_size / PAGE_SIZE; kernel_plugged++; mutex_unlock(&qvm_kernel_plugged_lock); return NOTIFY_OK; } static struct notifier_block qvm_oom_nb = { .notifier_call = qvm_oom_notify, .priority = QVM_OOM_NOTIFY_PRIORITY, }; static int add_initial_blocks(struct device *dev) { u32 requested_size, size = 0; int ret; struct qti_virtio_mem_hint *hint, *tmp; uint64_t device_block_size = virtio_mem_dev->device_block_size; /* Optional */ if (of_property_read_u32(dev->of_node, "qcom,initial-movable-zone-size", &requested_size)) return 0; while (size < requested_size) { mutex_lock(&qvm_lock); hint = qti_virtio_mem_hint_create("init-movable-zone", device_block_size); mutex_unlock(&qvm_lock); if (IS_ERR(hint)) { ret = PTR_ERR(hint); goto err; } mutex_lock(&qvm_kernel_plugged_lock); list_add(&hint->kernel_plugged_list, &qvm_kernel_plugged); kernel_plugged++; mutex_unlock(&qvm_kernel_plugged_lock); size += device_block_size; } dev_info(dev, "Setup Movable Zone with size %x\n", size); return 0; err: mutex_lock(&qvm_kernel_plugged_lock); list_for_each_entry_safe(hint, tmp, &qvm_kernel_plugged, kernel_plugged_list) { list_del(&hint->kernel_plugged_list); qti_virtio_mem_hint_release(hint); kernel_plugged--; } mutex_unlock(&qvm_kernel_plugged_lock); return ret; } int qti_virtio_mem_init(struct platform_device *pdev) { int ret; struct device *dev; ret = alloc_chrdev_region(&qvm_dev_no, 0, QTI_VIRTIO_MEM_MAX_DEVS, "qti_virtio_mem"); if (ret < 0) goto err_chrdev_region; qvm_class = class_create("qti_virtio_mem"); if (IS_ERR(qvm_class)) { ret = PTR_ERR(qvm_class); goto err_class_create; } cdev_init(&qvm_char_dev, &qti_virtio_mem_dev_fops); ret = cdev_add(&qvm_char_dev, qvm_dev_no, 1); if (ret < 0) goto err_cdev_add; dev = device_create(qvm_class, NULL, qvm_dev_no, NULL, "qti_virtio_mem"); if (IS_ERR(dev)) { ret = PTR_ERR(dev); goto err_dev_create; } ret = sysfs_create_group(&dev->kobj, &dev_group); if (ret < 0) { dev_err(dev, "failed to create sysfs group\n"); goto err_dev_create; } ret = register_oom_notifier(&qvm_oom_nb); if (ret < 0) { dev_err(dev, "Failed to register to oom notifier\n"); goto err_dev_create; } ret = add_initial_blocks(&pdev->dev); if (ret) goto err_oom_notifier; return 0; err_oom_notifier: unregister_oom_notifier(&qvm_oom_nb); err_dev_create: cdev_del(&qvm_char_dev); err_cdev_add: class_destroy(qvm_class); err_class_create: unregister_chrdev_region(qvm_dev_no, QTI_VIRTIO_MEM_MAX_DEVS); err_chrdev_region: return ret; } void qti_virtio_mem_exit(struct platform_device *pdev) { struct device *dev; WARN(!list_empty(&qvm_list), "Unloading driver with nonzero hint objects\n"); unregister_oom_notifier(&qvm_oom_nb); dev = class_find_device_by_devt(qvm_class, qvm_dev_no); if (dev) sysfs_remove_group(&dev->kobj, &dev_group); device_destroy(qvm_class, qvm_dev_no); cdev_del(&qvm_char_dev); class_destroy(qvm_class); unregister_chrdev_region(qvm_dev_no, QTI_VIRTIO_MEM_MAX_DEVS); }