// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

/*
 * Note: the original include targets were lost in extraction; the headers
 * below are reconstructed from the APIs this file uses.
 */
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/gunyah.h>
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/vtime.h>

#include "vm_mgr.h"

#include <uapi/linux/gunyah.h>

#define MAX_VCPU_NAME 20 /* gh-vcpu:strlen(U32::MAX)+NUL */

/**
 * struct gunyah_vcpu - Track an instance of gunyah vCPU
 * @f: Function instance (how we get associated with the main VM)
 * @rsc: Pointer to the Gunyah vCPU resource, will be NULL until VM starts
 * @run_lock: One userspace thread at a time should run the vCPU
 * @ghvm: Pointer to the main VM struct; quicker lookup than going through
 *        @f->ghvm
 * @vcpu_run: Pointer to page shared with userspace to communicate vCPU state
 * @state: Our copy of the state of the vCPU, since userspace could trick
 *         the kernel into behaving incorrectly if we relied on @vcpu_run
 * @mmio_read_len: Our copy of @vcpu_run->mmio.len; see also @state
 * @mmio_addr: Our copy of @vcpu_run->mmio.phys_addr; see also @state
 * @ready: if vCPU goes to sleep, hypervisor reports to us that it's sleeping
 *         and will signal interrupt (from @rsc) when it's time to wake up.
 *         This completion signals that we can run the vCPU again.
 * @nb: When VM exits, the status of VM is reported via @vcpu_run->status.
 *      We need to track overall VM status, and the nb gives us the updates
 *      from Resource Manager.
 * @ticket: resource ticket to claim vCPU# for the VM
 * @kref: Reference counter
 */
struct gunyah_vcpu {
	struct gunyah_vm_function_instance *f;
	struct gunyah_resource *rsc;
	struct mutex run_lock;
	struct gunyah_vm *ghvm;

	struct gunyah_vcpu_run *vcpu_run;

	/*
	 * Track why the vcpu_run hypercall returned. This mirrors the vcpu_run
	 * structure shared with userspace, except is used internally to avoid
	 * trusting userspace to not modify the vcpu_run structure.
	 */
	enum {
		GUNYAH_VCPU_RUN_STATE_UNKNOWN = 0,
		GUNYAH_VCPU_RUN_STATE_READY,
		GUNYAH_VCPU_RUN_STATE_MMIO_READ,
		GUNYAH_VCPU_RUN_STATE_MMIO_WRITE,
		GUNYAH_VCPU_RUN_STATE_SYSTEM_DOWN,
	} state;
	u8 mmio_read_len;
	u64 mmio_addr;

	struct completion ready;

	struct notifier_block nb;
	struct gunyah_vm_resource_ticket ticket;
	struct kref kref;
};
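
/*
 * A sketch of the @state transitions implemented below (informational
 * summary of this file's logic, not authoritative documentation):
 *
 *	UNKNOWN ---(first GUNYAH_VCPU_RUN while the VM is running)---> READY
 *	READY ---(guest MMIO read for userspace to emulate)---> MMIO_READ
 *	READY ---(guest MMIO write for userspace to emulate)---> MMIO_WRITE
 *	MMIO_READ/MMIO_WRITE ---(userspace re-runs the vCPU)---> READY
 *	any state ---(VM exited or crashed)---> SYSTEM_DOWN (terminal)
 */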

static void vcpu_release(struct kref *kref)
{
	struct gunyah_vcpu *vcpu = container_of(kref, struct gunyah_vcpu, kref);

	free_page((unsigned long)vcpu->vcpu_run);
	kfree(vcpu);
}

/*
 * When hypervisor allows us to schedule vCPU again, it gives us an interrupt
 */
static irqreturn_t gunyah_vcpu_irq_handler(int irq, void *data)
{
	struct gunyah_vcpu *vcpu = data;

	complete(&vcpu->ready);
	return IRQ_HANDLED;
}

static bool gunyah_handle_page_fault(
	struct gunyah_vcpu *vcpu,
	const struct gunyah_hypercall_vcpu_run_resp *vcpu_run_resp)
{
	u64 addr = vcpu_run_resp->state_data[0];
	bool write = !!vcpu_run_resp->state_data[1];
	int ret = 0;

	ret = gunyah_gup_demand_page(vcpu->ghvm, addr, write);
	if (!ret || ret == -EAGAIN)
		return true;

	vcpu->vcpu_run->page_fault.resume_action = GUNYAH_VCPU_RESUME_FAULT;
	vcpu->vcpu_run->page_fault.attempt = ret;
	vcpu->vcpu_run->page_fault.phys_addr = addr;
	vcpu->vcpu_run->exit_reason = GUNYAH_VCPU_EXIT_PAGE_FAULT;
	return false;
}

static bool
gunyah_handle_mmio(struct gunyah_vcpu *vcpu, unsigned long resume_data[3],
		   const struct gunyah_hypercall_vcpu_run_resp *vcpu_run_resp)
{
	u64 addr = vcpu_run_resp->state_data[0],
	    len = vcpu_run_resp->state_data[1],
	    data = vcpu_run_resp->state_data[2];
	int ret;

	if (WARN_ON(len > sizeof(u64)))
		len = sizeof(u64);

	ret = gunyah_gup_demand_page(vcpu->ghvm, addr,
				     vcpu->vcpu_run->mmio.is_write);
	if (!ret || ret == -EAGAIN) {
		resume_data[1] = GUNYAH_ADDRSPACE_VMMIO_ACTION_RETRY;
		return true;
	}

	if (vcpu_run_resp->state == GUNYAH_VCPU_ADDRSPACE_VMMIO_READ) {
		vcpu->vcpu_run->mmio.is_write = 0;
		/*
		 * Record that we need to give the vCPU the user-supplied
		 * value on the next gunyah_vcpu_run()
		 */
		vcpu->state = GUNYAH_VCPU_RUN_STATE_MMIO_READ;
		vcpu->mmio_read_len = len;
	} else { /* GUNYAH_VCPU_ADDRSPACE_VMMIO_WRITE */
		if (!gunyah_vm_mmio_write(vcpu->ghvm, addr, len, data)) {
			resume_data[0] = GUNYAH_ADDRSPACE_VMMIO_ACTION_EMULATE;
			return true;
		}
		vcpu->vcpu_run->mmio.is_write = 1;
		memcpy(vcpu->vcpu_run->mmio.data, &data, len);
		vcpu->state = GUNYAH_VCPU_RUN_STATE_MMIO_WRITE;
	}

	/*
	 * Default to "handled" so that existing userspace which doesn't know
	 * about resume_action continues to work.
	 */
	vcpu->vcpu_run->mmio.resume_action = GUNYAH_VCPU_RESUME_HANDLED;
	vcpu->mmio_addr = vcpu->vcpu_run->mmio.phys_addr = addr;
	vcpu->vcpu_run->mmio.len = len;
	vcpu->vcpu_run->exit_reason = GUNYAH_VCPU_EXIT_MMIO;

	return false;
}

static int gunyah_handle_mmio_resume(struct gunyah_vcpu *vcpu,
				     unsigned long resume_data[3])
{
	switch (vcpu->vcpu_run->mmio.resume_action) {
	case GUNYAH_VCPU_RESUME_HANDLED:
		if (vcpu->state == GUNYAH_VCPU_RUN_STATE_MMIO_READ) {
			if (unlikely(vcpu->mmio_read_len >
				     sizeof(resume_data[0])))
				vcpu->mmio_read_len = sizeof(resume_data[0]);
			memcpy(&resume_data[0], vcpu->vcpu_run->mmio.data,
			       vcpu->mmio_read_len);
		}
		resume_data[1] = GUNYAH_ADDRSPACE_VMMIO_ACTION_EMULATE;
		break;
	case GUNYAH_VCPU_RESUME_FAULT:
		resume_data[1] = GUNYAH_ADDRSPACE_VMMIO_ACTION_FAULT;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
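
/*
 * Summary of the MMIO handshake implemented by gunyah_handle_mmio() and
 * gunyah_handle_mmio_resume() above (a restatement of the code, not
 * authoritative UAPI documentation):
 *
 * - Read: the vCPU exits with exit_reason == GUNYAH_VCPU_EXIT_MMIO and
 *   mmio.is_write == 0. Userspace stores the emulated value in mmio.data
 *   and re-runs the vCPU; the resume path forwards that value to the
 *   hypervisor through resume_data[0].
 * - Write: gunyah_vm_mmio_write() is tried first (in-kernel handlers). Only
 *   if no handler claims the access does the vCPU exit to userspace, with
 *   the guest's value already copied into mmio.data.
 * - Either way, userspace leaves mmio.resume_action as
 *   GUNYAH_VCPU_RESUME_HANDLED to emulate the access, or sets
 *   GUNYAH_VCPU_RESUME_FAULT to inject a fault into the guest instead.
 */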

static int gunyah_vcpu_rm_notification(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	struct gunyah_vcpu *vcpu = container_of(nb, struct gunyah_vcpu, nb);
	struct gunyah_rm_vm_exited_payload *exit_payload = data;

	/* Wake up userspace waiting for the vCPU to be runnable again */
	if (action == GUNYAH_RM_NOTIFICATION_VM_EXITED &&
	    le16_to_cpu(exit_payload->vmid) == vcpu->ghvm->vmid)
		complete(&vcpu->ready);

	return NOTIFY_OK;
}

static inline enum gunyah_vm_status
remap_vm_status(enum gunyah_rm_vm_status rm_status)
{
	switch (rm_status) {
	case GUNYAH_RM_VM_STATUS_INIT_FAILED:
		return GUNYAH_VM_STATUS_LOAD_FAILED;
	case GUNYAH_RM_VM_STATUS_EXITED:
		return GUNYAH_VM_STATUS_EXITED;
	default:
		return GUNYAH_VM_STATUS_CRASHED;
	}
}

/**
 * gunyah_vcpu_check_system() - Check whether VM as a whole is running
 * @vcpu: Pointer to gunyah_vcpu
 *
 * Returns true if the VM is alive.
 * Returns false if the VM is not alive (which can only mean the VM is
 * shutting down).
 */
static bool gunyah_vcpu_check_system(struct gunyah_vcpu *vcpu)
	__must_hold(&vcpu->run_lock)
{
	bool ret = true;

	down_read(&vcpu->ghvm->status_lock);
	if (likely(vcpu->ghvm->vm_status == GUNYAH_RM_VM_STATUS_RUNNING))
		goto out;

	vcpu->vcpu_run->status.status = remap_vm_status(vcpu->ghvm->vm_status);
	vcpu->vcpu_run->status.exit_info = vcpu->ghvm->exit_info;
	vcpu->vcpu_run->exit_reason = GUNYAH_VCPU_EXIT_STATUS;
	vcpu->state = GUNYAH_VCPU_RUN_STATE_SYSTEM_DOWN;
	ret = false;
out:
	up_read(&vcpu->ghvm->status_lock);
	return ret;
}

/*
 * We must have interrupts enabled when making the hypercall that switches to
 * the guest vCPU; otherwise, the guest vCPU runs until the end of the
 * hypervisor's scheduling time slice, which also increases interrupt latency.
 * Native vtime accounting requires that interrupts are disabled, so we can't
 * do the accounting in that configuration.
 */
#if IS_ENABLED(CONFIG_TICK_CPU_ACCOUNTING)
static inline void gh_guest_accounting_enter(void)
{
	vtime_account_guest_enter();
}

static inline void gh_guest_accounting_exit(void)
{
	vtime_account_guest_exit();
}
#else /* !CONFIG_TICK_CPU_ACCOUNTING */
static inline void gh_guest_accounting_enter(void)
{
}

static inline void gh_guest_accounting_exit(void)
{
}
#endif /* CONFIG_TICK_CPU_ACCOUNTING */

/**
 * gunyah_vcpu_run() - Request Gunyah to begin scheduling this vCPU.
 * @vcpu: The client descriptor that was obtained via gunyah_vcpu_alloc()
 */
static int gunyah_vcpu_run(struct gunyah_vcpu *vcpu)
{
	struct gunyah_hypercall_vcpu_run_resp vcpu_run_resp;
	unsigned long resume_data[3] = { 0 };
	enum gunyah_error gunyah_error;
	int ret = 0;

	if (!vcpu->f)
		return -ENODEV;

	if (mutex_lock_interruptible(&vcpu->run_lock))
		return -ERESTARTSYS;

	if (!vcpu->rsc) {
		ret = -ENODEV;
		goto out;
	}

	switch (vcpu->state) {
	case GUNYAH_VCPU_RUN_STATE_UNKNOWN:
		if (vcpu->ghvm->vm_status != GUNYAH_RM_VM_STATUS_RUNNING) {
			/*
			 * Check if the VM is up. If the VM is starting, this
			 * will block until the VM is fully up, since that
			 * thread does down_write.
			 */
			if (!gunyah_vcpu_check_system(vcpu))
				goto out;
		}
		vcpu->state = GUNYAH_VCPU_RUN_STATE_READY;
		break;
	case GUNYAH_VCPU_RUN_STATE_MMIO_READ:
	case GUNYAH_VCPU_RUN_STATE_MMIO_WRITE:
		ret = gunyah_handle_mmio_resume(vcpu, resume_data);
		if (ret)
			goto out;
		vcpu->state = GUNYAH_VCPU_RUN_STATE_READY;
		break;
	case GUNYAH_VCPU_RUN_STATE_SYSTEM_DOWN:
		goto out;
	default:
		break;
	}

	if (current->mm != vcpu->ghvm->mm_s) {
		ret = -EPERM;
		goto out;
	}

	while (!ret && !signal_pending(current)) {
		if (vcpu->vcpu_run->immediate_exit) {
			ret = -EINTR;
			goto out;
		}

		gh_guest_accounting_enter();
		gunyah_error = gunyah_hypercall_vcpu_run(
			vcpu->rsc->capid, resume_data, &vcpu_run_resp);
		gh_guest_accounting_exit();
		if (gunyah_error == GUNYAH_ERROR_OK) {
			memset(resume_data, 0, sizeof(resume_data));
			switch (vcpu_run_resp.state) {
			case GUNYAH_VCPU_STATE_READY:
				if (need_resched())
					schedule();
				break;
			case GUNYAH_VCPU_STATE_POWERED_OFF:
				/*
				 * vcpu might be off because the VM is shut
				 * down. If so, it won't ever run again.
				 */
				if (!gunyah_vcpu_check_system(vcpu))
					goto out;
				/*
				 * Otherwise, another vcpu will turn it on
				 * (e.g. by PSCI) and hyp sends an interrupt
				 * to wake Linux up.
				 */
				fallthrough;
			case GUNYAH_VCPU_STATE_EXPECTS_WAKEUP:
				ret = wait_for_completion_interruptible(
					&vcpu->ready);
				/*
				 * Reinitialize the completion before the next
				 * hypercall. If we reinitialized after the
				 * hypercall, the interrupt may have already
				 * arrived before the completion was
				 * reinitialized, and we would end up waiting
				 * for an event that already happened.
				 */
				reinit_completion(&vcpu->ready);
				/*
				 * Check VM status again. Completion
				 * might've come from VM exiting.
				 */
				if (!ret && !gunyah_vcpu_check_system(vcpu))
					goto out;
				break;
			case GUNYAH_VCPU_STATE_BLOCKED:
				schedule();
				break;
			case GUNYAH_VCPU_ADDRSPACE_VMMIO_READ:
			case GUNYAH_VCPU_ADDRSPACE_VMMIO_WRITE:
				if (!gunyah_handle_mmio(vcpu, resume_data,
							&vcpu_run_resp))
					goto out;
				break;
			case GUNYAH_VCPU_ADDRSPACE_PAGE_FAULT:
				if (!gunyah_handle_page_fault(vcpu,
							      &vcpu_run_resp))
					goto out;
				break;
			default:
				pr_warn_ratelimited(
					"Unknown vCPU state: %llx\n",
					vcpu_run_resp.sized_state);
				schedule();
				break;
			}
		} else if (gunyah_error == GUNYAH_ERROR_RETRY) {
			schedule();
		} else {
			ret = gunyah_error_remap(gunyah_error);
		}
	}

out:
	mutex_unlock(&vcpu->run_lock);

	if (signal_pending(current))
		return -ERESTARTSYS;

	return ret;
}

static long gunyah_vcpu_ioctl(struct file *filp, unsigned int cmd,
			      unsigned long arg)
{
	struct gunyah_vcpu *vcpu = filp->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case GUNYAH_VCPU_RUN:
		ret = gunyah_vcpu_run(vcpu);
		break;
	case GUNYAH_VCPU_MMAP_SIZE:
		ret = PAGE_SIZE;
		break;
	default:
		break;
	}
	return ret;
}

static int gunyah_vcpu_release(struct inode *inode, struct file *filp)
{
	struct gunyah_vcpu *vcpu = filp->private_data;

	gunyah_vm_put(vcpu->ghvm);
	kref_put(&vcpu->kref, vcpu_release);
	return 0;
}

static vm_fault_t gunyah_vcpu_fault(struct vm_fault *vmf)
{
	struct gunyah_vcpu *vcpu = vmf->vma->vm_file->private_data;
	struct page *page;

	/* Only the first page (the shared vcpu_run structure) is mappable */
	if (vmf->pgoff)
		return VM_FAULT_SIGBUS;

	page = virt_to_page(vcpu->vcpu_run);
	get_page(page);
	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct gunyah_vcpu_ops = {
	.fault = gunyah_vcpu_fault,
};

static int gunyah_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &gunyah_vcpu_ops;
	return 0;
}

static const struct file_operations gunyah_vcpu_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = gunyah_vcpu_ioctl,
	.release = gunyah_vcpu_release,
	.llseek = noop_llseek,
	.mmap = gunyah_vcpu_mmap,
};
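
/*
 * A minimal sketch of the userspace side of the fops above, assuming the
 * UAPI definitions come from the Gunyah uapi header. emulate_read() and
 * emulate_write() are hypothetical VMM helpers, and error handling is
 * elided; GUNYAH_VCPU_RUN returns 0 whenever the vCPU exits with a reason
 * recorded in the shared page:
 *
 *	long sz = ioctl(vcpu_fd, GUNYAH_VCPU_MMAP_SIZE);
 *	struct gunyah_vcpu_run *run = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *					   MAP_SHARED, vcpu_fd, 0);
 *
 *	while (!ioctl(vcpu_fd, GUNYAH_VCPU_RUN)) {
 *		if (run->exit_reason == GUNYAH_VCPU_EXIT_MMIO) {
 *			if (run->mmio.is_write)
 *				emulate_write(run->mmio.phys_addr,
 *					      run->mmio.data, run->mmio.len);
 *			else
 *				emulate_read(run->mmio.phys_addr,
 *					     run->mmio.data, run->mmio.len);
 *		} else if (run->exit_reason == GUNYAH_VCPU_EXIT_STATUS) {
 *			break;	(the VM stopped; inspect run->status)
 *		}
 *	}
 */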

static bool gunyah_vcpu_populate(struct gunyah_vm_resource_ticket *ticket,
				 struct gunyah_resource *ghrsc)
{
	struct gunyah_vcpu *vcpu =
		container_of(ticket, struct gunyah_vcpu, ticket);
	int ret;

	mutex_lock(&vcpu->run_lock);
	if (vcpu->rsc) {
		pr_warn("vcpu%d already got a Gunyah resource. Check if multiple resources with same label were configured.\n",
			vcpu->ticket.label);
		ret = -EEXIST;
		goto out;
	}

	vcpu->rsc = ghrsc;

	ret = request_irq(vcpu->rsc->irq, gunyah_vcpu_irq_handler,
			  IRQF_TRIGGER_RISING, "gunyah_vcpu", vcpu);
	if (ret) {
		pr_warn("Failed to request vcpu irq %d: %d\n", vcpu->rsc->irq,
			ret);
		goto out;
	}

	enable_irq_wake(vcpu->rsc->irq);

out:
	mutex_unlock(&vcpu->run_lock);
	return !ret;
}

static void gunyah_vcpu_unpopulate(struct gunyah_vm_resource_ticket *ticket,
				   struct gunyah_resource *ghrsc)
{
	struct gunyah_vcpu *vcpu =
		container_of(ticket, struct gunyah_vcpu, ticket);

	vcpu->vcpu_run->immediate_exit = true;
	complete_all(&vcpu->ready);
	mutex_lock(&vcpu->run_lock);
	free_irq(vcpu->rsc->irq, vcpu);
	vcpu->rsc = NULL;
	mutex_unlock(&vcpu->run_lock);
}
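
/*
 * How a VMM instantiates this function, sketched under the assumption that
 * the VM manager exposes it through the GUNYAH_VM_ADD_FUNCTION ioctl and
 * struct gunyah_fn_desc defined elsewhere in the Gunyah UAPI (both are
 * outside this file):
 *
 *	struct gunyah_fn_vcpu_arg vcpu_arg = { .id = 0 };
 *	struct gunyah_fn_desc desc = {
 *		.type = GUNYAH_FN_VCPU,
 *		.arg_size = sizeof(vcpu_arg),
 *		.arg = (__u64)(uintptr_t)&vcpu_arg,
 *	};
 *	int vcpu_fd = ioctl(vm_fd, GUNYAH_VM_ADD_FUNCTION, &desc);
 *
 * On success, gunyah_vcpu_bind() below returns a file descriptor supporting
 * GUNYAH_VCPU_RUN, GUNYAH_VCPU_MMAP_SIZE, and mmap() of the vcpu_run page.
 */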

static long gunyah_vcpu_bind(struct gunyah_vm_function_instance *f)
{
	struct gunyah_fn_vcpu_arg *arg = f->argp;
	struct gunyah_vcpu *vcpu;
	char name[MAX_VCPU_NAME];
	struct file *file;
	struct page *page;
	int fd;
	long r;

	if (f->arg_size != sizeof(*arg))
		return -EINVAL;

	vcpu = kzalloc(sizeof(*vcpu), GFP_KERNEL);
	if (!vcpu)
		return -ENOMEM;

	vcpu->f = f;
	f->data = vcpu;
	mutex_init(&vcpu->run_lock);
	kref_init(&vcpu->kref);
	init_completion(&vcpu->ready);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto err_destroy_vcpu;
	}
	vcpu->vcpu_run = page_address(page);

	vcpu->ticket.resource_type = GUNYAH_RESOURCE_TYPE_VCPU;
	vcpu->ticket.label = arg->id;
	vcpu->ticket.owner = THIS_MODULE;
	vcpu->ticket.populate = gunyah_vcpu_populate;
	vcpu->ticket.unpopulate = gunyah_vcpu_unpopulate;

	r = gunyah_vm_add_resource_ticket(f->ghvm, &vcpu->ticket);
	if (r)
		goto err_destroy_page;

	if (!gunyah_vm_get(f->ghvm)) {
		r = -ENODEV;
		goto err_remove_resource_ticket;
	}
	vcpu->ghvm = f->ghvm;

	vcpu->nb.notifier_call = gunyah_vcpu_rm_notification;
	/*
	 * Ensure we run after the vm_mgr handles the notification and does
	 * any necessary state changes.
	 */
	vcpu->nb.priority = -1;
	r = gunyah_rm_notifier_register(f->rm, &vcpu->nb);
	if (r)
		goto err_put_gunyah_vm;

	kref_get(&vcpu->kref);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		r = fd;
		goto err_notifier;
	}

	snprintf(name, sizeof(name), "gh-vcpu:%u", vcpu->ticket.label);
	file = anon_inode_getfile(name, &gunyah_vcpu_fops, vcpu, O_RDWR);
	if (IS_ERR(file)) {
		r = PTR_ERR(file);
		goto err_put_fd;
	}

	fd_install(fd, file);

	return fd;
err_put_fd:
	put_unused_fd(fd);
err_notifier:
	gunyah_rm_notifier_unregister(f->rm, &vcpu->nb);
err_put_gunyah_vm:
	gunyah_vm_put(vcpu->ghvm);
err_remove_resource_ticket:
	gunyah_vm_remove_resource_ticket(f->ghvm, &vcpu->ticket);
err_destroy_page:
	free_page((unsigned long)vcpu->vcpu_run);
err_destroy_vcpu:
	kfree(vcpu);
	return r;
}

static void gunyah_vcpu_unbind(struct gunyah_vm_function_instance *f)
{
	struct gunyah_vcpu *vcpu = f->data;

	gunyah_rm_notifier_unregister(f->rm, &vcpu->nb);
	gunyah_vm_remove_resource_ticket(vcpu->ghvm, &vcpu->ticket);
	vcpu->f = NULL;

	kref_put(&vcpu->kref, vcpu_release);
}

static bool gunyah_vcpu_compare(const struct gunyah_vm_function_instance *f,
				const void *arg, size_t size)
{
	const struct gunyah_fn_vcpu_arg *instance = f->argp, *other = arg;

	if (sizeof(*other) != size)
		return false;

	return instance->id == other->id;
}

DECLARE_GUNYAH_VM_FUNCTION_INIT(vcpu, GUNYAH_FN_VCPU, 1, gunyah_vcpu_bind,
				gunyah_vcpu_unbind, gunyah_vcpu_compare);
MODULE_DESCRIPTION("Gunyah vCPU Function");
MODULE_LICENSE("GPL");