// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include
#include
#include
#include
#include
#include "trace.h"

#ifdef CONFIG_HOTPLUG_CPU

enum pause_type {
	HALT,
	PARTIAL_HALT,
	MAX_PAUSE_TYPE
};

/* if a cpu is halting */
struct cpumask __cpu_halt_mask;
struct cpumask __cpu_partial_halt_mask;

/* spin lock to allow calling from non-preemptible context */
static DEFINE_RAW_SPINLOCK(halt_lock);

struct halt_cpu_state {
	u8 client_vote_mask[MAX_PAUSE_TYPE];
};

static DEFINE_PER_CPU(struct halt_cpu_state, halt_state);
static DEFINE_RAW_SPINLOCK(walt_drain_pending_lock);

/* the amount of time allowed for enqueue operations that happen
 * just after a halt operation.
 */
#define WALT_HALT_CHECK_THRESHOLD_NS 400000

/*
 * Remove a task from the runqueue and pretend that it's migrating. This
 * should prevent migrations for the detached task and disallow further
 * changes to tsk_cpus_allowed.
 */
void detach_one_task_core(struct task_struct *p, struct rq *rq,
			  struct list_head *tasks)
{
	walt_lockdep_assert_rq(rq, p);

	p->on_rq = TASK_ON_RQ_MIGRATING;
	deactivate_task(rq, p, 0);
	list_add(&p->se.group_node, tasks);
}

void attach_tasks_core(struct list_head *tasks, struct rq *rq)
{
	struct task_struct *p;

	walt_lockdep_assert_rq(rq, NULL);

	while (!list_empty(tasks)) {
		p = list_first_entry(tasks, struct task_struct, se.group_node);
		list_del_init(&p->se.group_node);

		BUG_ON(task_rq(p) != rq);
		activate_task(rq, p, 0);
		p->on_rq = TASK_ON_RQ_QUEUED;
	}
}
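/*
 * detach_one_task_core() and attach_tasks_core() are used as a pair by
 * migrate_tasks() below: pinned per-CPU kthreads and tasks whose migration
 * is currently disabled are temporarily detached so the pick loop can make
 * progress, and are re-attached once every migratable task has been moved
 * off the runqueue.
 */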
/*
 * Migrate all tasks from the rq, sleeping tasks will be migrated by
 * try_to_wake_up()->select_task_rq().
 *
 * Called with rq->__lock held even though we're in stop_machine() and
 * there's no concurrency possible, we hold the required locks anyway
 * because of lock validation efforts.
 *
 * The function will skip CPU pinned kthreads.
 */
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
	struct rq *rq = dead_rq;
	struct task_struct *next, *stop = rq->stop;
	LIST_HEAD(percpu_kthreads);
	unsigned int num_pinned_kthreads = 1;
	struct rq_flags orf = *rf;
	int dest_cpu;

	/*
	 * Fudge the rq selection such that the below task selection loop
	 * doesn't get stuck on the currently eligible stop task.
	 *
	 * We're currently inside stop_machine() and the rq is either stuck
	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
	 * either way we should never end up calling schedule() until we're
	 * done here.
	 */
	rq->stop = NULL;

	/*
	 * put_prev_task() and pick_next_task() sched
	 * class method both need to have an up-to-date
	 * value of rq->clock[_task]
	 */
	update_rq_clock(rq);

#ifdef CONFIG_SCHED_DEBUG
	/* note the clock update in orf */
	orf.clock_update_flags |= RQCF_UPDATED;
#endif

	for (;;) {
		/*
		 * There's this thread running, bail when that's the only
		 * remaining thread:
		 */
		if (rq->nr_running == 1)
			break;

		next = pick_migrate_task(rq);

		/*
		 * Argh ... no iterator for tasks, we need to remove the
		 * kthread from the run-queue to continue.
		 */
		if (is_per_cpu_kthread(next)) {
			detach_one_task_core(next, rq, &percpu_kthreads);
			num_pinned_kthreads += 1;
			continue;
		}

		/*
		 * Rules for changing task_struct::cpus_mask are holding
		 * both pi_lock and rq->__lock, such that holding either
		 * stabilizes the mask.
		 *
		 * Dropping rq->__lock is not quite as disastrous as it
		 * usually is because !cpu_active at this point, which means
		 * load-balance will not interfere. Also, stop-machine.
		 */
		rq_unlock(rq, rf);
		raw_spin_lock(&next->pi_lock);
		raw_spin_rq_lock(rq);
		rq_repin_lock(rq, rf);

		/*
		 * Since we're inside stop-machine, _nothing_ should have
		 * changed the task, WARN if weird stuff happened, because in
		 * that case the above rq->__lock drop is a fail too.
		 */
		if (task_rq(next) != rq || !task_on_rq_queued(next)) {
			raw_spin_unlock(&next->pi_lock);
			continue;
		}

		/* Find suitable destination for @next */
		dest_cpu = select_fallback_rq(dead_rq->cpu, next);

		if (cpu_of(rq) != dest_cpu && !is_migration_disabled(next)) {
			/* only perform a required migration */
			rq = __migrate_task(rq, rf, next, dest_cpu);

			if (rq != dead_rq) {
				rq_unlock(rq, rf);
				rq = dead_rq;
				*rf = orf;
				raw_spin_rq_lock(rq);
				rq_repin_lock(rq, rf);
			}
		} else {
			detach_one_task_core(next, rq, &percpu_kthreads);
			num_pinned_kthreads += 1;
		}

		raw_spin_unlock(&next->pi_lock);
	}

	if (num_pinned_kthreads > 1)
		attach_tasks_core(&percpu_kthreads, rq);

	rq->stop = stop;
}

void __balance_callbacks(struct rq *rq);

static int drain_rq_cpu_stop(void *data)
{
	struct rq *rq = this_rq();
	struct rq_flags rf;
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));

	rq_lock_irqsave(rq, &rf);
	/* rq lock is pinned */

	/* migrate_tasks() assumes that the lock is pinned, and will unlock/repin */
	migrate_tasks(rq, &rf);

	/* __balance_callbacks can unlock and relock the rq lock. unpin */
	rq_unpin_lock(rq, &rf);

	/*
	 * Service any callbacks that were accumulated prior to unlocking,
	 * such that any subsequent calls to rq_lock... will see
	 * rq->balance_callback set to the default (0 or balance_push_callback).
	 */
	wrq->enqueue_counter = 0;
	__balance_callbacks(rq);
	if (wrq->enqueue_counter)
		WALT_BUG(WALT_BUG_WALT, NULL, "cpu: %d task was re-enqueued", cpu_of(rq));

	/* lock is no longer pinned, raw unlock using same flags as locking */
	raw_spin_rq_unlock_irqrestore(rq, rf.flags);

	return 0;
}

static int cpu_drain_rq(unsigned int cpu)
{
	if (!cpu_online(cpu))
		return 0;

	if (available_idle_cpu(cpu))
		return 0;

	/* this will schedule, must not be in atomic context */
	return stop_one_cpu(cpu, drain_rq_cpu_stop, NULL);
}

struct drain_thread_data {
	cpumask_t cpus_to_drain;
};

static struct drain_thread_data drain_data = {
	.cpus_to_drain = { CPU_BITS_NONE }
};

static int __ref try_drain_rqs(void *data)
{
	cpumask_t *cpus_ptr = &((struct drain_thread_data *)data)->cpus_to_drain;
	int cpu;
	unsigned long flags;

	while (!kthread_should_stop()) {
		raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
		if (cpumask_weight(cpus_ptr)) {
			cpumask_t local_cpus;

			cpumask_copy(&local_cpus, cpus_ptr);
			raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);

			for_each_cpu(cpu, &local_cpus)
				cpu_drain_rq(cpu);

			raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
			cpumask_andnot(cpus_ptr, cpus_ptr, &local_cpus);
		}
		raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
		set_current_state(TASK_RUNNING);
	}

	return 0;
}

void restrict_cpus_and_freq(struct cpumask *cpus)
{
	struct cpumask restrict_cpus;
	int cpu = 0;

	cpumask_copy(&restrict_cpus, cpus);

	if (cpumask_intersects(cpus, cpu_partial_halt_mask) &&
	    !cpumask_intersects(cpus, cpu_halt_mask) &&
	    is_state1()) {
		for_each_cpu(cpu, cpus)
			freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
				sysctl_max_freq_partial_halt;
	} else {
		for_each_cpu(cpu, cpus) {
			cpumask_or(&restrict_cpus, &restrict_cpus, &(cpu_cluster(cpu)->cpus));
			freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
				FREQ_QOS_MAX_DEFAULT_VALUE;
		}
	}

	update_smart_freq_capacities();
}

struct task_struct *walt_drain_thread;
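/*
 * Halt/drain flow: halt_cpus() below first publishes the cpu in
 * cpu_halt_mask (or cpu_partial_halt_mask), then wakes walt_drain_thread.
 * try_drain_rqs() snapshots the pending mask and calls cpu_drain_rq() for
 * each cpu, which runs drain_rq_cpu_stop() via stop_one_cpu() to migrate
 * the remaining runnable tasks off the halted cpu with migrate_tasks().
 * A PARTIAL_HALT does not drain the runqueue; in this file it only updates
 * the partial-halt mask and the frequency caps via restrict_cpus_and_freq().
 */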
static int halt_cpus(struct cpumask *cpus, enum pause_type type)
{
	int cpu;
	int ret = 0;
	u64 start_time = 0;
	struct halt_cpu_state *halt_cpu_state;
	unsigned long flags;

	if (trace_halt_cpus_enabled())
		start_time = sched_clock();

	trace_halt_cpus_start(cpus, 1);

	/* add the cpus to the halt mask */
	for_each_cpu(cpu, cpus) {
		if (cpu == cpumask_first(system_32bit_el0_cpumask())) {
			ret = -EINVAL;
			goto out;
		}

		halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

		if (type == HALT)
			cpumask_set_cpu(cpu, cpu_halt_mask);
		else
			cpumask_set_cpu(cpu, cpu_partial_halt_mask);

		/* guarantee mask written at this time */
		wmb();
	}

	restrict_cpus_and_freq(cpus);

	/* migrate tasks off the cpu */
	if (type == HALT) {
		/* signal and wakeup the drain kthread */
		raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
		cpumask_or(&drain_data.cpus_to_drain, &drain_data.cpus_to_drain, cpus);
		raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);

		wake_up_process(walt_drain_thread);
	}
out:
	trace_halt_cpus(cpus, start_time, 1, ret);

	return ret;
}

/* start the cpus again, and kick them to balance */
static int start_cpus(struct cpumask *cpus, enum pause_type type)
{
	u64 start_time = sched_clock();
	struct halt_cpu_state *halt_cpu_state;
	int cpu;

	trace_halt_cpus_start(cpus, 0);

	for_each_cpu(cpu, cpus) {
		halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

		/* guarantee the halt state is updated */
		wmb();

		if (type == HALT)
			cpumask_clear_cpu(cpu, cpu_halt_mask);
		else
			cpumask_clear_cpu(cpu, cpu_partial_halt_mask);

		/* kick the cpu so it can pull tasks
		 * after the mask has been cleared.
		 */
		walt_smp_call_newidle_balance(cpu);
	}

	restrict_cpus_and_freq(cpus);

	trace_halt_cpus(cpus, start_time, 0, 0);

	return 0;
}

/* update the client votes for the cpus in the halt/partial-halt mask */
static void update_clients(struct cpumask *cpus, bool halt, enum pause_client client,
			   enum pause_type type)
{
	int cpu;
	struct halt_cpu_state *halt_cpu_state;

	for_each_cpu(cpu, cpus) {
		halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
		if (halt)
			halt_cpu_state->client_vote_mask[type] |= client;
		else
			halt_cpu_state->client_vote_mask[type] &= ~client;
	}
}

/* remove cpus that are already halted */
static void update_halt_cpus(struct cpumask *cpus, enum pause_type type)
{
	int cpu;
	struct halt_cpu_state *halt_cpu_state;

	for_each_cpu(cpu, cpus) {
		halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
		if (halt_cpu_state->client_vote_mask[type])
			cpumask_clear_cpu(cpu, cpus);
	}
}

/* cpus will be modified */
static int walt_halt_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
	int ret = 0;
	cpumask_t requested_cpus;
	unsigned long flags;

	raw_spin_lock_irqsave(&halt_lock, flags);

	cpumask_copy(&requested_cpus, cpus);

	/* remove cpus that are already halted */
	update_halt_cpus(cpus, type);

	if (cpumask_empty(cpus)) {
		update_clients(&requested_cpus, true, client, type);
		goto unlock;
	}

	ret = halt_cpus(cpus, type);

	if (ret < 0)
		pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
			 cpumask_pr_args(&requested_cpus));
	else
		update_clients(&requested_cpus, true, client, type);

unlock:
	raw_spin_unlock_irqrestore(&halt_lock, flags);

	return ret;
}

int walt_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
	if (walt_disabled)
		return -EAGAIN;

	return walt_halt_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_pause_cpus);

int walt_partial_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
	if (walt_disabled)
		return -EAGAIN;

	return walt_halt_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_pause_cpus);
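/*
 * Illustrative usage sketch (not compiled here): a client pauses a set of
 * cpus and later resumes them using the same client id. The client bit used
 * below, PAUSE_CORE_CTL, is only an assumed member of enum pause_client
 * (defined elsewhere in WALT). Note that walt_halt_cpus()/walt_start_cpus()
 * may clear bits in the mask they are given, so the caller rebuilds it:
 *
 *	cpumask_t mask;
 *
 *	cpumask_clear(&mask);
 *	cpumask_set_cpu(6, &mask);
 *	cpumask_set_cpu(7, &mask);
 *	if (!walt_pause_cpus(&mask, PAUSE_CORE_CTL)) {
 *		cpumask_clear(&mask);
 *		cpumask_set_cpu(6, &mask);
 *		cpumask_set_cpu(7, &mask);
 *		walt_resume_cpus(&mask, PAUSE_CORE_CTL);
 *	}
 */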
/* cpus will be modified */
static int walt_start_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
	int ret = 0;
	cpumask_t requested_cpus;
	unsigned long flags;

	raw_spin_lock_irqsave(&halt_lock, flags);
	cpumask_copy(&requested_cpus, cpus);
	update_clients(&requested_cpus, false, client, type);

	/* remove cpus that should still be halted */
	update_halt_cpus(cpus, type);

	ret = start_cpus(cpus, type);

	if (ret < 0) {
		pr_debug("start_cpus failure ret=%d cpus=%*pbl\n", ret,
			 cpumask_pr_args(&requested_cpus));
		/* restore the client votes in case of error */
		update_clients(&requested_cpus, true, client, type);
	}

	raw_spin_unlock_irqrestore(&halt_lock, flags);

	return ret;
}

int walt_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
	if (walt_disabled)
		return -EAGAIN;

	return walt_start_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_resume_cpus);

int walt_partial_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
	if (walt_disabled)
		return -EAGAIN;

	return walt_start_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_resume_cpus);

/* return true if the requested client has fully halted one of the cpus */
bool cpus_halted_by_client(struct cpumask *cpus, enum pause_client client)
{
	struct halt_cpu_state *halt_cpu_state;
	int cpu;

	for_each_cpu(cpu, cpus) {
		halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

		if ((bool)(halt_cpu_state->client_vote_mask[HALT] & client))
			return true;
	}

	return false;
}
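/*
 * Worked example of the per-cpu client vote aggregation (values are
 * illustrative): if clients A = 0x1 and B = 0x2 both halt cpu 7,
 * client_vote_mask[HALT] for cpu 7 becomes 0x3. When A resumes, only bit
 * 0x1 is cleared by update_clients(); update_halt_cpus() then sees the
 * remaining vote (0x2) and drops cpu 7 from the mask handed to
 * start_cpus(), so the cpu stays halted until B resumes it as well.
 */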
static void android_rvh_get_nohz_timer_target(void *unused, int *cpu, bool *done)
{
	int i, default_cpu = -1;
	struct sched_domain *sd;
	cpumask_t active_unhalted;

	*done = true;

	cpumask_andnot(&active_unhalted, cpu_active_mask, cpu_halt_mask);

	if (housekeeping_cpu(*cpu, HK_TYPE_TIMER) && !cpu_halted(*cpu)) {
		if (!available_idle_cpu(*cpu))
			return;
		default_cpu = *cpu;
	}

	/*
	 * find first cpu halted by core control and try to avoid
	 * affecting externally halted cpus.
	 */
	if (!cpumask_weight(&active_unhalted)) {
		cpumask_t tmp_pause, tmp_part_pause, tmp_halt, *tmp;

		cpumask_and(&tmp_part_pause, cpu_active_mask, &cpus_part_paused_by_us);
		cpumask_and(&tmp_pause, cpu_active_mask, &cpus_paused_by_us);
		cpumask_and(&tmp_halt, cpu_active_mask, cpu_halt_mask);
		tmp = cpumask_weight(&tmp_part_pause) ? &tmp_part_pause :
			cpumask_weight(&tmp_pause) ? &tmp_pause : &tmp_halt;

		for_each_cpu(i, tmp) {
			if ((*cpu == i) && cpumask_weight(tmp) > 1)
				continue;

			*cpu = i;
			return;
		}
	}

	rcu_read_lock();
	for_each_domain(*cpu, sd) {
		for_each_cpu_and(i, sched_domain_span(sd),
				 housekeeping_cpumask(HK_TYPE_TIMER)) {
			if (*cpu == i)
				continue;

			if (!available_idle_cpu(i) && !cpu_halted(i)) {
				*cpu = i;
				goto unlock;
			}
		}
	}

	if (default_cpu == -1) {
		for_each_cpu_and(i, &active_unhalted,
				 housekeeping_cpumask(HK_TYPE_TIMER)) {
			if (*cpu == i)
				continue;

			if (!available_idle_cpu(i)) {
				*cpu = i;
				goto unlock;
			}
		}

		/* choose any active unhalted cpu */
		default_cpu = cpumask_any(&active_unhalted);

		if (unlikely(default_cpu >= nr_cpu_ids))
			goto unlock;
	}

	*cpu = default_cpu;
unlock:
	rcu_read_unlock();
}

/**
 * android_rvh_set_cpus_allowed_by_task: disallow cpus that are halted
 *
 * NOTES: may be called if migration is disabled for the task
 *	  if per-cpu-kthread, must not deliberately return an invalid cpu
 *	  if !per-cpu-kthread, may return an invalid cpu (reject dest_cpu)
 *	  must not change the cpu for a 32-bit task that is in execve
 */
static void android_rvh_set_cpus_allowed_by_task(void *unused,
						 const struct cpumask *cpu_valid_mask,
						 const struct cpumask *new_mask,
						 struct task_struct *p,
						 unsigned int *dest_cpu)
{
	if (unlikely(walt_disabled))
		return;

	/* allow kthreads to change affinity regardless of halt status of dest_cpu */
	if (p->flags & PF_KTHREAD)
		return;

	if (cpu_halted(*dest_cpu) && !p->migration_disabled) {
		cpumask_t allowed_cpus;

		if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve))
			return;

		/* remove halted cpus from the valid mask, and store locally */
		cpumask_andnot(&allowed_cpus, cpu_valid_mask, cpu_halt_mask);
		cpumask_and(&allowed_cpus, &allowed_cpus, new_mask);

		/* do not modify dest_cpu if there are no cpus to choose from */
		if (!cpumask_empty(&allowed_cpus))
			*dest_cpu = cpumask_any_and_distribute(&allowed_cpus, new_mask);
	}
}

/**
 * android_rvh_rto_next_cpu: disallow halted cpus for the RT-overload push irq work
 */
static void android_rvh_rto_next_cpu(void *unused, int rto_cpu, struct cpumask *rto_mask, int *cpu)
{
	cpumask_t allowed_cpus;

	if (unlikely(walt_disabled))
		return;

	if (cpu_halted(*cpu)) {
		/* remove halted cpus from the valid mask, and store locally */
		cpumask_andnot(&allowed_cpus, rto_mask, cpu_halt_mask);
		*cpu = cpumask_next(rto_cpu, &allowed_cpus);
	}
}
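/*
 * android_rvh_set_cpus_allowed_by_task() above and android_rvh_is_cpu_allowed()
 * below both special-case a compat (32-bit) task that is in the middle of
 * execve: such a task is left on, or admitted to, a halted cpu instead of
 * being redirected. This pairs with halt_cpus() refusing (-EINVAL) to halt
 * the first cpu of system_32bit_el0_cpumask(), presumably so that a
 * 32-bit-capable cpu always remains available.
 */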
/**
 * android_rvh_is_cpu_allowed: disallow cpus that are halted
 *
 * NOTE: this function will not be called if migration is disabled for the task.
 */
static void android_rvh_is_cpu_allowed(void *unused, struct task_struct *p, int cpu, bool *allowed)
{
	if (unlikely(walt_disabled))
		return;

	if (cpumask_test_cpu(cpu, cpu_halt_mask)) {
		cpumask_t cpus_allowed;

		/* default reject for any halted cpu */
		*allowed = false;

		if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve)) {
			/* 32bit task in execve. allow this cpu. */
			*allowed = true;
			return;
		}

		/*
		 * for cfs threads, active cpus in the affinity are allowed
		 * but halted cpus are not allowed
		 */
		cpumask_and(&cpus_allowed, cpu_active_mask, p->cpus_ptr);
		cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_halt_mask);

		if (!(p->flags & PF_KTHREAD)) {
			if (cpumask_empty(&cpus_allowed)) {
				/*
				 * All affined cpus are inactive or halted.
				 * Allow this cpu for user threads
				 */
				*allowed = true;
			}
			return;
		}

		/* for kthreads, dying cpus are not allowed */
		cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_dying_mask);

		if (cpumask_empty(&cpus_allowed)) {
			/*
			 * All affined cpus inactive or halted or dying.
			 * Allow this cpu for kthreads
			 */
			*allowed = true;
		}
	}
}

void walt_halt_init(void)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

	walt_drain_thread = kthread_run(try_drain_rqs, &drain_data, "halt_drain_rqs");
	if (IS_ERR(walt_drain_thread)) {
		pr_err("Error creating walt drain thread\n");
		return;
	}

	sched_setscheduler_nocheck(walt_drain_thread, SCHED_FIFO, &param);

	register_trace_android_rvh_get_nohz_timer_target(android_rvh_get_nohz_timer_target, NULL);
	register_trace_android_rvh_set_cpus_allowed_by_task(
						android_rvh_set_cpus_allowed_by_task, NULL);
	register_trace_android_rvh_rto_next_cpu(android_rvh_rto_next_cpu, NULL);
	register_trace_android_rvh_is_cpu_allowed(android_rvh_is_cpu_allowed, NULL);
}

#endif /* CONFIG_HOTPLUG_CPU */