// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/sched/isolation.h>
#include <trace/hooks/sched.h>
#include <walt.h>
#include "trace.h"
#ifdef CONFIG_HOTPLUG_CPU

enum pause_type {
        HALT,
        PARTIAL_HALT,
        MAX_PAUSE_TYPE
};

/* if a cpu is halting */
struct cpumask __cpu_halt_mask;
struct cpumask __cpu_partial_halt_mask;

/* spin lock to allow calling from non-preemptible context */
static DEFINE_RAW_SPINLOCK(halt_lock);
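
/*
 * Per-cpu record of which clients have voted to halt (or partially halt)
 * this cpu. Bits in client_vote_mask[type] are set and cleared per
 * pause_client in update_clients(); a cpu becomes eligible to start again
 * only once every client vote for that pause_type is cleared (see
 * update_halt_cpus() below).
 */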
struct halt_cpu_state {
        u8 client_vote_mask[MAX_PAUSE_TYPE];
};

static DEFINE_PER_CPU(struct halt_cpu_state, halt_state);
static DEFINE_RAW_SPINLOCK(walt_drain_pending_lock);

/*
 * The amount of time allowed for enqueue operations that happen
 * just after a halt operation.
 */
#define WALT_HALT_CHECK_THRESHOLD_NS 400000

/*
 * Remove a task from the runqueue and pretend that it's migrating. This
 * should prevent migrations for the detached task and disallow further
 * changes to tsk_cpus_allowed.
 */
void
detach_one_task_core(struct task_struct *p, struct rq *rq,
                     struct list_head *tasks)
{
        walt_lockdep_assert_rq(rq, p);

        p->on_rq = TASK_ON_RQ_MIGRATING;
        deactivate_task(rq, p, 0);
        list_add(&p->se.group_node, tasks);
}

void attach_tasks_core(struct list_head *tasks, struct rq *rq)
{
        struct task_struct *p;

        walt_lockdep_assert_rq(rq, NULL);

        while (!list_empty(tasks)) {
                p = list_first_entry(tasks, struct task_struct, se.group_node);
                list_del_init(&p->se.group_node);

                BUG_ON(task_rq(p) != rq);
                activate_task(rq, p, 0);
                p->on_rq = TASK_ON_RQ_QUEUED;
        }
}

/*
 * Migrate all tasks from the rq; sleeping tasks will be migrated by
 * try_to_wake_up()->select_task_rq().
 *
 * Called with rq->__lock held even though we're in stop_machine() and
 * there's no concurrency possible; we hold the required locks anyway
 * because of lock validation efforts.
 *
 * The function will skip CPU pinned kthreads.
 */
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
        struct rq *rq = dead_rq;
        struct task_struct *next, *stop = rq->stop;
        LIST_HEAD(percpu_kthreads);
        unsigned int num_pinned_kthreads = 1;
        struct rq_flags orf = *rf;
        int dest_cpu;

        /*
         * Fudge the rq selection such that the below task selection loop
         * doesn't get stuck on the currently eligible stop task.
         *
         * We're currently inside stop_machine() and the rq is either stuck
         * in the stop_machine_cpu_stop() loop, or we're executing this code,
         * either way we should never end up calling schedule() until we're
         * done here.
         */
        rq->stop = NULL;

        /*
         * put_prev_task() and pick_next_task() sched
         * class methods both need to have an up-to-date
         * value of rq->clock[_task]
         */
        update_rq_clock(rq);

#ifdef CONFIG_SCHED_DEBUG
        /* note the clock update in orf */
        orf.clock_update_flags |= RQCF_UPDATED;
#endif

        for (;;) {
                /*
                 * There's this thread running, bail when that's the only
                 * remaining thread:
                 */
                if (rq->nr_running == 1)
                        break;

                next = pick_migrate_task(rq);

                /*
                 * Argh ... no iterator for tasks, we need to remove the
                 * kthread from the run-queue to continue.
                 */
                if (is_per_cpu_kthread(next)) {
                        detach_one_task_core(next, rq, &percpu_kthreads);
                        num_pinned_kthreads += 1;
                        continue;
                }

                /*
                 * Rules for changing task_struct::cpus_mask are holding
                 * both pi_lock and rq->__lock, such that holding either
                 * stabilizes the mask.
                 *
                 * Dropping rq->__lock is not quite as disastrous as it
                 * usually is because !cpu_active at this point, which means
                 * load-balance will not interfere. Also, stop-machine.
                 */
                rq_unlock(rq, rf);
                raw_spin_lock(&next->pi_lock);
                raw_spin_rq_lock(rq);
                rq_repin_lock(rq, rf);

                /*
                 * Since we're inside stop-machine, _nothing_ should have
                 * changed the task, WARN if weird stuff happened, because in
                 * that case the above rq->__lock drop is a fail too.
                 */
                if (task_rq(next) != rq || !task_on_rq_queued(next)) {
                        raw_spin_unlock(&next->pi_lock);
                        continue;
                }

                /* Find suitable destination for @next */
                dest_cpu = select_fallback_rq(dead_rq->cpu, next);

                if (cpu_of(rq) != dest_cpu && !is_migration_disabled(next)) {
                        /* only perform a required migration */
                        rq = __migrate_task(rq, rf, next, dest_cpu);

                        if (rq != dead_rq) {
                                rq_unlock(rq, rf);
                                rq = dead_rq;
                                *rf = orf;
                                raw_spin_rq_lock(rq);
                                rq_repin_lock(rq, rf);
                        }
                } else {
                        detach_one_task_core(next, rq, &percpu_kthreads);
                        num_pinned_kthreads += 1;
                }

                raw_spin_unlock(&next->pi_lock);
        }

        if (num_pinned_kthreads > 1)
                attach_tasks_core(&percpu_kthreads, rq);

        rq->stop = stop;
}

void __balance_callbacks(struct rq *rq);

static int drain_rq_cpu_stop(void *data)
{
        struct rq *rq = this_rq();
        struct rq_flags rf;
        struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));

        rq_lock_irqsave(rq, &rf);

        /* rq lock is pinned */

        /* migrate_tasks() assumes that the lock is pinned, and will unlock/repin */
        migrate_tasks(rq, &rf);

        /* __balance_callbacks() can unlock and relock the rq lock; unpin */
        rq_unpin_lock(rq, &rf);

        /*
         * Service any callbacks that were accumulated prior to unlocking, so
         * that any subsequent calls to rq_lock... will see rq->balance_callback
         * set to the default (0 or balance_push_callback).
         */
        wrq->enqueue_counter = 0;
        __balance_callbacks(rq);
        if (wrq->enqueue_counter)
                WALT_BUG(WALT_BUG_WALT, NULL, "cpu: %d task was re-enqueued", cpu_of(rq));

        /* lock is no longer pinned, raw unlock using same flags as locking */
        raw_spin_rq_unlock_irqrestore(rq, rf.flags);

        return 0;
}

static int cpu_drain_rq(unsigned int cpu)
{
        if (!cpu_online(cpu))
                return 0;

        if (available_idle_cpu(cpu))
                return 0;

        /* this will schedule, must not be in atomic context */
        return stop_one_cpu(cpu, drain_rq_cpu_stop, NULL);
}

struct drain_thread_data {
        cpumask_t cpus_to_drain;
};

static struct drain_thread_data drain_data = {
        .cpus_to_drain = { CPU_BITS_NONE }
};

static int __ref try_drain_rqs(void *data)
{
        cpumask_t *cpus_ptr = &((struct drain_thread_data *)data)->cpus_to_drain;
        int cpu;
        unsigned long flags;

        while (!kthread_should_stop()) {
                raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
                if (cpumask_weight(cpus_ptr)) {
                        cpumask_t local_cpus;

                        cpumask_copy(&local_cpus, cpus_ptr);
                        raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);

                        for_each_cpu(cpu, &local_cpus)
                                cpu_drain_rq(cpu);

                        raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
                        cpumask_andnot(cpus_ptr, cpus_ptr, &local_cpus);
                }
                raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);

                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
                set_current_state(TASK_RUNNING);
        }

        return 0;
}

void restrict_cpus_and_freq(struct cpumask *cpus)
{
        struct cpumask restrict_cpus;
        int cpu = 0;

        cpumask_copy(&restrict_cpus, cpus);

        if (cpumask_intersects(cpus, cpu_partial_halt_mask) &&
            !cpumask_intersects(cpus, cpu_halt_mask) &&
            is_state1()) {
                for_each_cpu(cpu, cpus)
                        freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
                                sysctl_max_freq_partial_halt;
        } else {
                for_each_cpu(cpu, cpus) {
                        cpumask_or(&restrict_cpus, &restrict_cpus, &(cpu_cluster(cpu)->cpus));
                        freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
                                FREQ_QOS_MAX_DEFAULT_VALUE;
                }
        }

        update_smart_freq_capacities();
}

struct task_struct *walt_drain_thread;

static int halt_cpus(struct cpumask *cpus, enum pause_type type)
{
        int cpu;
        int ret = 0;
        u64 start_time = 0;
        struct halt_cpu_state *halt_cpu_state;
        unsigned long flags;

        if (trace_halt_cpus_enabled())
                start_time = sched_clock();

        trace_halt_cpus_start(cpus, 1);

        /* add the cpus to the halt mask */
        for_each_cpu(cpu, cpus) {
                if (cpu == cpumask_first(system_32bit_el0_cpumask())) {
                        ret = -EINVAL;
                        goto out;
                }

                halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

                if (type == HALT)
                        cpumask_set_cpu(cpu, cpu_halt_mask);
                else
                        cpumask_set_cpu(cpu, cpu_partial_halt_mask);

                /* guarantee mask written at this time */
                wmb();
        }

        restrict_cpus_and_freq(cpus);

        /* migrate tasks off the cpu */
        if (type == HALT) {
                /* signal and wakeup the drain kthread */
                raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
                cpumask_or(&drain_data.cpus_to_drain, &drain_data.cpus_to_drain, cpus);
                raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);

                wake_up_process(walt_drain_thread);
        }
out:
        trace_halt_cpus(cpus, start_time, 1, ret);

        return ret;
}

/* start the cpus again, and kick them to balance */
static int start_cpus(struct cpumask *cpus, enum pause_type type)
{
        u64 start_time = sched_clock();
        struct halt_cpu_state *halt_cpu_state;
        int cpu;

        trace_halt_cpus_start(cpus, 0);

        for_each_cpu(cpu, cpus) {
                halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

                /* guarantee the halt state is updated */
                wmb();

                if (type == HALT)
                        cpumask_clear_cpu(cpu, cpu_halt_mask);
                else
                        cpumask_clear_cpu(cpu, cpu_partial_halt_mask);

                /*
                 * kick the cpu so it can pull tasks
                 * after the mask has been cleared.
                 */
                walt_smp_call_newidle_balance(cpu);
        }

        restrict_cpus_and_freq(cpus);

        trace_halt_cpus(cpus, start_time, 0, 0);

        return 0;
}

/* update client for cpus in yield/halt mask */
static void update_clients(struct cpumask *cpus, bool halt, enum pause_client client,
                           enum pause_type type)
{
        int cpu;
        struct halt_cpu_state *halt_cpu_state;

        for_each_cpu(cpu, cpus) {
                halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

                if (halt)
                        halt_cpu_state->client_vote_mask[type] |= client;
                else
                        halt_cpu_state->client_vote_mask[type] &= ~client;
        }
}

/* remove cpus that are already halted */
static void update_halt_cpus(struct cpumask *cpus, enum pause_type type)
{
        int cpu;
        struct halt_cpu_state *halt_cpu_state;

        for_each_cpu(cpu, cpus) {
                halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

                if (halt_cpu_state->client_vote_mask[type])
                        cpumask_clear_cpu(cpu, cpus);
        }
}

/* cpus will be modified */
static int walt_halt_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
        int ret = 0;
        cpumask_t requested_cpus;
        unsigned long flags;

        raw_spin_lock_irqsave(&halt_lock, flags);

        cpumask_copy(&requested_cpus, cpus);

        /* remove cpus that are already halted */
        update_halt_cpus(cpus, type);

        if (cpumask_empty(cpus)) {
                update_clients(&requested_cpus, true, client, type);
                goto unlock;
        }

        ret = halt_cpus(cpus, type);

        if (ret < 0)
                pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
                         cpumask_pr_args(&requested_cpus));
        else
                update_clients(&requested_cpus, true, client, type);

unlock:
        raw_spin_unlock_irqrestore(&halt_lock, flags);

        return ret;
}

int walt_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
        if (walt_disabled)
                return -EAGAIN;

        return walt_halt_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_pause_cpus);

int walt_partial_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
        if (walt_disabled)
                return -EAGAIN;

        return walt_halt_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_pause_cpus);

/* cpus will be modified */
static int walt_start_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
        int ret = 0;
        cpumask_t requested_cpus;
        unsigned long flags;

        raw_spin_lock_irqsave(&halt_lock, flags);
        cpumask_copy(&requested_cpus, cpus);
        update_clients(&requested_cpus, false, client, type);

        /* remove cpus that should still be halted */
        update_halt_cpus(cpus, type);

        ret = start_cpus(cpus, type);

        if (ret < 0) {
                pr_debug("start_cpus failure ret=%d cpus=%*pbl\n", ret,
                         cpumask_pr_args(&requested_cpus));
                /* restore the client votes in case of error */
                update_clients(&requested_cpus, true, client, type);
        }

        raw_spin_unlock_irqrestore(&halt_lock, flags);

        return ret;
}

int walt_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
        if (walt_disabled)
                return -EAGAIN;

        return walt_start_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_resume_cpus);

int walt_partial_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
        if (walt_disabled)
                return -EAGAIN;

        return walt_start_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_resume_cpus);

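/*
 * Usage sketch (illustrative only, not compiled in): an external client such
 * as core control pauses a set of cpus and later resumes them with the same
 * client identifier. PAUSE_CORE_CTL is assumed here to be one of the
 * enum pause_client bits defined in the WALT headers. Note that the pause and
 * resume calls may modify the mask passed in, so the caller should pass a
 * scratch copy, and both return -EAGAIN while WALT is disabled.
 *
 *      cpumask_t scratch;
 *
 *      cpumask_clear(&scratch);
 *      cpumask_set_cpu(2, &scratch);
 *      cpumask_set_cpu(3, &scratch);
 *      if (walt_pause_cpus(&scratch, PAUSE_CORE_CTL))
 *              pr_warn("failed to pause cpus\n");
 *
 *      cpumask_clear(&scratch);
 *      cpumask_set_cpu(2, &scratch);
 *      cpumask_set_cpu(3, &scratch);
 *      if (walt_resume_cpus(&scratch, PAUSE_CORE_CTL))
 *              pr_warn("failed to resume cpus\n");
 */
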
/* return true if the requested client has fully halted one of the cpus */
bool cpus_halted_by_client(struct cpumask *cpus, enum pause_client client)
{
        struct halt_cpu_state *halt_cpu_state;
        int cpu;

        for_each_cpu(cpu, cpus) {
                halt_cpu_state = per_cpu_ptr(&halt_state, cpu);

                if ((bool)(halt_cpu_state->client_vote_mask[HALT] & client))
                        return true;
        }

        return false;
}

static void android_rvh_get_nohz_timer_target(void *unused, int *cpu, bool *done)
{
        int i, default_cpu = -1;
        struct sched_domain *sd;
        cpumask_t active_unhalted;

        *done = true;

        cpumask_andnot(&active_unhalted, cpu_active_mask, cpu_halt_mask);

        if (housekeeping_cpu(*cpu, HK_TYPE_TIMER) && !cpu_halted(*cpu)) {
                if (!available_idle_cpu(*cpu))
                        return;
                default_cpu = *cpu;
        }

        /*
         * find first cpu halted by core control and try to avoid
         * affecting externally halted cpus.
         */
        if (!cpumask_weight(&active_unhalted)) {
                cpumask_t tmp_pause, tmp_part_pause, tmp_halt, *tmp;

                cpumask_and(&tmp_part_pause, cpu_active_mask, &cpus_part_paused_by_us);
                cpumask_and(&tmp_pause, cpu_active_mask, &cpus_paused_by_us);
                cpumask_and(&tmp_halt, cpu_active_mask, cpu_halt_mask);
                tmp = cpumask_weight(&tmp_part_pause) ? &tmp_part_pause :
                        cpumask_weight(&tmp_pause) ? &tmp_pause : &tmp_halt;

                for_each_cpu(i, tmp) {
                        if ((*cpu == i) && cpumask_weight(tmp) > 1)
                                continue;

                        *cpu = i;
                        return;
                }
        }

        rcu_read_lock();
        for_each_domain(*cpu, sd) {
                for_each_cpu_and(i, sched_domain_span(sd),
                                 housekeeping_cpumask(HK_TYPE_TIMER)) {
                        if (*cpu == i)
                                continue;

                        if (!available_idle_cpu(i) && !cpu_halted(i)) {
                                *cpu = i;
                                goto unlock;
                        }
                }
        }

        if (default_cpu == -1) {
                for_each_cpu_and(i, &active_unhalted,
                                 housekeeping_cpumask(HK_TYPE_TIMER)) {
                        if (*cpu == i)
                                continue;

                        if (!available_idle_cpu(i)) {
                                *cpu = i;
                                goto unlock;
                        }
                }

                /* choose any active unhalted cpu */
                default_cpu = cpumask_any(&active_unhalted);
                if (unlikely(default_cpu >= nr_cpu_ids))
                        goto unlock;
        }

        *cpu = default_cpu;
unlock:
        rcu_read_unlock();
}

/**
 * android_rvh_set_cpus_allowed_by_task: disallow cpus that are halted
 *
 * NOTES: may be called if migration is disabled for the task
 *        if per-cpu-kthread, must not deliberately return an invalid cpu
 *        if !per-cpu-kthread, may return an invalid cpu (reject dest_cpu)
 *        must not change the cpu for a 32-bit task that is in execve
 */
static void android_rvh_set_cpus_allowed_by_task(void *unused,
                                                 const struct cpumask *cpu_valid_mask,
                                                 const struct cpumask *new_mask,
                                                 struct task_struct *p,
                                                 unsigned int *dest_cpu)
{
        if (unlikely(walt_disabled))
                return;

        /* allow kthreads to change affinity regardless of halt status of dest_cpu */
        if (p->flags & PF_KTHREAD)
                return;

        if (cpu_halted(*dest_cpu) && !p->migration_disabled) {
                cpumask_t allowed_cpus;

                if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve))
                        return;

                /* remove halted cpus from the valid mask, and store locally */
                cpumask_andnot(&allowed_cpus, cpu_valid_mask, cpu_halt_mask);
                cpumask_and(&allowed_cpus, &allowed_cpus, new_mask);

                /* do not modify dest_cpu if there are no cpus to choose from */
                if (!cpumask_empty(&allowed_cpus))
                        *dest_cpu = cpumask_any_and_distribute(&allowed_cpus, new_mask);
        }
}

/**
 * android_rvh_rto_next_cpu: disallow halted cpus for irq work functions
 */
static void android_rvh_rto_next_cpu(void *unused, int rto_cpu, struct cpumask *rto_mask, int *cpu)
{
        cpumask_t allowed_cpus;

        if (unlikely(walt_disabled))
                return;

        if (cpu_halted(*cpu)) {
                /* remove halted cpus from the valid mask, and store locally */
                cpumask_andnot(&allowed_cpus, rto_mask, cpu_halt_mask);
                *cpu = cpumask_next(rto_cpu, &allowed_cpus);
        }
}

/**
 * android_rvh_is_cpu_allowed: disallow cpus that are halted
 *
 * NOTE: this function will not be called if migration is disabled for the task.
 */
static void android_rvh_is_cpu_allowed(void *unused, struct task_struct *p, int cpu, bool *allowed)
{
        if (unlikely(walt_disabled))
                return;

        if (cpumask_test_cpu(cpu, cpu_halt_mask)) {
                cpumask_t cpus_allowed;

                /* default reject for any halted cpu */
                *allowed = false;

                if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve)) {
                        /* 32-bit task in execve; allow this cpu */
                        *allowed = true;
                        return;
                }

                /*
                 * for cfs threads, active cpus in the affinity are allowed
                 * but halted cpus are not allowed
                 */
                cpumask_and(&cpus_allowed, cpu_active_mask, p->cpus_ptr);
                cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_halt_mask);

                if (!(p->flags & PF_KTHREAD)) {
                        if (cpumask_empty(&cpus_allowed)) {
                                /*
                                 * All affined cpus are inactive or halted.
                                 * Allow this cpu for user threads.
                                 */
                                *allowed = true;
                        }
                        return;
                }

                /* for kthreads, dying cpus are not allowed */
                cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_dying_mask);

                if (cpumask_empty(&cpus_allowed)) {
                        /*
                         * All affined cpus are inactive, halted, or dying.
                         * Allow this cpu for kthreads.
                         */
                        *allowed = true;
                }
        }
}

void walt_halt_init(void)
{
        struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

        walt_drain_thread = kthread_run(try_drain_rqs, &drain_data, "halt_drain_rqs");
        if (IS_ERR(walt_drain_thread)) {
                pr_err("Error creating walt drain thread\n");
                return;
        }

        sched_setscheduler_nocheck(walt_drain_thread, SCHED_FIFO, &param);

        register_trace_android_rvh_get_nohz_timer_target(android_rvh_get_nohz_timer_target, NULL);
        register_trace_android_rvh_set_cpus_allowed_by_task(
                        android_rvh_set_cpus_allowed_by_task, NULL);
        register_trace_android_rvh_rto_next_cpu(android_rvh_rto_next_cpu, NULL);
        register_trace_android_rvh_is_cpu_allowed(android_rvh_is_cpu_allowed, NULL);
}

#endif /* CONFIG_HOTPLUG_CPU */