Add Samsung-specific changes

2025-08-11 14:29:00 +02:00
parent c66122e619
commit 4d134a1294
2688 changed files with 1127995 additions and 11475 deletions


@@ -33,3 +33,4 @@ obj-y += fair.o
obj-y += build_policy.o
obj-y += build_utility.o
obj-$(CONFIG_ANDROID_VENDOR_HOOKS) += vendor_hooks.o
obj-$(CONFIG_SCHED_WALT) += walt/

kernel/sched/walt/Kconfig (new file, 41 lines)

@@ -0,0 +1,41 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# QTI WALT based scheduler
#
menu "QTI WALT based scheduler features"
config SCHED_WALT
tristate "Support window based load tracking"
depends on SMP
help
This feature will allow the scheduler to maintain a tunable window
based set of metrics for tasks and runqueues. These metrics can be
used to guide task placement as well as task frequency requirements
for cpufreq governors.
config SCHED_WALT_DEBUG
tristate "WALT debug module"
depends on SCHED_WALT
select TRACE_PREEMPT_TOGGLE
select TRACE_IRQFLAGS
help
This module provides the means of debugging long preempt and
irq disable sections, which helps in identifying scheduling
latencies. The module relies on preemptirq trace hooks and
prints the stacktrace to ftrace upon long preempt and irq
disable events. Sysctl knobs are available for the user to
configure the thresholds.
This module can also be used to crash the system to catch
issues in scenarios like RT throttling and sleeping while in
atomic context.
config SCHED_CONSERVATIVE_BOOST_LPM_BIAS
bool "Enable LPM bias if conservative boost is enabled"
default n
help
This feature will allow the scheduler to disable low power
modes on a cpu if conservative boost is active. The cpu
will not enter low power mode for a hysteresis time period,
which can be configured from userspace.
endmenu


@@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
KCOV_INSTRUMENT := n
KCSAN_SANITIZE := n
obj-$(CONFIG_SCHED_WALT) += sched-walt.o
sched-walt-$(CONFIG_SCHED_WALT) := walt.o boost.o sched_avg.o walt_halt.o core_ctl.o trace.o input-boost.o sysctl.o cpufreq_walt.o fixup.o walt_lb.o walt_rt.o walt_cfs.o walt_tp.o walt_config.o walt_cpufreq_cycle_cntr_driver.o walt_gclk_cycle_counter_driver.o walt_cycles.o debugfs.o pipeline.o smart_freq.o mvp_locking.o
obj-$(CONFIG_SCHED_WALT_DEBUG) += sched-walt-debug.o
sched-walt-debug-$(CONFIG_SCHED_WALT_DEBUG) := walt_debug.o preemptirq_long.o

kernel/sched/walt/boost.c (new file, 359 lines)

@@ -0,0 +1,359 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/of.h>
#include "walt.h"
#include "trace.h"
/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with a higher capacity than the ones where they would have normally
* ended up given their load characteristics. Any entity enabling
* boost is responsible for disabling it as well.
*/
unsigned int sched_boost_type;
enum sched_boost_policy boost_policy;
static DEFINE_MUTEX(boost_mutex);
void walt_init_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] = false;
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = false;
}
void walt_init_topapp_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = true;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
void walt_init_foreground_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
void walt_init_foregroundboost_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
/*
* Scheduler boost type and boost policy might at first seem unrelated,
* however, there exists a connection between them that will allow us
* to use them interchangeably during placement decisions. We'll explain
* the connection here in one possible way so that the implications are
* clear when looking at placement policies.
*
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
* neither be none nor RESTRAINED.
*/
static void set_boost_policy(int type)
{
if (type == NO_BOOST || type == RESTRAINED_BOOST) {
boost_policy = SCHED_BOOST_NONE;
return;
}
if (hmp_capable()) {
boost_policy = SCHED_BOOST_ON_BIG;
return;
}
boost_policy = SCHED_BOOST_ON_ALL;
}
static bool verify_boost_params(int type)
{
return type >= BALANCE_BOOST_DISABLE && type <= BALANCE_BOOST;
}
static void sched_no_boost_nop(void)
{
}
static void sched_full_throttle_boost_enter(void)
{
core_ctl_set_boost(true);
walt_enable_frequency_aggregation(true);
}
static void sched_full_throttle_boost_exit(void)
{
core_ctl_set_boost(false);
walt_enable_frequency_aggregation(false);
}
static void sched_conservative_boost_enter(void)
{
}
static void sched_conservative_boost_exit(void)
{
}
static void sched_restrained_boost_enter(void)
{
walt_enable_frequency_aggregation(true);
}
static void sched_restrained_boost_exit(void)
{
walt_enable_frequency_aggregation(false);
}
static void sched_storage_boost_enter(void)
{
core_ctl_set_boost(true);
}
static void sched_storage_boost_exit(void)
{
core_ctl_set_boost(false);
}
static void sched_balance_boost_enter(void)
{
core_ctl_set_boost(true);
}
static void sched_balance_boost_exit(void)
{
core_ctl_set_boost(false);
}
struct sched_boost_data {
int refcount;
void (*enter)(void);
void (*exit)(void);
};
static struct sched_boost_data sched_boosts[] = {
[NO_BOOST] = {
.refcount = 0,
.enter = sched_no_boost_nop,
.exit = sched_no_boost_nop,
},
[FULL_THROTTLE_BOOST] = {
.refcount = 0,
.enter = sched_full_throttle_boost_enter,
.exit = sched_full_throttle_boost_exit,
},
[CONSERVATIVE_BOOST] = {
.refcount = 0,
.enter = sched_conservative_boost_enter,
.exit = sched_conservative_boost_exit,
},
[RESTRAINED_BOOST] = {
.refcount = 0,
.enter = sched_restrained_boost_enter,
.exit = sched_restrained_boost_exit,
},
[STORAGE_BOOST] = {
.refcount = 0,
.enter = sched_storage_boost_enter,
.exit = sched_storage_boost_exit,
},
[BALANCE_BOOST] = {
.refcount = 0,
.enter = sched_balance_boost_enter,
.exit = sched_balance_boost_exit,
},
};
#define SCHED_BOOST_START FULL_THROTTLE_BOOST
#define SCHED_BOOST_END (BALANCE_BOOST + 1)
static int sched_effective_boost(void)
{
int i;
/*
* The boosts are sorted in descending order by
* priority.
*/
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++) {
if (sched_boosts[i].refcount >= 1)
return i;
}
return NO_BOOST;
}
static void sched_boost_disable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
if (sb->refcount <= 0)
return;
sb->refcount--;
if (sb->refcount)
return;
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
/*
* This boost's refcount becomes zero, so it must
* be disabled. Disable it first and then apply
* the next boost.
*/
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_enable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
sb->refcount++;
if (sb->refcount != 1)
return;
/*
* This is the first enable request for this boost type.
* Take this new request and find the next boost
* by aggregating all the enabled boosts. If there
* is a change, disable the previous boost and enable
* the next boost.
*/
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_disable_all(void)
{
int i;
int prev_boost = sched_boost_type;
if (prev_boost != NO_BOOST) {
sched_boosts[prev_boost].exit();
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++)
sched_boosts[i].refcount = 0;
}
}
static void _sched_set_boost(int type)
{
if (type == 0)
sched_boost_disable_all();
else if (type > 0)
sched_boost_enable(type);
else
sched_boost_disable(-type);
/*
* sysctl_sched_boost holds the boost request from
* user space which could be different from the
* effectively enabled boost. Update the effective
* boost here.
*/
sched_boost_type = sched_effective_boost();
sysctl_sched_boost = sched_boost_type;
set_boost_policy(sysctl_sched_boost);
trace_sched_set_boost(sysctl_sched_boost);
}
int sched_set_boost(int type)
{
int ret = 0;
if (unlikely(walt_disabled))
return -EAGAIN;
mutex_lock(&boost_mutex);
if (verify_boost_params(type))
_sched_set_boost(type);
else
ret = -EINVAL;
mutex_unlock(&boost_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(sched_set_boost);
int sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
unsigned int *data = (unsigned int *)table->data;
mutex_lock(&boost_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto done;
if (verify_boost_params(*data))
_sched_set_boost(*data);
else
ret = -EINVAL;
done:
mutex_unlock(&boost_mutex);
return ret;
}
void walt_boost_init(void)
{
/* force call the callbacks for default boost */
sched_set_boost(FULL_THROTTLE_BOOST);
}
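
The refcounting in sched_boost_enable()/sched_boost_disable() implies a usage contract for callers of sched_set_boost(): a positive type takes a reference on that boost, the negated type drops it, and 0 drops every active boost at once. A minimal sketch of a hypothetical in-kernel client of this API follows; the example_* functions are illustrative only and not part of this commit.

/* Illustrative only: pairing a boost request with its release. */
static int example_storage_burst_begin(void)
{
	/* Takes a reference on STORAGE_BOOST; returns -EAGAIN if WALT is disabled. */
	return sched_set_boost(STORAGE_BOOST);
}

static void example_storage_burst_end(void)
{
	/* The negated type releases the reference taken above. */
	sched_set_boost(-STORAGE_BOOST);
}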

kernel/sched/walt/core_ctl.c (new file, 1921 lines): diff suppressed because it is too large

File diff suppressed because it is too large.


@@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/debugfs.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "trace.h"
unsigned int debugfs_walt_features;
static struct dentry *debugfs_walt;
void walt_register_debugfs(void)
{
debugfs_walt = debugfs_create_dir("walt", NULL);
debugfs_create_u32("walt_features", 0644, debugfs_walt, &debugfs_walt_features);
}

kernel/sched/walt/fixup.c (new file, 185 lines)

@@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2021-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/cpufreq.h>
#include <trace/hooks/topology.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "walt.h"
unsigned int cpuinfo_max_freq_cached;
char sched_lib_name[LIB_PATH_LENGTH];
char sched_lib_task[LIB_PATH_LENGTH];
unsigned int sched_lib_mask_force;
static bool is_sched_lib_based_app(pid_t pid)
{
const char *name = NULL;
char *libname, *lib_list;
struct vm_area_struct *vma;
char path_buf[LIB_PATH_LENGTH];
char *tmp_lib_name;
bool found = false;
struct task_struct *p;
struct mm_struct *mm;
if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0)
return false;
tmp_lib_name = kmalloc(LIB_PATH_LENGTH, GFP_KERNEL);
if (!tmp_lib_name)
return false;
rcu_read_lock();
p = pid ? get_pid_task(find_vpid(pid), PIDTYPE_PID) : get_task_struct(current);
rcu_read_unlock();
if (!p) {
kfree(tmp_lib_name);
return false;
}
mm = get_task_mm(p);
if (mm) {
MA_STATE(mas, &mm->mm_mt, 0, 0);
down_read(&mm->mmap_lock);
mas_for_each(&mas, vma, ULONG_MAX) {
if (vma->vm_file && vma->vm_flags & VM_EXEC) {
name = d_path(&vma->vm_file->f_path,
path_buf, LIB_PATH_LENGTH);
if (IS_ERR(name))
goto release_sem;
strscpy(tmp_lib_name, sched_lib_name, LIB_PATH_LENGTH);
lib_list = tmp_lib_name;
while ((libname = strsep(&lib_list, ","))) {
libname = skip_spaces(libname);
if (strnstr(name, libname,
strnlen(name, LIB_PATH_LENGTH))) {
found = true;
goto release_sem;
}
}
}
}
release_sem:
up_read(&mm->mmap_lock);
mmput(mm);
}
put_task_struct(p);
kfree(tmp_lib_name);
return found;
}
bool is_sched_lib_task(void)
{
if (strnlen(sched_lib_task, LIB_PATH_LENGTH) == 0)
return false;
if (strnstr(current->comm, sched_lib_task, strnlen(current->comm, LIB_PATH_LENGTH)))
return true;
return false;
}
static char cpu_cap_fixup_target[TASK_COMM_LEN];
static int proc_cpu_capacity_fixup_target_show(struct seq_file *m, void *data)
{
seq_printf(m, "%s\n", cpu_cap_fixup_target);
return 0;
}
static int proc_cpu_capacity_fixup_target_open(struct inode *inode,
struct file *file)
{
return single_open(file, proc_cpu_capacity_fixup_target_show, NULL);
}
static ssize_t proc_cpu_capacity_fixup_target_write(struct file *file,
const char __user *buf, size_t count, loff_t *offs)
{
char temp[TASK_COMM_LEN] = {0, };
int len = 0;
if (!count)
return -EINVAL;
len = (count >= TASK_COMM_LEN) ? TASK_COMM_LEN - 1 : count;
if (copy_from_user(temp, buf, len))
return -EFAULT;
if (temp[len - 1] == '\n')
temp[len - 1] = '\0';
strlcpy(cpu_cap_fixup_target, temp, TASK_COMM_LEN);
return count;
}
static const struct proc_ops proc_cpu_capacity_fixup_target_op = {
.proc_open = proc_cpu_capacity_fixup_target_open,
.proc_write = proc_cpu_capacity_fixup_target_write,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
static void android_rvh_show_max_freq(void *unused, struct cpufreq_policy *policy,
unsigned int *max_freq)
{
int curr_len = 0;
if (!cpuinfo_max_freq_cached)
return;
curr_len = strnlen(current->comm, TASK_COMM_LEN);
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
*max_freq = cpuinfo_max_freq_cached;
return;
}
}
if (!(BIT(policy->cpu) & sched_lib_mask_force))
return;
if (is_sched_lib_based_app(current->pid) || is_sched_lib_task())
*max_freq = cpuinfo_max_freq_cached << 1;
}
static void android_rvh_cpu_capacity_show(void *unused,
unsigned long *capacity, int cpu)
{
int curr_len = 0;
curr_len = strnlen(current->comm, TASK_COMM_LEN);
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
*capacity = SCHED_CAPACITY_SCALE;
return;
}
}
if (!soc_sched_lib_name_capacity)
return;
if ((is_sched_lib_based_app(current->pid) || is_sched_lib_task()) &&
cpu < soc_sched_lib_name_capacity)
*capacity = 100;
}
void walt_fixup_init(void)
{
if (!proc_create("cpu_capacity_fixup_target",
0660, NULL, &proc_cpu_capacity_fixup_target_op))
pr_err("Failed to register 'cpu_capacity_fixup_target'\n");
register_trace_android_rvh_show_max_freq(android_rvh_show_max_freq, NULL);
register_trace_android_rvh_cpu_capacity_show(android_rvh_cpu_capacity_show, NULL);
}
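
For reference, the matching rule that is_sched_lib_based_app() applies to each executable mapping boils down to the sketch below. It is illustrative only and not part of this commit; sched_lib_name is the comma separated list configured from userspace and LIB_PATH_LENGTH is the same buffer size used above.

/* Illustrative only: does any entry of the comma separated list match the path? */
static bool example_sched_lib_match(const char *vma_path, char *list)
{
	char *entry;

	while ((entry = strsep(&list, ","))) {
		entry = skip_spaces(entry);
		if (strnstr(vma_path, entry, strnlen(vma_path, LIB_PATH_LENGTH)))
			return true;
	}
	return false;
}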


@@ -0,0 +1,300 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013-2015,2017,2019-2021, The Linux Foundation. All rights reserved.
*/
#define pr_fmt(fmt) "input-boost: " fmt
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/input.h>
#include <linux/time.h>
#include <linux/sysfs.h>
#include <linux/pm_qos.h>
#include "walt.h"
#define input_boost_attr_rw(_name) \
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, show_##_name, store_##_name)
#define show_one(file_name) \
static ssize_t show_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
{ \
return scnprintf(buf, PAGE_SIZE, "%u\n", file_name); \
}
#define store_one(file_name) \
static ssize_t store_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, \
const char *buf, size_t count) \
{ \
\
sscanf(buf, "%u", &file_name); \
return count; \
}
struct cpu_sync {
int cpu;
unsigned int input_boost_min;
unsigned int input_boost_freq;
};
static DEFINE_PER_CPU(struct cpu_sync, sync_info);
static struct workqueue_struct *input_boost_wq;
static struct work_struct input_boost_work;
static bool sched_boost_active;
static struct delayed_work input_boost_rem;
static u64 last_input_time;
#define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC)
static DEFINE_PER_CPU(struct freq_qos_request, qos_req);
static void boost_adjust_notify(struct cpufreq_policy *policy)
{
unsigned int cpu = policy->cpu;
struct cpu_sync *s = &per_cpu(sync_info, cpu);
unsigned int ib_min = s->input_boost_min;
struct freq_qos_request *req = &per_cpu(qos_req, cpu);
int ret;
pr_debug("CPU%u policy min before boost: %u kHz\n",
cpu, policy->min);
pr_debug("CPU%u boost min: %u kHz\n", cpu, ib_min);
ret = freq_qos_update_request(req, ib_min);
if (ret < 0)
pr_err("Failed to update freq constraint in boost_adjust: %d\n",
ib_min);
pr_debug("CPU%u policy min after boost: %u kHz\n", cpu, policy->min);
}
static void update_policy_online(void)
{
unsigned int i;
struct cpufreq_policy *policy;
struct cpumask online_cpus;
/* Re-evaluate policy to trigger adjust notifier for online CPUs */
cpus_read_lock();
online_cpus = *cpu_online_mask;
for_each_cpu(i, &online_cpus) {
policy = cpufreq_cpu_get(i);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, i);
return;
}
cpumask_andnot(&online_cpus, &online_cpus,
policy->related_cpus);
boost_adjust_notify(policy);
}
cpus_read_unlock();
}
static void do_input_boost_rem(struct work_struct *work)
{
unsigned int i, ret;
struct cpu_sync *i_sync_info;
/* Reset the input_boost_min for all CPUs in the system */
pr_debug("Resetting input boost min for all CPUs\n");
for_each_possible_cpu(i) {
i_sync_info = &per_cpu(sync_info, i);
i_sync_info->input_boost_min = 0;
}
/* Update policies for all online CPUs */
update_policy_online();
if (sched_boost_active) {
ret = sched_set_boost(0);
if (ret)
pr_err("input-boost: sched boost disable failed\n");
sched_boost_active = false;
}
}
static void do_input_boost(struct work_struct *work)
{
unsigned int cpu, ret;
struct cpu_sync *i_sync_info;
cancel_delayed_work_sync(&input_boost_rem);
if (sched_boost_active) {
sched_set_boost(0);
sched_boost_active = false;
}
/* Set the input_boost_min for all CPUs in the system */
pr_debug("Setting input boost min for all CPUs\n");
for_each_possible_cpu(cpu) {
i_sync_info = &per_cpu(sync_info, cpu);
i_sync_info->input_boost_min = sysctl_input_boost_freq[cpu];
}
/* Update policies for all online CPUs */
update_policy_online();
/* Enable scheduler boost to migrate tasks to big cluster */
if (sysctl_sched_boost_on_input > 0) {
ret = sched_set_boost(sysctl_sched_boost_on_input);
if (ret)
pr_err("input-boost: sched boost enable failed\n");
else
sched_boost_active = true;
}
queue_delayed_work(input_boost_wq, &input_boost_rem,
msecs_to_jiffies(sysctl_input_boost_ms));
}
static void inputboost_input_event(struct input_handle *handle,
unsigned int type, unsigned int code, int value)
{
u64 now;
int cpu;
int enabled = 0;
for_each_possible_cpu(cpu) {
if (sysctl_input_boost_freq[cpu] > 0) {
enabled = 1;
break;
}
}
if (!enabled)
return;
now = ktime_to_us(ktime_get());
if (now - last_input_time < MIN_INPUT_INTERVAL)
return;
if (work_pending(&input_boost_work))
return;
queue_work(input_boost_wq, &input_boost_work);
last_input_time = ktime_to_us(ktime_get());
}
static int inputboost_input_connect(struct input_handler *handler,
struct input_dev *dev, const struct input_device_id *id)
{
struct input_handle *handle;
int error;
handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
handle->dev = dev;
handle->handler = handler;
handle->name = "cpufreq";
error = input_register_handle(handle);
if (error)
goto err2;
error = input_open_device(handle);
if (error)
goto err1;
return 0;
err1:
input_unregister_handle(handle);
err2:
kfree(handle);
return error;
}
static void inputboost_input_disconnect(struct input_handle *handle)
{
input_close_device(handle);
input_unregister_handle(handle);
kfree(handle);
}
static const struct input_device_id inputboost_ids[] = {
/* multi-touch touchscreen */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT_MASK(EV_ABS) },
.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
BIT_MASK(ABS_MT_POSITION_X) |
BIT_MASK(ABS_MT_POSITION_Y)
},
},
/* touchpad */
{
.flags = INPUT_DEVICE_ID_MATCH_KEYBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
.absbit = { [BIT_WORD(ABS_X)] =
BIT_MASK(ABS_X) | BIT_MASK(ABS_Y)
},
},
/* Keypad */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
.evbit = { BIT_MASK(EV_KEY) },
},
{ },
};
static struct input_handler inputboost_input_handler = {
.event = inputboost_input_event,
.connect = inputboost_input_connect,
.disconnect = inputboost_input_disconnect,
.name = "input-boost",
.id_table = inputboost_ids,
};
struct kobject *input_boost_kobj;
int input_boost_init(void)
{
int cpu, ret;
struct cpu_sync *s;
struct cpufreq_policy *policy;
struct freq_qos_request *req;
input_boost_wq = alloc_workqueue("inputboost_wq", WQ_HIGHPRI, 0);
if (!input_boost_wq)
return -EFAULT;
INIT_WORK(&input_boost_work, do_input_boost);
INIT_DELAYED_WORK(&input_boost_rem, do_input_boost_rem);
for_each_possible_cpu(cpu) {
s = &per_cpu(sync_info, cpu);
s->cpu = cpu;
req = &per_cpu(qos_req, cpu);
policy = cpufreq_cpu_get(cpu);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, cpu);
return -ESRCH;
}
ret = freq_qos_add_request(&policy->constraints, req,
FREQ_QOS_MIN, policy->min);
if (ret < 0) {
pr_err("%s: Failed to add freq constraint (%d)\n",
__func__, ret);
return ret;
}
}
return input_register_handler(&inputboost_input_handler);
}
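
The boost itself is a plain cpufreq QoS operation: input_boost_init() adds one FREQ_QOS_MIN request per CPU, do_input_boost() raises it to sysctl_input_boost_freq, and do_input_boost_rem() relaxes it again. A condensed sketch of that pattern using the standard freq_qos API is shown below; example_min_freq_boost is a hypothetical helper, not part of this commit.

/* Illustrative only: per-CPU minimum-frequency boost via freq_qos. */
static int example_min_freq_boost(struct cpufreq_policy *policy,
				  struct freq_qos_request *req,
				  unsigned int boost_khz)
{
	int ret;

	ret = freq_qos_add_request(&policy->constraints, req,
				   FREQ_QOS_MIN, policy->min);
	if (ret < 0)
		return ret;

	/* Raise the frequency floor for the boost period... */
	freq_qos_update_request(req, boost_khz);

	/* ...and later drop it back so the governor is unconstrained again. */
	freq_qos_update_request(req, 0);
	return 0;
}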


@@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/dtask.h>
#include "../../locking/mutex.h"
#include "walt.h"
static void android_vh_alter_mutex_list_add(void *unused, struct mutex *lock,
struct mutex_waiter *waiter, struct list_head *list,
bool *already_on_list)
{
struct walt_task_struct *wts_waiter =
(struct walt_task_struct *)current->android_vendor_data1;
struct mutex_waiter *pos = NULL;
struct mutex_waiter *n = NULL;
struct list_head *head = list;
struct walt_task_struct *wts;
if (unlikely(walt_disabled))
return;
if (!lock || !waiter || !list)
return;
if (!is_mvp(wts_waiter))
return;
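/*
 * Walk the current waiters and queue this MVP waiter just ahead of the
 * first non-MVP waiter, so MVP tasks acquire the mutex before regular
 * waiters while keeping FIFO order among MVP waiters themselves.
 */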
list_for_each_entry_safe(pos, n, head, list) {
wts = (struct walt_task_struct *)
((struct task_struct *)(pos->task)->android_vendor_data1);
if (!is_mvp(wts)) {
list_add(&waiter->list, pos->list.prev);
*already_on_list = true;
break;
}
}
}
void walt_mvp_lock_ordering_init(void)
{
register_trace_android_vh_alter_mutex_list_add(android_vh_alter_mutex_list_add, NULL);
}


@@ -0,0 +1,239 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2013-2014, 2017, 2021, The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM perf_trace_counters
#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _PERF_TRACE_COUNTERS_H_
/* Ctr index for PMCNTENSET/CLR */
#define CC 0x80000000
#define C0 0x1
#define C1 0x2
#define C2 0x4
#define C3 0x8
#define C4 0x10
#define C5 0x20
#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5)
#define TYPE_MASK 0xFFFF
#define NUM_L1_CTRS 6
#define NUM_AMU_CTRS 3
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/tracepoint.h>
DECLARE_PER_CPU(u32, cntenset_val);
DECLARE_PER_CPU(unsigned long, previous_ccnt);
DECLARE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DECLARE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
unsigned int state;
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
/*
* Preemption ignores task state, therefore preempted tasks are always
* RUNNING (we will not have dequeued if state != RUNNING).
*/
if (preempt)
return TASK_REPORT_MAX;
/*
* task_state_index() uses fls() and returns a value in the 0-8 range.
* Decrement it by 1 (except for the TASK_RUNNING state, i.e. 0) before using
* it in the left shift operation to get the correct task->state
* mapping.
*/
state = task_state_index(p);
return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */
TRACE_EVENT(sched_switch_with_ctrs,
TP_PROTO(bool preempt,
struct task_struct *prev,
struct task_struct *next),
TP_ARGS(preempt, prev, next),
TP_STRUCT__entry(
__field(pid_t, prev_pid)
__field(pid_t, next_pid)
__array(char, prev_comm, TASK_COMM_LEN)
__array(char, next_comm, TASK_COMM_LEN)
__field(long, prev_state)
__field(unsigned long, cctr)
__field(unsigned long, ctr0)
__field(unsigned long, ctr1)
__field(unsigned long, ctr2)
__field(unsigned long, ctr3)
__field(unsigned long, ctr4)
__field(unsigned long, ctr5)
__field(unsigned long, amu0)
__field(unsigned long, amu1)
__field(unsigned long, amu2)
),
TP_fast_assign(
u32 cpu = smp_processor_id();
u32 i;
u32 cnten_val;
unsigned long total_ccnt = 0;
unsigned long total_cnt = 0;
unsigned long amu_cnt = 0;
unsigned long delta_l1_cnts[NUM_L1_CTRS] = {0};
unsigned long delta_amu_cnts[NUM_AMU_CTRS] = {0};
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
__entry->prev_pid = prev->pid;
__entry->next_pid = next->pid;
cnten_val = per_cpu(cntenset_val, cpu);
if (cnten_val & CC) {
/* Read value */
total_ccnt = read_sysreg(pmccntr_el0);
__entry->cctr = total_ccnt -
per_cpu(previous_ccnt, cpu);
per_cpu(previous_ccnt, cpu) = total_ccnt;
}
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read value */
total_cnt = read_sysreg(pmxevcntr_el0);
delta_l1_cnts[i] = total_cnt -
per_cpu(previous_l1_cnts[i], cpu);
per_cpu(previous_l1_cnts[i], cpu) =
total_cnt;
} else
delta_l1_cnts[i] = 0;
}
if (IS_ENABLED(CONFIG_ARM64_AMU_EXTN)) {
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
delta_amu_cnts[0] = amu_cnt -
per_cpu(previous_amu_cnts[0], cpu);
per_cpu(previous_amu_cnts[0], cpu) = amu_cnt;
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
delta_amu_cnts[1] = amu_cnt -
per_cpu(previous_amu_cnts[1], cpu);
per_cpu(previous_amu_cnts[1], cpu) = amu_cnt;
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
delta_amu_cnts[2] = amu_cnt -
per_cpu(previous_amu_cnts[2], cpu);
per_cpu(previous_amu_cnts[2], cpu) = amu_cnt;
}
__entry->ctr0 = delta_l1_cnts[0];
__entry->ctr1 = delta_l1_cnts[1];
__entry->ctr2 = delta_l1_cnts[2];
__entry->ctr3 = delta_l1_cnts[3];
__entry->ctr4 = delta_l1_cnts[4];
__entry->ctr5 = delta_l1_cnts[5];
__entry->amu0 = delta_amu_cnts[0];
__entry->amu1 = delta_amu_cnts[1];
__entry->amu2 = delta_amu_cnts[2];
),
TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s next_pid=%d CCNTR=%lu CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu, CYC: %lu, INST: %lu, STALL: %lu",
__entry->prev_comm, __entry->prev_pid,
(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
{ TASK_INTERRUPTIBLE, "S" },
{ TASK_UNINTERRUPTIBLE, "D" },
{ __TASK_STOPPED, "T" },
{ __TASK_TRACED, "t" },
{ EXIT_DEAD, "X" },
{ EXIT_ZOMBIE, "Z" },
{ TASK_PARKED, "P" },
{ TASK_DEAD, "I" }) :
"R",
__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
__entry->next_comm,
__entry->next_pid,
__entry->cctr,
__entry->ctr0, __entry->ctr1,
__entry->ctr2, __entry->ctr3,
__entry->ctr4, __entry->ctr5,
__entry->amu0, __entry->amu1,
__entry->amu2)
);
TRACE_EVENT(sched_switch_ctrs_cfg,
TP_PROTO(int cpu),
TP_ARGS(cpu),
TP_STRUCT__entry(
__field(int, cpu)
__field(unsigned long, ctr0)
__field(unsigned long, ctr1)
__field(unsigned long, ctr2)
__field(unsigned long, ctr3)
__field(unsigned long, ctr4)
__field(unsigned long, ctr5)
),
TP_fast_assign(
u32 i;
u32 cnten_val;
u32 ctr_type[NUM_L1_CTRS] = {0};
cnten_val = per_cpu(cntenset_val, cpu);
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read type */
ctr_type[i] = read_sysreg(pmxevtyper_el0)
& TYPE_MASK;
} else
ctr_type[i] = 0;
}
__entry->cpu = cpu;
__entry->ctr0 = ctr_type[0];
__entry->ctr1 = ctr_type[1];
__entry->ctr2 = ctr_type[2];
__entry->ctr3 = ctr_type[3];
__entry->ctr4 = ctr_type[4];
__entry->ctr5 = ctr_type[5];
),
TP_printk("cpu=%d CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu",
__entry->cpu,
__entry->ctr0, __entry->ctr1,
__entry->ctr2, __entry->ctr3,
__entry->ctr4, __entry->ctr5)
);
#endif
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../kernel/sched/walt
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE perf_trace_counters
#include <trace/define_trace.h>


@@ -0,0 +1,762 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "walt.h"
#include "trace.h"
static DEFINE_RAW_SPINLOCK(pipeline_lock);
static struct walt_task_struct *pipeline_wts[WALT_NR_CPUS];
int pipeline_nr;
static DEFINE_RAW_SPINLOCK(heavy_lock);
static struct walt_task_struct *heavy_wts[MAX_NR_PIPELINE];
bool pipeline_pinning;
static inline int pipeline_demand(struct walt_task_struct *wts)
{
return scale_time_to_util(wts->coloc_demand);
}
int add_pipeline(struct walt_task_struct *wts)
{
int i, pos = -1, ret = -ENOSPC;
unsigned long flags;
int max_nr_pipeline = cpumask_weight(&cpus_for_pipeline);
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&pipeline_lock, flags);
for (i = 0; i < max_nr_pipeline; i++) {
if (wts == pipeline_wts[i]) {
ret = 0;
goto out;
}
if (pipeline_wts[i] == NULL)
pos = i;
}
if (pos != -1) {
pipeline_wts[pos] = wts;
pipeline_nr++;
ret = 0;
}
out:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
return ret;
}
int remove_pipeline(struct walt_task_struct *wts)
{
int i, j, ret = 0;
unsigned long flags;
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&pipeline_lock, flags);
for (i = 0; i < WALT_NR_CPUS; i++) {
if (wts == pipeline_wts[i]) {
wts->low_latency &= ~WALT_LOW_LATENCY_PIPELINE_BIT;
pipeline_wts[i] = NULL;
pipeline_nr--;
for (j = i; j < WALT_NR_CPUS - 1; j++) {
pipeline_wts[j] = pipeline_wts[j + 1];
pipeline_wts[j + 1] = NULL;
}
goto out;
}
}
out:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
return ret;
}
int remove_heavy(struct walt_task_struct *wts)
{
int i, j, ret = 0;
unsigned long flags;
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&heavy_lock, flags);
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (wts == heavy_wts[i]) {
wts->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i] = NULL;
have_heavy_list--;
for (j = i; j < MAX_NR_PIPELINE - 1; j++) {
heavy_wts[j] = heavy_wts[j + 1];
heavy_wts[j + 1] = NULL;
}
goto out;
}
}
out:
raw_spin_unlock_irqrestore(&heavy_lock, flags);
return ret;
}
void remove_special_task(void)
{
unsigned long flags;
raw_spin_lock_irqsave(&heavy_lock, flags);
/*
* Although the pipeline special task designation is removed,
* if the task is not dead (i.e. this function was called from sysctl context)
* the task will continue to enjoy pipeline privileges until the next update in
* find_heaviest_topapp().
*/
pipeline_special_task = NULL;
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
void set_special_task(struct task_struct *pipeline_special_local)
{
unsigned long flags;
raw_spin_lock_irqsave(&heavy_lock, flags);
pipeline_special_task = pipeline_special_local;
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
/* always set unisolation for max cluster, for pipeline tasks */
static inline void pipeline_set_unisolation(bool set, int flag)
{
static bool unisolation_state;
struct walt_sched_cluster *cluster;
static unsigned int enable_pipeline_unisolation;
if (!set)
enable_pipeline_unisolation &= ~(1 << flag);
else
enable_pipeline_unisolation |= (1 << flag);
if (unisolation_state && !enable_pipeline_unisolation) {
unisolation_state = false;
for_each_sched_cluster(cluster) {
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
core_ctl_set_cluster_boost(cluster->id, false);
}
} else if (!unisolation_state && enable_pipeline_unisolation) {
unisolation_state = true;
for_each_sched_cluster(cluster) {
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
core_ctl_set_cluster_boost(cluster->id, true);
}
}
}
/*
* sysctl_sched_heavy_nr or sysctl_sched_pipeline_util_thres can change at any moment in time.
* as a result, the ability to set/clear unisolation state for a particular type of pipeline, is
* hindered. Detect a transition and reset the unisolation state of the pipeline method no longer
* in use.
*/
static inline void pipeline_reset_unisolation_state(void)
{
static bool last_auto_pipeline;
if ((sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres) && !last_auto_pipeline) {
pipeline_set_unisolation(false, MANUAL_PIPELINE);
last_auto_pipeline = true;
} else if (!sysctl_sched_heavy_nr &&
!sysctl_sched_pipeline_util_thres && last_auto_pipeline) {
pipeline_set_unisolation(false, AUTO_PIPELINE);
last_auto_pipeline = false;
}
}
static inline bool should_pipeline_pin_special(void)
{
if (!pipeline_special_task)
return false;
if (!heavy_wts[MAX_NR_PIPELINE - 1])
return false;
if (pipeline_demand(heavy_wts[0]) <= sysctl_pipeline_special_task_util_thres)
return true;
if (pipeline_demand(heavy_wts[1]) <= sysctl_pipeline_non_special_task_util_thres)
return true;
if (pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_low_pct, 100)))
return false;
if (!pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_high_pct, 100)))
return false;
return true;
}
cpumask_t last_available_big_cpus = CPU_MASK_NONE;
int have_heavy_list;
u32 total_util;
bool find_heaviest_topapp(u64 window_start)
{
struct walt_related_thread_group *grp;
struct walt_task_struct *wts;
unsigned long flags;
static u64 last_rearrange_ns;
int i, j, start;
struct walt_task_struct *heavy_wts_to_drop[MAX_NR_PIPELINE];
if (num_sched_clusters < 2)
return false;
/* lazy enabling/disabling, up to 100 ms, for colocation or heavy_nr change */
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
if (!grp || (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres) ||
sched_boost_type) {
if (have_heavy_list) {
raw_spin_lock_irqsave(&heavy_lock, flags);
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i]) {
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i]->pipeline_cpu = -1;
heavy_wts[i] = NULL;
}
}
raw_spin_unlock_irqrestore(&heavy_lock, flags);
have_heavy_list = 0;
pipeline_set_unisolation(false, AUTO_PIPELINE);
}
return false;
}
if (last_rearrange_ns && (window_start < (last_rearrange_ns + 100 * MSEC_TO_NSEC)))
return false;
last_rearrange_ns = window_start;
raw_spin_lock_irqsave(&grp->lock, flags);
raw_spin_lock(&heavy_lock);
/* remember the old ones in _to_drop[] */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
heavy_wts_to_drop[i] = heavy_wts[i];
heavy_wts[i] = NULL;
}
/* Assign user specified one (if exists) to slot 0*/
if (pipeline_special_task) {
heavy_wts[0] = (struct walt_task_struct *)
pipeline_special_task->android_vendor_data1;
start = 1;
} else {
start = 0;
}
/*
* Ensure that heavy_wts either contains the top 3 top-app tasks,
* or the user defined heavy task followed by the top 2 top-app tasks
*/
list_for_each_entry(wts, &grp->tasks, grp_list) {
struct walt_task_struct *to_be_placed_wts = wts;
/* if the task hasn't seen action recently, skip it */
if (wts->mark_start < window_start - (sched_ravg_window * 2))
continue;
/* skip the user defined task as it's already part of the list */
if (pipeline_special_task && (wts == heavy_wts[0]))
continue;
for (i = start; i < MAX_NR_PIPELINE; i++) {
if (!heavy_wts[i]) {
heavy_wts[i] = to_be_placed_wts;
break;
} else if (pipeline_demand(to_be_placed_wts) >=
pipeline_demand(heavy_wts[i])) {
struct walt_task_struct *tmp;
tmp = heavy_wts[i];
heavy_wts[i] = to_be_placed_wts;
to_be_placed_wts = tmp;
}
}
}
/*
* Determine how many of the top three pipeline tasks should be kept as heavy.
* If the "sched_heavy_nr" node is set, the util threshold is ignored.
*/
total_util = 0;
if (sysctl_sched_heavy_nr) {
for (i = sysctl_sched_heavy_nr; i < MAX_NR_PIPELINE; i++)
heavy_wts[i] = NULL;
} else {
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i])
total_util += pipeline_demand(heavy_wts[i]);
}
if (total_util < sysctl_sched_pipeline_util_thres)
heavy_wts[MAX_NR_PIPELINE - 1] = NULL;
}
/* reset heavy for tasks that are no longer heavy */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
bool reset = true;
if (!heavy_wts_to_drop[i])
continue;
for (j = 0; j < MAX_NR_PIPELINE; j++) {
if (!heavy_wts[j])
continue;
if (heavy_wts_to_drop[i] == heavy_wts[j]) {
reset = false;
break;
}
}
if (reset) {
heavy_wts_to_drop[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts_to_drop[i]->pipeline_cpu = -1;
}
if (heavy_wts[i]) {
heavy_wts[i]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
}
}
if (heavy_wts[MAX_NR_PIPELINE - 1])
pipeline_set_unisolation(true, AUTO_PIPELINE);
else
pipeline_set_unisolation(false, AUTO_PIPELINE);
raw_spin_unlock(&heavy_lock);
raw_spin_unlock_irqrestore(&grp->lock, flags);
return true;
}
void assign_heaviest_topapp(bool found_topapp)
{
int i;
struct walt_task_struct *wts;
if (!found_topapp)
return;
raw_spin_lock(&heavy_lock);
/* start with non-prime cpus chosen for this chipset (e.g. golds) */
cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
cpumask_andnot(&last_available_big_cpus, &last_available_big_cpus, cpu_halt_mask);
/*
* Ensure the special task is only pinned if there are 3 auto pipeline tasks and
* certain demand conditions hold between the special pipeline task and the
* largest non-special pipeline task.
*/
if (should_pipeline_pin_special()) {
pipeline_pinning = true;
heavy_wts[0]->pipeline_cpu =
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
heavy_wts[0]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
if (cpumask_test_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus))
cpumask_clear_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus);
} else {
pipeline_pinning = false;
}
for (i = 0; i < MAX_NR_PIPELINE; i++) {
wts = heavy_wts[i];
if (!wts)
continue;
if (i == 0 && pipeline_pinning)
continue;
if (wts->pipeline_cpu != -1) {
if (cpumask_test_cpu(wts->pipeline_cpu, &last_available_big_cpus))
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
else
/* avoid assigning two pipelines to same cpu */
wts->pipeline_cpu = -1;
}
}
have_heavy_list = 0;
/* assign cpus and heavy status to the new heavy */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
wts = heavy_wts[i];
if (!wts)
continue;
if (wts->pipeline_cpu == -1) {
wts->pipeline_cpu = cpumask_last(&last_available_big_cpus);
if (wts->pipeline_cpu >= nr_cpu_ids) {
/* drop from heavy if it can't be assigned */
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i]->pipeline_cpu = -1;
heavy_wts[i] = NULL;
} else {
/*
* clear the cpu from the available list of pipeline cpus,
* as pipeline_cpu is now assigned for this task.
*/
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
}
}
if (wts->pipeline_cpu >= 0)
have_heavy_list++;
}
if (trace_sched_pipeline_tasks_enabled()) {
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i] != NULL)
trace_sched_pipeline_tasks(AUTO_PIPELINE, i, heavy_wts[i],
have_heavy_list, total_util, pipeline_pinning);
}
}
raw_spin_unlock(&heavy_lock);
}
static inline void swap_pipeline_with_prime_locked(struct walt_task_struct *prime_wts,
struct walt_task_struct *other_wts)
{
if (prime_wts && other_wts) {
if (pipeline_demand(prime_wts) < pipeline_demand(other_wts)) {
int cpu;
cpu = other_wts->pipeline_cpu;
other_wts->pipeline_cpu = prime_wts->pipeline_cpu;
prime_wts->pipeline_cpu = cpu;
trace_sched_pipeline_swapped(other_wts, prime_wts);
}
} else if (!prime_wts && other_wts) {
/* if the prime preferred task died, promote the gold task to prime; assumes 1 prime */
other_wts->pipeline_cpu =
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
trace_sched_pipeline_swapped(other_wts, prime_wts);
}
}
#define WINDOW_HYSTERESIS 4
static inline bool delay_rearrange(u64 window_start, int pipeline_type, bool force)
{
static u64 last_rearrange_ns[MAX_PIPELINE_TYPES];
if (!force && last_rearrange_ns[pipeline_type] &&
(window_start < (last_rearrange_ns[pipeline_type] +
(sched_ravg_window*WINDOW_HYSTERESIS))))
return true;
last_rearrange_ns[pipeline_type] = window_start;
return false;
}
static inline void find_prime_and_max_tasks(struct walt_task_struct **wts_list,
struct walt_task_struct **prime_wts,
struct walt_task_struct **other_wts)
{
int i;
int max_demand = 0;
for (i = 0; i < MAX_NR_PIPELINE; i++) {
struct walt_task_struct *wts = wts_list[i];
if (wts == NULL)
continue;
if (wts->pipeline_cpu < 0)
continue;
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
if (prime_wts)
*prime_wts = wts;
} else if (other_wts && pipeline_demand(wts) > max_demand) {
max_demand = pipeline_demand(wts);
*other_wts = wts;
}
}
}
static inline bool is_prime_worthy(struct walt_task_struct *wts)
{
struct task_struct *p;
if (wts == NULL)
return false;
if (num_sched_clusters < 2)
return true;
p = wts_to_ts(wts);
/*
* Assume the first row of cpu arrays represents the order of clusters
* in magnitude of capacities, where the last column represents prime,
* and the second to last column represents golds
*/
return !task_fits_max(p, cpumask_last(&cpu_array[0][num_sched_clusters - 2]));
}
void rearrange_heavy(u64 window_start, bool force)
{
struct walt_task_struct *prime_wts = NULL;
struct walt_task_struct *other_wts = NULL;
unsigned long flags;
if (num_sched_clusters < 2)
return;
raw_spin_lock_irqsave(&heavy_lock, flags);
/*
* TODO: As primes are isolated under have_heavy_list < 3, and pipeline misfits are also
* disabled, setting the prime worthy task's pipeline_cpu as CPU7 could lead to the
* pipeline_cpu selection being ignored until the next run of find_heaviest_topapp(),
* and furthermore remove the task's current gold pipeline_cpu, which could cause the
* task to start bouncing around on the golds, and ultimately lead to suboptimal behavior.
*/
if (have_heavy_list <= 2) {
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
if (prime_wts && !is_prime_worthy(prime_wts)) {
int assign_cpu;
/* demote prime_wts, it is not worthy */
assign_cpu = cpumask_first(&last_available_big_cpus);
if (assign_cpu < nr_cpu_ids) {
prime_wts->pipeline_cpu = assign_cpu;
cpumask_clear_cpu(assign_cpu, &last_available_big_cpus);
prime_wts = NULL;
}
/* if no pipeline cpu available to assign, leave task on prime */
}
if (!prime_wts && is_prime_worthy(other_wts)) {
/* promote other_wts to prime, it is worthy */
swap_pipeline_with_prime_locked(NULL, other_wts);
}
goto out;
}
if (pipeline_pinning)
goto out;
if (delay_rearrange(window_start, AUTO_PIPELINE, force))
goto out;
if (!soc_feat(SOC_ENABLE_PIPELINE_SWAPPING_BIT) && !force)
goto out;
/* swap prime for have_heavy_list >= 3 */
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
swap_pipeline_with_prime_locked(prime_wts, other_wts);
out:
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
void rearrange_pipeline_preferred_cpus(u64 window_start)
{
unsigned long flags;
struct walt_task_struct *wts;
bool set_unisolation = false;
u32 max_demand = 0;
struct walt_task_struct *prime_wts = NULL;
struct walt_task_struct *other_wts = NULL;
static int assign_cpu = -1;
static bool last_set_unisolation;
int i;
if (sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres)
return;
if (num_sched_clusters < 2)
return;
if (!pipeline_nr || sched_boost_type)
goto out;
if (delay_rearrange(window_start, MANUAL_PIPELINE, false))
goto out;
raw_spin_lock_irqsave(&pipeline_lock, flags);
set_unisolation = true;
for (i = 0; i < WALT_NR_CPUS; i++) {
wts = pipeline_wts[i];
if (!wts)
continue;
if (!wts->grp)
wts->pipeline_cpu = -1;
/*
* assumes that if one pipeline task doesn't have a preferred cpu set,
* none of the other pipeline tasks have one either
*/
if (wts->pipeline_cpu == -1) {
assign_cpu = cpumask_next_and(assign_cpu,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
/* reset and rotate the cpus */
assign_cpu = cpumask_next_and(-1,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
wts->pipeline_cpu = -1;
else
wts->pipeline_cpu = assign_cpu;
}
if (wts->pipeline_cpu != -1) {
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
/* assumes just one prime */
prime_wts = wts;
} else if (pipeline_demand(wts) > max_demand) {
max_demand = pipeline_demand(wts);
other_wts = wts;
}
}
}
if (pipeline_nr <= 2) {
set_unisolation = false;
if (prime_wts && !is_prime_worthy(prime_wts)) {
/* demote prime_wts, it is not worthy */
assign_cpu = cpumask_next_and(assign_cpu,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
/* reset and rotate the cpus */
assign_cpu = cpumask_next_and(-1,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
prime_wts->pipeline_cpu = -1;
else
prime_wts->pipeline_cpu = assign_cpu;
prime_wts = NULL;
}
if (!prime_wts && is_prime_worthy(other_wts)) {
/* promote other_wts to prime, it is worthy */
swap_pipeline_with_prime_locked(NULL, other_wts);
set_unisolation = true;
}
if (prime_wts)
set_unisolation = true;
goto release_lock;
}
/* swap prime for pipeline_nr >= 3 */
swap_pipeline_with_prime_locked(prime_wts, other_wts);
if (trace_sched_pipeline_tasks_enabled()) {
for (i = 0; i < WALT_NR_CPUS; i++) {
if (pipeline_wts[i] != NULL)
trace_sched_pipeline_tasks(MANUAL_PIPELINE, i, pipeline_wts[i],
pipeline_nr, 0, 0);
}
}
release_lock:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
out:
if (set_unisolation ^ last_set_unisolation) {
pipeline_set_unisolation(set_unisolation, MANUAL_PIPELINE);
last_set_unisolation = set_unisolation;
}
}
bool pipeline_check(struct walt_rq *wrq)
{
/* found_topapp should force rearrangement */
bool found_topapp = find_heaviest_topapp(wrq->window_start);
rearrange_pipeline_preferred_cpus(wrq->window_start);
pipeline_reset_unisolation_state();
return found_topapp;
}
void pipeline_rearrange(struct walt_rq *wrq, bool found_topapp)
{
assign_heaviest_topapp(found_topapp);
rearrange_heavy(wrq->window_start, found_topapp);
}
bool enable_load_sync(int cpu)
{
if (!cpumask_test_cpu(cpu, &pipeline_sync_cpus))
return false;
if (!pipeline_in_progress())
return false;
/*
* Under manual pipeline, only load sync between the pipeline_sync_cpus if at least one
* of the CPUs userspace has allocated for pipeline tasks belongs to the
* pipeline_sync_cpus.
*/
if (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres &&
!cpumask_intersects(&pipeline_sync_cpus, &cpus_for_pipeline))
return false;
/* Ensure to load sync only if there are 3 auto pipeline tasks */
if (have_heavy_list)
return have_heavy_list == MAX_NR_PIPELINE;
/*
* If auto pipeline is disabled, manual must be on. Ensure to load sync under manual
* pipeline only if there are 3 or more pipeline tasks
*/
return pipeline_nr >= MAX_NR_PIPELINE;
}
/*
* pipeline_fits_smaller_cpus evaluates if a pipeline task should be treated as a misfit.
* There are three possible outcomes:
* - ret -1: Continue evaluation with task_fits_max().
* - ret 0: Task should be treated as a misfit (does not fit on smaller CPUs).
* - ret 1: Task cannot be treated as a misfit (fits on smaller CPUs).
*
* If the task is assigned a pipeline CPU which is a prime CPU, ret should be 0, indicating
* the task is a misfit.
* If the number of pipeline tasks is 2 or fewer, continue evaluation of task_fits_max().
* If the number of pipeline tasks is 3 or more, ret should be 1, indicating the task fits on the
* smaller CPUs and is not a misfit.
*/
int pipeline_fits_smaller_cpus(struct task_struct *p)
{
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
unsigned int pipeline_cpu = wts->pipeline_cpu;
if (pipeline_cpu == -1)
return -1;
if (cpumask_test_cpu(pipeline_cpu, &cpu_array[0][num_sched_clusters-1]))
return 0;
if (have_heavy_list) {
if (have_heavy_list == MAX_NR_PIPELINE)
return 1;
else
return -1;
}
if (pipeline_nr >= MAX_NR_PIPELINE)
return 1;
else
return -1;
}
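
The heavy-task selection in find_heaviest_topapp() is a bounded insertion into a fixed-size array: each candidate either fills an empty slot or displaces a lighter entry, and the displaced entry keeps competing for the remaining slots. A standalone sketch of that step is shown below; example_insert_top_n is illustrative only, not part of this commit.

/* Illustrative only: keep the n heaviest tasks seen so far, heaviest first. */
static void example_insert_top_n(struct walt_task_struct **top, int n,
				 struct walt_task_struct *cand)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!top[i]) {
			top[i] = cand;
			return;
		}
		if (pipeline_demand(cand) >= pipeline_demand(top[i])) {
			struct walt_task_struct *tmp = top[i];

			top[i] = cand;
			cand = tmp;
		}
	}
}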


@@ -0,0 +1,176 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021 The Linux Foundation. All rights reserved.
*/
#include <linux/ftrace.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <trace/hooks/preemptirq.h>
#define CREATE_TRACE_POINTS
#include "preemptirq_long.h"
#define IRQSOFF_SENTINEL 0x0fffDEAD
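/*
 * Writing this sentinel value to irqsoff_dmesg_output_enabled or
 * irqsoff_crash_sentinel_value (see the sysctl table below) opts in to
 * dmesg output or to a deliberate crash when the thresholds are exceeded.
 */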
static unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000;
static unsigned int sysctl_irqsoff_tracing_threshold_ns = 5000000;
static unsigned int sysctl_irqsoff_dmesg_output_enabled;
static unsigned int sysctl_irqsoff_crash_sentinel_value;
static unsigned int sysctl_irqsoff_crash_threshold_ns = 10000000;
static unsigned int half_million = 500000;
static unsigned int one_hundred_million = 100000000;
static unsigned int one_million = 1000000;
static DEFINE_PER_CPU(u64, irq_disabled_ts);
/*
* Preemption disable tracking requires additional context
* to rule out false positives. See the comment in
* test_preempt_disable_long() for more details.
*/
struct preempt_store {
u64 ts;
int pid;
unsigned long ncsw;
};
static DEFINE_PER_CPU(struct preempt_store, the_ps);
static void note_irq_disable(void *u1, unsigned long u2, unsigned long u3)
{
if (is_idle_task(current))
return;
/*
* We just have to note down the time stamp here. We
* use stacktrace trigger feature to print the stacktrace.
*/
this_cpu_write(irq_disabled_ts, sched_clock());
}
static void test_irq_disable_long(void *u1, unsigned long ip, unsigned long parent_ip)
{
u64 ts = this_cpu_read(irq_disabled_ts);
if (!ts)
return;
this_cpu_write(irq_disabled_ts, 0);
ts = sched_clock() - ts;
if (ts > sysctl_irqsoff_tracing_threshold_ns) {
trace_irq_disable_long(ts, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
if (sysctl_irqsoff_dmesg_output_enabled == IRQSOFF_SENTINEL)
printk_deferred("irqs off exceeds thresh delta=%llu C:(%ps<-%ps<-%ps<-%ps)\n",
ts, (void *)CALLER_ADDR2,
(void *)CALLER_ADDR3,
(void *)CALLER_ADDR4,
(void *)CALLER_ADDR5);
}
if (sysctl_irqsoff_crash_sentinel_value == IRQSOFF_SENTINEL &&
ts > sysctl_irqsoff_crash_threshold_ns) {
printk_deferred("delta=%llu(ns) > crash_threshold=%u(ns) Task=%s\n",
ts, sysctl_irqsoff_crash_threshold_ns,
current->comm);
BUG_ON(1);
}
}
static void note_preempt_disable(void *u1, unsigned long u2, unsigned long u3)
{
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
ps->ts = sched_clock();
ps->pid = current->pid;
ps->ncsw = current->nvcsw + current->nivcsw;
}
static void test_preempt_disable_long(void *u1, unsigned long ip,
unsigned long parent_ip)
{
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
u64 delta = 0;
if (!ps->ts)
return;
/*
* schedule() calls __schedule() with preemption disabled.
* If we had entered idle and are exiting idle now, we would think
* preemption was disabled the whole time. Detect this by
* checking whether preemption stayed disabled across the same
* task. There is a possibility that the same task is scheduled
* after idle. To rule out this possibility, compare the
* context switch count also.
*/
if (ps->pid == current->pid && (ps->ncsw == current->nvcsw +
current->nivcsw))
delta = sched_clock() - ps->ts;
ps->ts = 0;
if (delta > sysctl_preemptoff_tracing_threshold_ns)
trace_preempt_disable_long(delta, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
}
static struct ctl_table preemptirq_long_table[] = {
{
.procname = "preemptoff_tracing_threshold_ns",
.data = &sysctl_preemptoff_tracing_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_tracing_threshold_ns",
.data = &sysctl_irqsoff_tracing_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = &half_million,
.extra2 = &one_hundred_million,
},
{
.procname = "irqsoff_dmesg_output_enabled",
.data = &sysctl_irqsoff_dmesg_output_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_crash_sentinel_value",
.data = &sysctl_irqsoff_crash_sentinel_value,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_crash_threshold_ns",
.data = &sysctl_irqsoff_crash_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = &one_million,
.extra2 = &one_hundred_million,
},
};
int preemptirq_long_init(void)
{
if (!register_sysctl("preemptirq", preemptirq_long_table)) {
pr_err("Fail to register sysctl table\n");
return -EPERM;
}
register_trace_android_rvh_irqs_disable(note_irq_disable, NULL);
register_trace_android_rvh_irqs_enable(test_irq_disable_long, NULL);
register_trace_android_rvh_preempt_disable(note_preempt_disable, NULL);
register_trace_android_rvh_preempt_enable(test_preempt_disable_long,
NULL);
return 0;
}


@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021 The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM preemptirq_long
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#if !defined(_TRACE_PREEMPTIRQ_LONG_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PREEMPTIRQ_LONG_H
#include <linux/tracepoint.h>
/* reference preemptirq_template */
DECLARE_EVENT_CLASS(preemptirq_long_template,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip),
TP_STRUCT__entry(
__field(u64, delta)
__field(unsigned long, caller_offs)
__field(unsigned long, parent_offs)
__field(unsigned long, pparent_offs)
__field(unsigned long, ppparent_offs)
),
TP_fast_assign(
__entry->delta = delta;
__entry->caller_offs = ip;
__entry->parent_offs = parent_ip;
__entry->pparent_offs = pparent_ip;
__entry->ppparent_offs = ppparent_ip;
),
TP_printk("delta=%llu(ns) caller=%ps <- %ps <- %ps <- %ps",
__entry->delta, (void *)__entry->caller_offs,
(void *)__entry->parent_offs, (void *)__entry->pparent_offs,
(void *)__entry->ppparent_offs)
);
DEFINE_EVENT(preemptirq_long_template, irq_disable_long,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
DEFINE_EVENT(preemptirq_long_template, preempt_disable_long,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
#endif /* _TRACE_PREEMPTIRQ_LONG_H */
/* This part must be outside protection */
#include <trace/define_trace.h>


@@ -0,0 +1,397 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
/*
* Scheduler hook for average runqueue determination
*/
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "walt.h"
#include "trace.h"
static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(int, last_time_cpu);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr_trailblazer_prod_sum);
static DEFINE_PER_CPU(u64, nr);
static DEFINE_PER_CPU(u64, nr_max);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;
static DEFINE_PER_CPU(atomic64_t, busy_hyst_end_time) = ATOMIC64_INIT(0);
static DEFINE_PER_CPU(u64, hyst_time);
static DEFINE_PER_CPU(u64, coloc_hyst_busy);
static DEFINE_PER_CPU(u64, coloc_hyst_time);
static DEFINE_PER_CPU(u64, util_hyst_time);
static DEFINE_PER_CPU(u64, smart_freq_legacy_reason_hyst_ns);
#define NR_THRESHOLD_PCT 40
#define NR_THRESHOLD_TRAIL_PCT 80
#define MAX_RTGB_TIME (sysctl_sched_coloc_busy_hyst_max_ms * NSEC_PER_MSEC)
struct sched_avg_stats stats[WALT_NR_CPUS];
unsigned int cstats_util_pct[MAX_CLUSTERS];
u8 smart_freq_legacy_reason_hyst_ms[LEGACY_SMART_FREQ][WALT_NR_CPUS];
/**
* sched_get_cluster_util_pct
 * @return: provide the percentage of this cluster that was used in the
* previous window.
*
* This routine may be called any number of times as needed during
* a window, but will always return the same result until window
* rollover.
*/
unsigned int sched_get_cluster_util_pct(struct walt_sched_cluster *cluster)
{
unsigned int cluster_util_pct = 0;
if (cluster->id < MAX_CLUSTERS)
cluster_util_pct = cstats_util_pct[cluster->id];
return cluster_util_pct;
}
bool trailblazer_state;
/**
* sched_get_nr_running_avg
 * @return: Average nr_running and nr_big_tasks values since the last poll.
 *          The averages are scaled by 100 to provide two decimal places
 *          of precision.
*
* Obtains the average nr_running value since the last poll.
* This function may not be called concurrently with itself.
*
* It is assumed that this function is called at most once per window
* rollover.
*/
struct sched_avg_stats *sched_get_nr_running_avg(void)
{
int cpu;
u64 curr_time = sched_clock();
u64 period = curr_time - last_get_time;
u64 tmp_nr, tmp_misfit, tmp_trailblazer;
bool any_hyst_time = false;
struct walt_sched_cluster *cluster;
bool trailblazer_cpu = false;
if (unlikely(walt_disabled))
return NULL;
if (!period)
goto done;
/* read and reset nr_running counts */
for_each_possible_cpu(cpu) {
unsigned long flags;
u64 diff;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
if ((s64)diff < 0) {
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
WALT_PANIC(1);
}
tmp_nr = per_cpu(nr_prod_sum, cpu);
tmp_nr += per_cpu(nr, cpu) * diff;
tmp_nr = div64_u64((tmp_nr * 100), period);
tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
tmp_misfit += walt_big_tasks(cpu) * diff;
tmp_misfit = div64_u64((tmp_misfit * 100), period);
tmp_trailblazer = per_cpu(nr_trailblazer_prod_sum, cpu);
tmp_trailblazer += walt_trailblazer_tasks(cpu) * diff;
tmp_trailblazer = div64_u64((tmp_trailblazer * 100), period);
/*
 * NR_THRESHOLD_PCT rounds the scaled value so that a task is only
 * counted if it ran for at least (100 - NR_THRESHOLD_PCT)% of the
 * last window, compensating for any over-estimation.
*/
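/*
 * e.g. with NR_THRESHOLD_PCT = 40, tmp_nr == 160 (average 1.6 runnable)
 * rounds up to stats[cpu].nr == 2, while tmp_nr == 150 rounds down to 1.
 */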
stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
100);
stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
NR_THRESHOLD_PCT), 100);
trailblazer_cpu |= (int)div64_u64((tmp_trailblazer +
NR_THRESHOLD_TRAIL_PCT), 100);
stats[cpu].nr_max = per_cpu(nr_max, cpu);
stats[cpu].nr_scaled = tmp_nr;
trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
stats[cpu].nr_misfit, stats[cpu].nr_max,
stats[cpu].nr_scaled, trailblazer_cpu);
per_cpu(last_time, cpu) = curr_time;
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
per_cpu(nr_prod_sum, cpu) = 0;
per_cpu(nr_big_prod_sum, cpu) = 0;
per_cpu(nr_trailblazer_prod_sum, cpu) = 0;
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
trailblazer_state = trailblazer_cpu;
/* collect cluster load stats */
for_each_sched_cluster(cluster) {
unsigned int num_cpus = cpumask_weight(&cluster->cpus);
unsigned int sum_util_pct = 0;
/* load is already scaled, see freq_policy_load/prev_runnable_sum */
for_each_cpu(cpu, &cluster->cpus) {
struct rq *rq = cpu_rq(cpu);
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
/*
 * compute this cpu's utilization as a percentage of its
 * capacity, and sum it across all cpus
 */
sum_util_pct +=
(wrq->util * 100) / arch_scale_cpu_capacity(cpu);
}
/* calculate the average per-cpu utilization */
cstats_util_pct[cluster->id] = sum_util_pct / num_cpus;
}
for_each_possible_cpu(cpu) {
if (per_cpu(coloc_hyst_time, cpu)) {
any_hyst_time = true;
break;
}
}
if (any_hyst_time && get_rtgb_active_time() >= MAX_RTGB_TIME)
sched_update_hyst_times();
last_get_time = curr_time;
done:
return &stats[0];
}
EXPORT_SYMBOL_GPL(sched_get_nr_running_avg);
void sched_update_hyst_times(void)
{
bool rtgb_active;
int cpu;
unsigned long cpu_cap, coloc_busy_pct;
rtgb_active = is_rtgb_active() && (sched_boost_type != CONSERVATIVE_BOOST)
&& (get_rtgb_active_time() < MAX_RTGB_TIME);
for_each_possible_cpu(cpu) {
cpu_cap = arch_scale_cpu_capacity(cpu);
coloc_busy_pct = sysctl_sched_coloc_busy_hyst_cpu_busy_pct[cpu];
per_cpu(hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_busy_hyst_enable_cpus) ?
sysctl_sched_busy_hyst : 0;
per_cpu(coloc_hyst_time, cpu) = ((BIT(cpu)
& sysctl_sched_coloc_busy_hyst_enable_cpus)
&& rtgb_active) ?
sysctl_sched_coloc_busy_hyst_cpu[cpu] : 0;
per_cpu(coloc_hyst_busy, cpu) = mult_frac(cpu_cap,
coloc_busy_pct, 100);
per_cpu(util_hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_util_busy_hyst_enable_cpus) ?
sysctl_sched_util_busy_hyst_cpu[cpu] : 0;
}
}
#define BUSY_NR_RUN 3
#define BUSY_LOAD_FACTOR 10
static inline void update_busy_hyst_end_time(int cpu, int enq,
unsigned long prev_nr_run, u64 curr_time)
{
bool nr_run_trigger = false;
bool load_trigger = false, coloc_load_trigger = false;
u64 agg_hyst_time, total_util = 0;
bool util_load_trigger = false;
int i;
bool hyst_trigger, coloc_trigger;
bool dequeue = (enq < 0);
if (is_max_possible_cluster_cpu(cpu) && is_obet)
return;
if (!per_cpu(hyst_time, cpu) && !per_cpu(coloc_hyst_time, cpu) &&
!per_cpu(util_hyst_time, cpu) && !per_cpu(smart_freq_legacy_reason_hyst_ns, cpu))
return;
if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
nr_run_trigger = true;
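/*
 * On dequeue, keep the busy hysteresis armed while utilization is above
 * ~1/BUSY_LOAD_FACTOR (10%) of the cpu's original capacity.
 */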
if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
capacity_orig_of(cpu))
load_trigger = true;
if (dequeue && cpu_util(cpu) > per_cpu(coloc_hyst_busy, cpu))
coloc_load_trigger = true;
if (dequeue) {
for_each_possible_cpu(i) {
total_util += cpu_util(i);
if (total_util >= sysctl_sched_util_busy_hyst_cpu_util[cpu]) {
util_load_trigger = true;
break;
}
}
}
coloc_trigger = nr_run_trigger || coloc_load_trigger;
#if IS_ENABLED(CONFIG_SCHED_CONSERVATIVE_BOOST_LPM_BIAS)
hyst_trigger = nr_run_trigger || load_trigger || (sched_boost_type == CONSERVATIVE_BOOST);
#else
hyst_trigger = nr_run_trigger || load_trigger;
#endif
agg_hyst_time = max(max(hyst_trigger ? per_cpu(hyst_time, cpu) : 0,
coloc_trigger ? per_cpu(coloc_hyst_time, cpu) : 0),
util_load_trigger ? per_cpu(util_hyst_time, cpu) : 0);
agg_hyst_time = max(agg_hyst_time, per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
if (agg_hyst_time) {
atomic64_set(&per_cpu(busy_hyst_end_time, cpu),
curr_time + agg_hyst_time);
trace_sched_busy_hyst_time(cpu, agg_hyst_time, prev_nr_run,
cpu_util(cpu), per_cpu(hyst_time, cpu),
per_cpu(coloc_hyst_time, cpu),
per_cpu(util_hyst_time, cpu),
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
}
}
int sched_busy_hyst_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
if (table->maxlen > (sizeof(unsigned int) * num_possible_cpus()))
table->maxlen = sizeof(unsigned int) * num_possible_cpus();
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
sched_update_hyst_times();
return ret;
}
/**
* sched_update_nr_prod
 * @cpu: The cpu whose nr_running average is being updated.
 * @enq: positive for an enqueue, negative for a dequeue, zero for a
 *       misfit update on this CPU.
* @return: N/A
*
* Update average with latest nr_running value for CPU
*/
void sched_update_nr_prod(int cpu, int enq)
{
u64 diff;
u64 curr_time;
unsigned long flags, nr_running;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
nr_running = per_cpu(nr, cpu);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
if ((s64)diff < 0) {
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
WALT_PANIC(1);
}
per_cpu(last_time, cpu) = curr_time;
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
per_cpu(nr, cpu) = cpu_rq(cpu)->nr_running + enq;
if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
/* Don't update hyst time for misfit tasks */
if (enq)
update_busy_hyst_end_time(cpu, enq, nr_running, curr_time);
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
per_cpu(nr_trailblazer_prod_sum, cpu) += (u64) walt_trailblazer_tasks(cpu) * diff;
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
/*
* Returns the CPU utilization % in the last window.
*/
unsigned int sched_get_cpu_util_pct(int cpu)
{
struct rq *rq = cpu_rq(cpu);
u64 util;
unsigned long capacity, flags;
unsigned int busy;
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
raw_spin_lock_irqsave(&rq->__lock, flags);
capacity = capacity_orig_of(cpu);
util = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;
util = scale_time_to_util(util);
raw_spin_unlock_irqrestore(&rq->__lock, flags);
util = (util >= capacity) ? capacity : util;
busy = div64_ul((util * 100), capacity);
return busy;
}
int sched_lpm_disallowed_time(int cpu, u64 *timeout)
{
u64 now = sched_clock();
u64 bias_end_time = atomic64_read(&per_cpu(busy_hyst_end_time, cpu));
if (unlikely(walt_disabled))
return -EAGAIN;
if (unlikely(is_reserved(cpu))) {
*timeout = 10 * NSEC_PER_MSEC;
return 0; /* shallowest c-state */
}
if (now < bias_end_time) {
*timeout = bias_end_time - now;
return 0; /* shallowest c-state */
}
return INT_MAX; /* don't care */
}
EXPORT_SYMBOL_GPL(sched_lpm_disallowed_time);
void update_smart_freq_legacy_reason_hyst_time(struct walt_sched_cluster *cluster)
{
int cpu, i;
u8 max_hyst_ms;
for_each_cpu(cpu, &cluster->cpus) {
max_hyst_ms = 0;
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
if (cluster->smart_freq_info->cluster_active_reason & BIT(i))
max_hyst_ms =
max(smart_freq_legacy_reason_hyst_ms[i][cpu],
max_hyst_ms);
}
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu) = max_hyst_ms * NSEC_PER_MSEC;
}
}

View File

@@ -0,0 +1,589 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/tick.h>
#include "walt.h"
#include "trace.h"
#include <trace/events/power.h>
bool smart_freq_init_done;
char reason_dump[1024];
static DEFINE_MUTEX(freq_reason_mutex);
int sched_smart_freq_legacy_dump_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = -EINVAL, pos = 0, i, j;
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
for (j = 0; j < num_sched_clusters; j++) {
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%llu:%d\n", j, i,
default_freq_config[j].legacy_reason_config[i].freq_allowed,
default_freq_config[j].legacy_reason_config[i].hyst_ns,
!!(default_freq_config[j].smart_freq_participation_mask &
BIT(i)));
}
}
ret = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&freq_reason_mutex);
return ret;
}
int sched_smart_freq_ipc_dump_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = -EINVAL, pos = 0, i, j;
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
for (j = 0; j < num_sched_clusters; j++) {
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%lu:%llu:%d\n", j, i,
default_freq_config[j].ipc_reason_config[i].ipc,
default_freq_config[j].ipc_reason_config[i].freq_allowed,
default_freq_config[j].ipc_reason_config[i].hyst_ns,
!!(default_freq_config[j].smart_freq_ipc_participation_mask &
BIT(i)));
}
}
ret = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&freq_reason_mutex);
return ret;
}
int sched_smart_freq_ipc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
int cluster_id = -1;
unsigned long no_reason_freq;
int i;
unsigned int *data = (unsigned int *)table->data;
int val[SMART_FMAX_IPC_MAX];
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(int) * SMART_FMAX_IPC_MAX,
.mode = table->mode,
};
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
if (!write) {
tmp.data = table->data;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
goto unlock;
}
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (ret)
goto unlock;
ret = -EINVAL;
if (data == &sysctl_ipc_freq_levels_cluster0[0])
cluster_id = 0;
if (data == &sysctl_ipc_freq_levels_cluster1[0])
cluster_id = 1;
if (data == &sysctl_ipc_freq_levels_cluster2[0])
cluster_id = 2;
if (data == &sysctl_ipc_freq_levels_cluster3[0])
cluster_id = 3;
if (cluster_id == -1)
goto unlock;
if (val[0] < 0)
goto unlock;
no_reason_freq = val[0];
/* Make sure all reasons freq are larger than NO_REASON */
/* IPC/freq should be in increasing order */
for (i = 1; i < SMART_FMAX_IPC_MAX; i++) {
if (val[i] < val[i-1])
goto unlock;
}
default_freq_config[cluster_id].legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed =
no_reason_freq;
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
default_freq_config[cluster_id].ipc_reason_config[i].freq_allowed = val[i];
data[i] = val[i];
}
ret = 0;
unlock:
mutex_unlock(&freq_reason_mutex);
return ret;
}
/* return highest ipc of the cluster */
unsigned int get_cluster_ipc_level_freq(int curr_cpu, u64 time)
{
int cpu, winning_cpu, cpu_ipc_level = 0, index = 0;
struct walt_sched_cluster *cluster = cpu_cluster(curr_cpu);
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
if (!smart_freq_init_done)
return 0;
for_each_cpu(cpu, &cluster->cpus) {
cpu_ipc_level = per_cpu(ipc_level, cpu);
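/*
 * No IPC update on this cpu for ~8 ms: treat it as tickless and
 * ignore its stale IPC level.
 */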
if ((time - per_cpu(last_ipc_update, cpu)) > 7999999ULL) {
cpu_ipc_level = 0;
per_cpu(tickless_mode, cpu) = true;
} else {
per_cpu(tickless_mode, cpu) = false;
}
if (cpu_ipc_level >= index) {
winning_cpu = cpu;
index = cpu_ipc_level;
}
}
smart_freq_info->cluster_ipc_level = index;
trace_ipc_freq(cluster->id, winning_cpu, index,
smart_freq_info->ipc_reason_config[index].freq_allowed,
time, per_cpu(ipc_deactivate_ns, winning_cpu), curr_cpu,
per_cpu(ipc_cnt, curr_cpu));
return smart_freq_info->ipc_reason_config[index].freq_allowed;
}
static inline bool has_internal_freq_limit_changed(struct walt_sched_cluster *cluster)
{
unsigned int internal_freq, ipc_freq;
int i;
struct smart_freq_cluster_info *smci = cluster->smart_freq_info;
internal_freq = cluster->walt_internal_freq_limit;
cluster->walt_internal_freq_limit = cluster->max_freq;
for (i = 0; i < MAX_FREQ_CAP; i++)
cluster->walt_internal_freq_limit = min(freq_cap[i][cluster->id],
cluster->walt_internal_freq_limit);
ipc_freq = smci->ipc_reason_config[smci->cluster_ipc_level].freq_allowed;
cluster->walt_internal_freq_limit = max(ipc_freq,
cluster->walt_internal_freq_limit);
return cluster->walt_internal_freq_limit != internal_freq;
}
void update_smart_freq_capacities_one_cluster(struct walt_sched_cluster *cluster)
{
int cpu;
if (!smart_freq_init_done)
return;
if (has_internal_freq_limit_changed(cluster)) {
for_each_cpu(cpu, &cluster->cpus)
update_cpu_capacity_helper(cpu);
}
}
void update_smart_freq_capacities(void)
{
struct walt_sched_cluster *cluster;
if (!smart_freq_init_done)
return;
for_each_sched_cluster(cluster)
update_smart_freq_capacities_one_cluster(cluster);
}
/*
* Update the active smart freq reason for the cluster.
*/
static void smart_freq_update_one_cluster(struct walt_sched_cluster *cluster,
uint32_t current_reasons, u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
{
uint32_t current_reason, cluster_active_reason;
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
unsigned long max_cap =
smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
int max_reason = NO_REASON_SMART_FREQ, i;
unsigned long old_freq_cap = freq_cap[SMART_FREQ][cluster->id];
struct rq *rq;
char smart_freq[25] = {0};
char smart_freq_reason[25] = {0};
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
current_reason = current_reasons & BIT(i);
cluster_active_reason = smart_freq_info->cluster_active_reason & BIT(i);
if (current_reason) {
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
smart_freq_info->cluster_active_reason |= BIT(i);
if (i == TRAILBLAZER_SMART_FREQ)
trail_active = true;
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
sustain_active = true;
} else if (cluster_active_reason) {
if (!smart_freq_info->legacy_reason_status[i].deactivate_ns)
smart_freq_info->legacy_reason_status[i].deactivate_ns = wallclock;
}
if (cluster_active_reason) {
/*
* For reasons with deactivation hysteresis, check here if we have
* crossed the hysteresis time and then deactivate the reason.
 * We rely on the scheduler tick path to call this function,
 * so a reason is only deactivated at a tick boundary.
*/
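/*
 * e.g. with hyst_ns = 1 s, a reason last asserted at time T keeps its
 * frequency vote until the first tick after roughly T + 1 s.
 */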
if (smart_freq_info->legacy_reason_status[i].deactivate_ns) {
u64 delta = wallclock -
smart_freq_info->legacy_reason_status[i].deactivate_ns;
if (delta >= smart_freq_info->legacy_reason_config[i].hyst_ns) {
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
smart_freq_info->cluster_active_reason &= ~BIT(i);
if (i == TRAILBLAZER_SMART_FREQ)
trail_active = false;
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
sustain_active = false;
continue;
}
}
if (max_cap < smart_freq_info->legacy_reason_config[i].freq_allowed) {
max_cap = smart_freq_info->legacy_reason_config[i].freq_allowed;
max_reason = i;
}
}
}
if (enable_logging) {
snprintf(smart_freq, sizeof(smart_freq), "smart_fmax_%d", cluster->id);
trace_clock_set_rate(smart_freq, max_cap, raw_smp_processor_id());
snprintf(smart_freq_reason, sizeof(smart_freq_reason), "legacy_reason_%d", cluster->id);
trace_clock_set_rate(smart_freq_reason, max_reason, raw_smp_processor_id());
}
trace_sched_freq_uncap(cluster->id, nr_big, wakeup_ctr_sum, current_reasons,
smart_freq_info->cluster_active_reason, max_cap, max_reason);
if (old_freq_cap == max_cap)
return;
freq_cap[SMART_FREQ][cluster->id] = max_cap;
rq = cpu_rq(cpumask_first(&cluster->cpus));
/*
* cpufreq smart freq doesn't call get_util for the cpu, hence
* invoking callback without rq lock is safe.
*/
waltgov_run_callback(rq, WALT_CPUFREQ_SMART_FREQ_BIT);
}
#define UNCAP_THRES 300000000
#define UTIL_THRESHOLD 90
static bool thres_based_uncap(u64 window_start, struct walt_sched_cluster *cluster)
{
int cpu;
bool cluster_high_load = false, sustained_load = false;
unsigned long freq_capacity, tgt_cap;
unsigned long tgt_freq =
cluster->smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
struct walt_rq *wrq;
freq_capacity = arch_scale_cpu_capacity(cpumask_first(&cluster->cpus));
tgt_cap = mult_frac(freq_capacity, tgt_freq, cluster->max_possible_freq);
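/*
 * Illustrative numbers: a 1024-capacity cluster whose NO_REASON fmax is
 * 80% of max_possible_freq gives tgt_cap ~= 819; a cpu counts as high
 * load once its util reaches 90% (UTIL_THRESHOLD) of that (~737), and
 * sustained_load is reported after 300 ms (UNCAP_THRES) of continuous
 * high load.
 */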
for_each_cpu(cpu, &cluster->cpus) {
wrq = &per_cpu(walt_rq, cpu);
if (wrq->util >= mult_frac(tgt_cap, UTIL_THRESHOLD, 100)) {
cluster_high_load = true;
if (!cluster->found_ts)
cluster->found_ts = window_start;
else if ((window_start - cluster->found_ts) >= UNCAP_THRES)
sustained_load = true;
break;
}
}
if (!cluster_high_load)
cluster->found_ts = 0;
return sustained_load;
}
unsigned int big_task_cnt = 6;
#define WAKEUP_CNT 100
/*
 * The reason is a two-part bitmap:
 *   bits 15 -  0: reason type
 *   bits 31 - 16: changed state of the reason
 * This allows passing multiple reasons at once and avoids multiple calls.
*/
/*
* This will be called from irq work path only
*/
void smart_freq_update_reason_common(u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
{
struct walt_sched_cluster *cluster;
bool current_state;
uint32_t cluster_reasons;
int i;
int cluster_active_reason;
uint32_t cluster_participation_mask;
bool sustained_load = false;
if (!smart_freq_init_done)
return;
for_each_sched_cluster(cluster)
sustained_load |= thres_based_uncap(wallclock, cluster);
for_each_sched_cluster(cluster) {
cluster_reasons = 0;
i = cluster->id;
cluster_participation_mask =
cluster->smart_freq_info->smart_freq_participation_mask;
/*
* NO_REASON
*/
if (cluster_participation_mask & BIT(NO_REASON_SMART_FREQ))
cluster_reasons |= BIT(NO_REASON_SMART_FREQ);
/*
* BOOST
*/
if (cluster_participation_mask & BIT(BOOST_SMART_FREQ)) {
current_state = is_storage_boost() || is_full_throttle_boost();
if (current_state)
cluster_reasons |= BIT(BOOST_SMART_FREQ);
}
/*
* TRAILBLAZER
*/
if (cluster_participation_mask & BIT(TRAILBLAZER_SMART_FREQ)) {
current_state = trailblazer_state;
if (current_state)
cluster_reasons |= BIT(TRAILBLAZER_SMART_FREQ);
}
/*
* SBT
*/
if (cluster_participation_mask & BIT(SBT_SMART_FREQ)) {
current_state = prev_is_sbt;
if (current_state)
cluster_reasons |= BIT(SBT_SMART_FREQ);
}
/*
* BIG_TASKCNT
*/
if (cluster_participation_mask & BIT(BIG_TASKCNT_SMART_FREQ)) {
current_state = (nr_big >= big_task_cnt) &&
(wakeup_ctr_sum < WAKEUP_CNT);
if (current_state)
cluster_reasons |= BIT(BIG_TASKCNT_SMART_FREQ);
}
/*
* SUSTAINED_HIGH_UTIL
*/
if (cluster_participation_mask & BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ)) {
current_state = sustained_load;
if (current_state)
cluster_reasons |= BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ);
}
/*
* PIPELINE_60FPS_OR_LESSER
*/
if (cluster_participation_mask &
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window >= SCHED_RAVG_16MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ);
}
/*
* PIPELINE_90FPS
*/
if (cluster_participation_mask &
BIT(PIPELINE_90FPS_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window == SCHED_RAVG_12MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_90FPS_SMART_FREQ);
}
/*
* PIPELINE_120FPS_OR_GREATER
*/
if (cluster_participation_mask &
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window == SCHED_RAVG_8MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ);
}
/*
* THERMAL_ROTATION
*/
if (cluster_participation_mask & BIT(THERMAL_ROTATION_SMART_FREQ)) {
current_state = (oscillate_cpu != -1);
if (current_state)
cluster_reasons |= BIT(THERMAL_ROTATION_SMART_FREQ);
}
cluster_active_reason = cluster->smart_freq_info->cluster_active_reason;
/* update the reasons for all the clusters */
if (cluster_reasons || cluster_active_reason)
smart_freq_update_one_cluster(cluster, cluster_reasons, wallclock,
nr_big, wakeup_ctr_sum);
}
}
/* Common config for 4 cluster system */
struct smart_freq_cluster_info default_freq_config[MAX_CLUSTERS];
void smart_freq_init(const char *name)
{
struct walt_sched_cluster *cluster;
int i = 0, j;
for_each_sched_cluster(cluster) {
cluster->smart_freq_info = &default_freq_config[i];
cluster->smart_freq_info->smart_freq_participation_mask = BIT(NO_REASON_SMART_FREQ);
cluster->smart_freq_info->cluster_active_reason = 0;
cluster->smart_freq_info->min_cycles = 100;
cluster->smart_freq_info->smart_freq_ipc_participation_mask = 0;
freq_cap[SMART_FREQ][cluster->id] = FREQ_QOS_MAX_DEFAULT_VALUE;
memset(cluster->smart_freq_info->legacy_reason_status, 0,
sizeof(struct smart_freq_legacy_reason_status) *
LEGACY_SMART_FREQ);
memset(cluster->smart_freq_info->legacy_reason_config, 0,
sizeof(struct smart_freq_legacy_reason_config) *
LEGACY_SMART_FREQ);
memset(cluster->smart_freq_info->ipc_reason_config, 0,
sizeof(struct smart_freq_ipc_reason_config) *
SMART_FMAX_IPC_MAX);
for (j = 0; j < LEGACY_SMART_FREQ; j++) {
cluster->smart_freq_info->legacy_reason_config[j].freq_allowed =
FREQ_QOS_MAX_DEFAULT_VALUE;
}
for (j = 0; j < SMART_FMAX_IPC_MAX; j++) {
cluster->smart_freq_info->ipc_reason_config[j].freq_allowed =
FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster0[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster1[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster2[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster3[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
}
i++;
}
if (!strcmp(name, "SUN")) {
for_each_sched_cluster(cluster) {
if (cluster->id == 0) {
/* Legacy */
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
2400000;
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
300000000;
cluster->smart_freq_info->smart_freq_participation_mask |=
BIT(BOOST_SMART_FREQ) |
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
BIT(BIG_TASKCNT_SMART_FREQ) |
BIT(TRAILBLAZER_SMART_FREQ) |
BIT(SBT_SMART_FREQ) |
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
BIT(PIPELINE_90FPS_SMART_FREQ) |
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
BIT(THERMAL_ROTATION_SMART_FREQ);
/* IPC */
cluster->smart_freq_info->ipc_reason_config[0].ipc = 120;
cluster->smart_freq_info->ipc_reason_config[1].ipc = 180;
cluster->smart_freq_info->ipc_reason_config[2].ipc = 220;
cluster->smart_freq_info->ipc_reason_config[3].ipc = 260;
cluster->smart_freq_info->ipc_reason_config[4].ipc = 300;
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
BIT(IPC_E);
cluster->smart_freq_info->min_cycles = 5806080;
} else if (cluster->id == 1) {
/* Legacy */
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
3513600;
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
300000000;
cluster->smart_freq_info->smart_freq_participation_mask |=
BIT(BOOST_SMART_FREQ) |
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
BIT(BIG_TASKCNT_SMART_FREQ) |
BIT(TRAILBLAZER_SMART_FREQ) |
BIT(SBT_SMART_FREQ) |
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
BIT(PIPELINE_90FPS_SMART_FREQ) |
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
BIT(THERMAL_ROTATION_SMART_FREQ);
/* IPC */
cluster->smart_freq_info->ipc_reason_config[0].ipc = 220;
cluster->smart_freq_info->ipc_reason_config[1].ipc = 260;
cluster->smart_freq_info->ipc_reason_config[2].ipc = 280;
cluster->smart_freq_info->ipc_reason_config[3].ipc = 320;
cluster->smart_freq_info->ipc_reason_config[4].ipc = 400;
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
BIT(IPC_E);
cluster->smart_freq_info->min_cycles = 7004160;
}
}
}
smart_freq_init_done = true;
update_smart_freq_capacities();
}

1964
kernel/sched/walt/sysctl.c Normal file

File diff suppressed because it is too large

84
kernel/sched/walt/trace.c Normal file
View File

@@ -0,0 +1,84 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#include "walt.h"
static inline void __window_data(u32 *dst, u32 *src)
{
if (src)
memcpy(dst, src, nr_cpu_ids * sizeof(u32));
else
memset(dst, 0, nr_cpu_ids * sizeof(u32));
}
struct trace_seq;
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
{
int i;
const char *ret = p->buffer + seq_buf_used(&p->seq);
for (i = 0; i < buf_len; i++)
trace_seq_printf(p, "%u ", buf[i]);
trace_seq_putc(p, 0);
return ret;
}
static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new)
{
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
if (curr)
if (new)
return wrq->nt_curr_runnable_sum;
else
return wrq->curr_runnable_sum;
else
if (new)
return wrq->nt_prev_runnable_sum;
else
return wrq->prev_runnable_sum;
}
static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new)
{
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
if (curr)
if (new)
return wrq->grp_time.nt_curr_runnable_sum;
else
return wrq->grp_time.curr_runnable_sum;
else
if (new)
return wrq->grp_time.nt_prev_runnable_sum;
else
return wrq->grp_time.prev_runnable_sum;
}
static inline s64
__get_update_sum(struct rq *rq, enum migrate_types migrate_type,
bool src, bool new, bool curr)
{
switch (migrate_type) {
case RQ_TO_GROUP:
if (src)
return __rq_update_sum(rq, curr, new);
else
return __grp_update_sum(rq, curr, new);
case GROUP_TO_RQ:
if (src)
return __grp_update_sum(rq, curr, new);
else
return __rq_update_sum(rq, curr, new);
default:
WARN_ON_ONCE(1);
return -EINVAL;
}
}
#define CREATE_TRACE_POINTS
#include "trace.h"

2060
kernel/sched/walt/trace.h Normal file

File diff suppressed because it is too large

5683
kernel/sched/walt/walt.c Normal file

File diff suppressed because it is too large

1545
kernel/sched/walt/walt.h Normal file

File diff suppressed because it is too large

1532
kernel/sched/walt/walt_cfs.c Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,207 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "walt.h"
#include "trace.h"
#include <soc/qcom/socinfo.h>
unsigned long __read_mostly soc_flags;
unsigned int trailblazer_floor_freq[MAX_CLUSTERS];
cpumask_t asym_cap_sibling_cpus;
cpumask_t pipeline_sync_cpus;
int oscillate_period_ns;
int soc_sched_lib_name_capacity;
#define PIPELINE_BUSY_THRESH_8MS_WINDOW 7
#define PIPELINE_BUSY_THRESH_12MS_WINDOW 11
#define PIPELINE_BUSY_THRESH_16MS_WINDOW 15
void walt_config(void)
{
int i, j, cpu;
const char *name = socinfo_get_id_string();
sysctl_sched_group_upmigrate_pct = 100;
sysctl_sched_group_downmigrate_pct = 95;
sysctl_sched_task_unfilter_period = 100000000;
sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG;
sysctl_sched_ravg_window_nr_ticks = (HZ / NR_WINDOWS_PER_SEC);
sched_load_granule = DEFAULT_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;
sysctl_sched_coloc_busy_hyst_enable_cpus = 112;
sysctl_sched_util_busy_hyst_enable_cpus = 255;
sysctl_sched_coloc_busy_hyst_max_ms = 5000;
sched_ravg_window = DEFAULT_SCHED_RAVG_WINDOW;
sysctl_input_boost_ms = 40;
sysctl_sched_min_task_util_for_boost = 51;
sysctl_sched_min_task_util_for_uclamp = 51;
sysctl_sched_min_task_util_for_colocation = 35;
sysctl_sched_many_wakeup_threshold = WALT_MANY_WAKEUP_DEFAULT;
sysctl_walt_rtg_cfs_boost_prio = 99; /* disabled by default */
sysctl_sched_sync_hint_enable = 1;
sysctl_sched_skip_sp_newly_idle_lb = 1;
sysctl_sched_hyst_min_coloc_ns = 80000000;
sysctl_sched_idle_enough = SCHED_IDLE_ENOUGH_DEFAULT;
sysctl_sched_cluster_util_thres_pct = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
sysctl_em_inflate_pct = 100;
sysctl_em_inflate_thres = 1024;
sysctl_max_freq_partial_halt = FREQ_QOS_MAX_DEFAULT_VALUE;
asym_cap_sibling_cpus = CPU_MASK_NONE;
pipeline_sync_cpus = CPU_MASK_NONE;
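/* by default, give every legacy smart freq reason other than the first a 4 ms hysteresis */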
for_each_possible_cpu(cpu) {
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
if (i)
smart_freq_legacy_reason_hyst_ms[i][cpu] = 4;
else
smart_freq_legacy_reason_hyst_ms[i][cpu] = 0;
}
}
for (i = 0; i < MAX_MARGIN_LEVELS; i++) {
sysctl_sched_capacity_margin_up_pct[i] = 95; /* ~5% margin */
sysctl_sched_capacity_margin_dn_pct[i] = 85; /* ~15% margin */
sysctl_sched_early_up[i] = 1077;
sysctl_sched_early_down[i] = 1204;
}
for (i = 0; i < WALT_NR_CPUS; i++) {
sysctl_sched_coloc_busy_hyst_cpu[i] = 39000000;
sysctl_sched_coloc_busy_hyst_cpu_busy_pct[i] = 10;
sysctl_sched_util_busy_hyst_cpu[i] = 5000000;
sysctl_sched_util_busy_hyst_cpu_util[i] = 15;
sysctl_input_boost_freq[i] = 0;
}
for (i = 0; i < MAX_CLUSTERS; i++) {
sysctl_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
high_perf_cluster_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_sched_idle_enough_clust[i] = SCHED_IDLE_ENOUGH_DEFAULT;
sysctl_sched_cluster_util_thres_pct_clust[i] = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
trailblazer_floor_freq[i] = 0;
for (j = 0; j < MAX_CLUSTERS; j++) {
load_sync_util_thres[i][j] = 0;
load_sync_low_pct[i][j] = 0;
load_sync_high_pct[i][j] = 0;
}
}
for (i = 0; i < MAX_FREQ_CAP; i++) {
for (j = 0; j < MAX_CLUSTERS; j++)
freq_cap[i][j] = FREQ_QOS_MAX_DEFAULT_VALUE;
}
sysctl_sched_lrpb_active_ms[0] = PIPELINE_BUSY_THRESH_8MS_WINDOW;
sysctl_sched_lrpb_active_ms[1] = PIPELINE_BUSY_THRESH_12MS_WINDOW;
sysctl_sched_lrpb_active_ms[2] = PIPELINE_BUSY_THRESH_16MS_WINDOW;
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
soc_feat_set(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
soc_feat_set(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
soc_feat_set(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
soc_feat_set(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
soc_feat_set(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
sysctl_pipeline_special_task_util_thres = 100;
sysctl_pipeline_non_special_task_util_thres = 200;
sysctl_pipeline_pin_thres_low_pct = 50;
sysctl_pipeline_pin_thres_high_pct = 60;
/* return if socinfo is not available */
if (!name)
return;
if (!strcmp(name, "SUN")) {
sysctl_sched_suppress_region2 = 1;
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
soc_feat_unset(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
soc_feat_unset(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
trailblazer_floor_freq[0] = 1000000;
debugfs_walt_features |= WALT_FEAT_TRAILBLAZER_BIT;
debugfs_walt_features |= WALT_FEAT_UCLAMP_FREQ_BIT;
soc_feat_unset(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
soc_feat_set(SOC_ENABLE_FT_BOOST_TO_ALL);
oscillate_period_ns = 8000000;
soc_feat_set(SOC_ENABLE_EXPERIMENT3);
/*G + P*/
cpumask_copy(&pipeline_sync_cpus, cpu_possible_mask);
soc_sched_lib_name_capacity = 2;
soc_feat_unset(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
sysctl_cluster01_load_sync[0] = 350;
sysctl_cluster01_load_sync[1] = 100;
sysctl_cluster01_load_sync[2] = 100;
sysctl_cluster10_load_sync[0] = 512;
sysctl_cluster10_load_sync[1] = 90;
sysctl_cluster10_load_sync[2] = 90;
load_sync_util_thres[0][1] = sysctl_cluster01_load_sync[0];
load_sync_low_pct[0][1] = sysctl_cluster01_load_sync[1];
load_sync_high_pct[0][1] = sysctl_cluster01_load_sync[2];
load_sync_util_thres[1][0] = sysctl_cluster10_load_sync[0];
load_sync_low_pct[1][0] = sysctl_cluster10_load_sync[1];
load_sync_high_pct[1][0] = sysctl_cluster10_load_sync[2];
sysctl_cluster01_load_sync_60fps[0] = 400;
sysctl_cluster01_load_sync_60fps[1] = 60;
sysctl_cluster01_load_sync_60fps[2] = 100;
sysctl_cluster10_load_sync_60fps[0] = 500;
sysctl_cluster10_load_sync_60fps[1] = 70;
sysctl_cluster10_load_sync_60fps[2] = 90;
load_sync_util_thres_60fps[0][1] = sysctl_cluster01_load_sync_60fps[0];
load_sync_low_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[1];
load_sync_high_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[2];
load_sync_util_thres_60fps[1][0] = sysctl_cluster10_load_sync_60fps[0];
load_sync_low_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[1];
load_sync_high_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[2];
/* CPU0 needs a 9 ms bias for all legacy smart freq reasons */
for (i = 1; i < LEGACY_SMART_FREQ; i++)
smart_freq_legacy_reason_hyst_ms[i][0] = 9;
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
for (i = 1; i < LEGACY_SMART_FREQ; i++)
smart_freq_legacy_reason_hyst_ms[i][cpu] = 2;
}
for_each_possible_cpu(cpu) {
smart_freq_legacy_reason_hyst_ms[PIPELINE_60FPS_OR_LESSER_SMART_FREQ][cpu] =
1;
}
soc_feat_unset(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
} else if (!strcmp(name, "PINEAPPLE")) {
soc_feat_set(SOC_ENABLE_SILVER_RT_SPREAD_BIT);
soc_feat_set(SOC_ENABLE_BOOST_TO_NEXT_CLUSTER_BIT);
/* T + G */
cpumask_or(&asym_cap_sibling_cpus,
&asym_cap_sibling_cpus, &cpu_array[0][1]);
cpumask_or(&asym_cap_sibling_cpus,
&asym_cap_sibling_cpus, &cpu_array[0][2]);
/*
* Treat Golds and Primes as candidates for load sync under pipeline usecase.
 * However, the prime cluster (the only single-CPU cluster) may not be
 * present on some parts, so guard this setting by ensuring all 4
 * clusters are present.
*/
if (num_sched_clusters == 4) {
cpumask_or(&pipeline_sync_cpus,
&pipeline_sync_cpus, &cpu_array[0][2]);
cpumask_or(&pipeline_sync_cpus,
&pipeline_sync_cpus, &cpu_array[0][3]);
}
sysctl_cluster23_load_sync[0] = 350;
sysctl_cluster23_load_sync[1] = 100;
sysctl_cluster23_load_sync[2] = 100;
sysctl_cluster32_load_sync[0] = 512;
sysctl_cluster32_load_sync[1] = 90;
sysctl_cluster32_load_sync[2] = 90;
load_sync_util_thres[2][3] = sysctl_cluster23_load_sync[0];
load_sync_low_pct[2][3] = sysctl_cluster23_load_sync[1];
load_sync_high_pct[2][3] = sysctl_cluster23_load_sync[2];
load_sync_util_thres[3][2] = sysctl_cluster32_load_sync[0];
load_sync_low_pct[3][2] = sysctl_cluster32_load_sync[1];
load_sync_high_pct[3][2] = sysctl_cluster32_load_sync[2];
}
smart_freq_init(name);
}

View File

@@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/topology.h>
#include "walt.h"
#define CYCLE_CNTR_OFFSET(core_id, acc_count) \
(acc_count ? ((core_id + 1) * 4) : 0)
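/*
 * e.g. with an accumulative counter (the qcom,epss case), core 2 reads its
 * 32-bit count at byte offset (2 + 1) * 4 = 12 past reg_cycle_cntr;
 * non-accumulative hardware exposes a single counter at offset 0.
 */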
struct cpufreq_counter {
u64 total_cycle_counter;
u32 prev_cycle_counter;
spinlock_t lock;
};
static struct cpufreq_counter walt_cpufreq_counter[NR_CPUS];
struct walt_cpufreq_soc_data {
u32 reg_enable;
u32 reg_cycle_cntr;
bool accumulative_counter;
};
struct walt_cpufreq_data {
void __iomem *base;
const struct walt_cpufreq_soc_data *soc_data;
};
static struct walt_cpufreq_data cpufreq_data[MAX_CLUSTERS];
u64 walt_cpufreq_get_cpu_cycle_counter(int cpu, u64 wc)
{
const struct walt_cpufreq_soc_data *soc_data;
struct cpufreq_counter *cpu_counter;
struct walt_cpufreq_data *data;
u64 cycle_counter_ret;
unsigned long flags;
u16 offset;
u32 val;
data = &cpufreq_data[cpu_cluster(cpu)->id];
soc_data = data->soc_data;
cpu_counter = &walt_cpufreq_counter[cpu];
spin_lock_irqsave(&cpu_counter->lock, flags);
offset = CYCLE_CNTR_OFFSET(topology_core_id(cpu),
soc_data->accumulative_counter);
val = readl_relaxed(data->base +
soc_data->reg_cycle_cntr + offset);
if (val < cpu_counter->prev_cycle_counter) {
/* Handle counter overflow */
cpu_counter->total_cycle_counter += UINT_MAX -
cpu_counter->prev_cycle_counter + val;
cpu_counter->prev_cycle_counter = val;
} else {
cpu_counter->total_cycle_counter += val -
cpu_counter->prev_cycle_counter;
cpu_counter->prev_cycle_counter = val;
}
cycle_counter_ret = cpu_counter->total_cycle_counter;
spin_unlock_irqrestore(&cpu_counter->lock, flags);
pr_debug("CPU %u, core-id 0x%x, offset %u cycle_counts=%llu\n",
cpu, topology_core_id(cpu), offset, cycle_counter_ret);
return cycle_counter_ret;
}
static int walt_cpufreq_cycle_cntr_driver_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
struct of_phandle_args args;
struct device_node *cpu_np;
void __iomem *base;
int ret = -ENODEV, index, cpu;
struct walt_sched_cluster *cluster;
for_each_sched_cluster(cluster) {
cpu = cluster_first_cpu(cluster);
cpu_np = of_cpu_device_node_get(cpu);
if (!cpu_np)
return -EINVAL;
ret = of_parse_phandle_with_args(cpu_np, "qcom,freq-domain",
"#freq-domain-cells", 0, &args);
of_node_put(cpu_np);
if (ret)
return ret;
index = args.args[0];
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
base = devm_ioremap(dev, res->start, resource_size(res));
if (!base) {
dev_err(dev, "failed to map resource %pR\n", res);
return -ENOMEM;
}
cpufreq_data[cluster->id].soc_data = of_device_get_match_data(&pdev->dev);
cpufreq_data[cluster->id].base = base;
/* HW should be in enabled state to proceed */
if (!(readl_relaxed(base + cpufreq_data[cluster->id].soc_data->reg_enable) & 0x1)) {
dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index);
return -ENODEV;
}
}
if (!walt_get_cycle_counts_cb) {
for_each_possible_cpu(cpu)
spin_lock_init(&walt_cpufreq_counter[cpu].lock);
walt_get_cycle_counts_cb = walt_cpufreq_get_cpu_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
return 0;
}
return ret;
}
static int walt_cpufreq_cycle_cntr_driver_remove(struct platform_device *pdev)
{
return 0;
}
static const struct walt_cpufreq_soc_data hw_soc_data = {
.reg_enable = 0x0,
.reg_cycle_cntr = 0x9c0,
.accumulative_counter = false,
};
static const struct walt_cpufreq_soc_data epss_soc_data = {
.reg_enable = 0x0,
.reg_cycle_cntr = 0x3c4,
.accumulative_counter = true,
};
static const struct of_device_id walt_cpufreq_cycle_cntr_match[] = {
{ .compatible = "qcom,cycle-cntr-hw", .data = &hw_soc_data },
{ .compatible = "qcom,epss", .data = &epss_soc_data },
{}
};
static struct platform_driver walt_cpufreq_cycle_cntr_driver = {
.driver = {
.name = "walt-cpufreq-cycle-cntr",
.of_match_table = walt_cpufreq_cycle_cntr_match
},
.probe = walt_cpufreq_cycle_cntr_driver_probe,
.remove = walt_cpufreq_cycle_cntr_driver_remove,
};
int walt_cpufreq_cycle_cntr_driver_register(void)
{
return platform_driver_register(&walt_cpufreq_cycle_cntr_driver);
}

View File

@@ -0,0 +1,115 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <trace/hooks/cpufreq.h>
#include "walt.h"
struct walt_cpu_cycle {
rwlock_t lock;
u64 cycles;
u64 last_time_ns;
unsigned int cur_freq_khz;
unsigned int mult_fact;
};
static DEFINE_PER_CPU(struct walt_cpu_cycle, walt_cc);
static u64 walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
{
unsigned long flags;
u64 delta;
u64 ret;
/*
* freq is in KHz. so multiply by 1000.
* time in nsec. so divide by NSEC_PER_SEC.
*
* cycles = (freq * 1000) * (t/10^9)
* = (freq * t)/10^6
*
*/
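/*
 * mult_fact is roughly freq_khz * 2^20 / 10^6 (see
 * walt_cpufreq_transition()), so (delta * mult_fact) >> 20 is
 * delta_ns * freq_khz / 10^6. e.g. 1 ms at 1 GHz (1000000 KHz):
 * 10^6 * 10^6 / 10^6 = 10^6 cycles.
 */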
read_lock_irqsave(&wcc->lock, flags);
delta = wc - wcc->last_time_ns;
ret = wcc->cycles + ((delta * wcc->mult_fact) >> 20);
read_unlock_irqrestore(&wcc->lock, flags);
return ret;
}
static void update_walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
{
unsigned long flags;
u64 delta;
/*
* freq is in KHz. so multiply by 1000.
* time in nsec. so divide by NSEC_PER_SEC.
*
* cycles = (freq * 1000) * (t/10^9)
* = (freq * t)/10^6
*
*/
write_lock_irqsave(&wcc->lock, flags);
delta = wc - wcc->last_time_ns;
wcc->cycles += (delta * wcc->mult_fact) >> 20;
wcc->last_time_ns = wc;
write_unlock_irqrestore(&wcc->lock, flags);
}
u64 walt_cpu_cycle_counter(int cpu, u64 wc)
{
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, cpu);
u64 cycles;
cycles = walt_compute_cpu_cycles(wcc, wc);
return cycles;
}
static void walt_cpufreq_transition(void *unused, struct cpufreq_policy *policy)
{
int i;
struct walt_cpu_cycle *wcc;
u64 wc;
unsigned int mult_fact;
wc = sched_clock();
for_each_cpu(i, policy->related_cpus) {
wcc = &per_cpu(walt_cc, i);
update_walt_compute_cpu_cycles(wcc, wc);
wcc->cur_freq_khz = policy->cur;
}
mult_fact = (policy->cur << SCHED_CAPACITY_SHIFT)/1000;
mult_fact = (mult_fact << SCHED_CAPACITY_SHIFT)/1000;
for_each_cpu(i, policy->related_cpus) {
wcc = &per_cpu(walt_cc, i);
wcc->mult_fact = mult_fact;
}
}
void walt_cycle_counter_init(void)
{
int i;
for_each_possible_cpu(i) {
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, i);
rwlock_init(&wcc->lock);
wcc->cur_freq_khz = cpufreq_quick_get(i);
wcc->last_time_ns = 0;
wcc->cycles = 0;
wcc->mult_fact = (wcc->cur_freq_khz << SCHED_CAPACITY_SHIFT)/1000;
wcc->mult_fact = (wcc->mult_fact << SCHED_CAPACITY_SHIFT)/1000;
}
walt_get_cycle_counts_cb = walt_cpu_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
register_trace_android_rvh_cpufreq_transition(walt_cpufreq_transition, NULL);
}

View File

@@ -0,0 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "walt_debug.h"
static void android_rvh_schedule_bug(void *unused, void *unused2)
{
BUG();
}
static int __init walt_debug_init(void)
{
int ret;
ret = preemptirq_long_init();
if (ret)
return ret;
register_trace_android_rvh_schedule_bug(android_rvh_schedule_bug, NULL);
return 0;
}
module_init(walt_debug_init);
MODULE_DESCRIPTION("QTI WALT Debug Module");
MODULE_LICENSE("GPL v2");

View File

@@ -0,0 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
int preemptirq_long_init(void);

View File

@@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include "walt.h"
struct gclk_counter {
u64 total_cycle_counter;
u64 prev_cycle_counter;
spinlock_t lock;
};
static struct gclk_counter walt_gclk_counter[MAX_CLUSTERS];
struct walt_ncc_data {
void __iomem *base;
};
static struct walt_ncc_data ncc_data[MAX_CLUSTERS];
u64 walt_get_ncc_gclk_cycle_counter(int cpu, u64 wc)
{
struct gclk_counter *ncc_counter;
struct walt_ncc_data *data;
u64 cycle_counter_ret;
unsigned long flags;
int index;
u64 val;
index = topology_cluster_id(cpu);
data = &ncc_data[index];
ncc_counter = &walt_gclk_counter[index];
spin_lock_irqsave(&ncc_counter->lock, flags);
val = readq_relaxed(data->base);
if (val < ncc_counter->prev_cycle_counter) {
/*
 * Handle counter overflow. Unlikely to occur for a 64-bit
 * counter, but handled for completeness.
 */
ncc_counter->total_cycle_counter += U64_MAX -
ncc_counter->prev_cycle_counter + val;
ncc_counter->prev_cycle_counter = val;
} else {
ncc_counter->total_cycle_counter += val -
ncc_counter->prev_cycle_counter;
ncc_counter->prev_cycle_counter = val;
}
cycle_counter_ret = ncc_counter->total_cycle_counter;
spin_unlock_irqrestore(&ncc_counter->lock, flags);
pr_debug("CPU %u, cluster-id %d\n", cpu, index);
return cycle_counter_ret;
}
static int walt_gclk_cycle_counter_driver_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
void __iomem *base;
int ret = -ENODEV, index;
struct walt_sched_cluster *cluster;
for_each_sched_cluster(cluster) {
index = topology_cluster_id(cpumask_first(&cluster->cpus));
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
if (!devm_request_mem_region(dev, res->start, resource_size(res), res->name)) {
dev_err(dev, "failed to request resource %pR\n", res);
return -EBUSY;
}
base = devm_ioremap(dev, res->start, resource_size(res));
if (!base) {
dev_err(dev, "failed to map resource %pR\n", res);
return -ENOMEM;
}
ncc_data[index].base = base;
}
if (!walt_get_cycle_counts_cb) {
for (int i = 0; i < MAX_CLUSTERS; i++)
spin_lock_init(&walt_gclk_counter[i].lock);
walt_get_cycle_counts_cb = walt_get_ncc_gclk_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
return 0;
}
return ret;
}
static int walt_gclk_cycle_counter_driver_remove(struct platform_device *pdev)
{
return 0;
}
static const struct of_device_id walt_gclk_cycle_counter_match[] = {
{ .compatible = "qcom,gclk" },
{}
};
static struct platform_driver walt_gclk_cycle_counter_driver = {
.driver = {
.name = "walt-gclk-cycle-counter",
.of_match_table = walt_gclk_cycle_counter_match
},
.probe = walt_gclk_cycle_counter_driver_probe,
.remove = walt_gclk_cycle_counter_driver_remove,
};
int walt_gclk_cycle_counter_driver_register(void)
{
return platform_driver_register(&walt_gclk_cycle_counter_driver);
}

View File

@@ -0,0 +1,713 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/sched/isolation.h>
#include <trace/hooks/sched.h>
#include <walt.h>
#include "trace.h"
#ifdef CONFIG_HOTPLUG_CPU
enum pause_type {
HALT,
PARTIAL_HALT,
MAX_PAUSE_TYPE
};
/* masks of cpus that are halted or partially halted */
struct cpumask __cpu_halt_mask;
struct cpumask __cpu_partial_halt_mask;
/* spin lock to allow calling from non-preemptible context */
static DEFINE_RAW_SPINLOCK(halt_lock);
struct halt_cpu_state {
u8 client_vote_mask[MAX_PAUSE_TYPE];
};
static DEFINE_PER_CPU(struct halt_cpu_state, halt_state);
static DEFINE_RAW_SPINLOCK(walt_drain_pending_lock);
/* the amount of time allowed for enqueue operations that happen
* just after a halt operation.
*/
#define WALT_HALT_CHECK_THRESHOLD_NS 400000
/*
* Remove a task from the runqueue and pretend that it's migrating. This
* should prevent migrations for the detached task and disallow further
* changes to tsk_cpus_allowed.
*/
void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
walt_lockdep_assert_rq(rq, p);
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(rq, p, 0);
list_add(&p->se.group_node, tasks);
}
void attach_tasks_core(struct list_head *tasks, struct rq *rq)
{
struct task_struct *p;
walt_lockdep_assert_rq(rq, NULL);
while (!list_empty(tasks)) {
p = list_first_entry(tasks, struct task_struct, se.group_node);
list_del_init(&p->se.group_node);
BUG_ON(task_rq(p) != rq);
activate_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
}
}
/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
*
 * Called with rq->__lock held even though we're in stop_machine() and
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*
* The function will skip CPU pinned kthreads.
*/
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
LIST_HEAD(percpu_kthreads);
unsigned int num_pinned_kthreads = 1;
struct rq_flags orf = *rf;
int dest_cpu;
/*
* Fudge the rq selection such that the below task selection loop
* doesn't get stuck on the currently eligible stop task.
*
* We're currently inside stop_machine() and the rq is either stuck
* in the stop_machine_cpu_stop() loop, or we're executing this code,
* either way we should never end up calling schedule() until we're
* done here.
*/
rq->stop = NULL;
/*
* put_prev_task() and pick_next_task() sched
* class method both need to have an up-to-date
* value of rq->clock[_task]
*/
update_rq_clock(rq);
#ifdef CONFIG_SCHED_DEBUG
/* note the clock update in orf */
orf.clock_update_flags |= RQCF_UPDATED;
#endif
for (;;) {
/*
* There's this thread running, bail when that's the only
* remaining thread:
*/
if (rq->nr_running == 1)
break;
next = pick_migrate_task(rq);
/*
* Argh ... no iterator for tasks, we need to remove the
* kthread from the run-queue to continue.
*/
if (is_per_cpu_kthread(next)) {
detach_one_task_core(next, rq, &percpu_kthreads);
num_pinned_kthreads += 1;
continue;
}
/*
* Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->__lock, such that holding either
* stabilizes the mask.
*
* Drop rq->__lock is not quite as disastrous as it usually is
* because !cpu_active at this point, which means load-balance
* will not interfere. Also, stop-machine.
*/
rq_unlock(rq, rf);
raw_spin_lock(&next->pi_lock);
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
/*
* Since we're inside stop-machine, _nothing_ should have
* changed the task, WARN if weird stuff happened, because in
* that case the above rq->__lock drop is a fail too.
*/
if (task_rq(next) != rq || !task_on_rq_queued(next)) {
raw_spin_unlock(&next->pi_lock);
continue;
}
/* Find suitable destination for @next */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
if (cpu_of(rq) != dest_cpu && !is_migration_disabled(next)) {
/* only perform a required migration */
rq = __migrate_task(rq, rf, next, dest_cpu);
if (rq != dead_rq) {
rq_unlock(rq, rf);
rq = dead_rq;
*rf = orf;
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
}
} else {
detach_one_task_core(next, rq, &percpu_kthreads);
num_pinned_kthreads += 1;
}
raw_spin_unlock(&next->pi_lock);
}
if (num_pinned_kthreads > 1)
attach_tasks_core(&percpu_kthreads, rq);
rq->stop = stop;
}
void __balance_callbacks(struct rq *rq);
static int drain_rq_cpu_stop(void *data)
{
struct rq *rq = this_rq();
struct rq_flags rf;
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
rq_lock_irqsave(rq, &rf);
/* rq lock is pinned */
/* migrate tasks assumes that the lock is pinned, and will unlock/repin */
migrate_tasks(rq, &rf);
/* __balance_callbacks can unlock and relock the rq lock. unpin */
rq_unpin_lock(rq, &rf);
/*
* service any callbacks that were accumulated, prior to unlocking. such that
* any subsequent calls to rq_lock... will see an rq->balance_callback set to
* the default (0 or balance_push_callback);
*/
wrq->enqueue_counter = 0;
__balance_callbacks(rq);
if (wrq->enqueue_counter)
WALT_BUG(WALT_BUG_WALT, NULL, "cpu: %d task was re-enqueued", cpu_of(rq));
/* lock is no longer pinned, raw unlock using same flags as locking */
raw_spin_rq_unlock_irqrestore(rq, rf.flags);
return 0;
}
static int cpu_drain_rq(unsigned int cpu)
{
if (!cpu_online(cpu))
return 0;
if (available_idle_cpu(cpu))
return 0;
/* this will schedule, must not be in atomic context */
return stop_one_cpu(cpu, drain_rq_cpu_stop, NULL);
}
struct drain_thread_data {
cpumask_t cpus_to_drain;
};
static struct drain_thread_data drain_data = {
.cpus_to_drain = { CPU_BITS_NONE }
};
static int __ref try_drain_rqs(void *data)
{
cpumask_t *cpus_ptr = &((struct drain_thread_data *)data)->cpus_to_drain;
int cpu;
unsigned long flags;
while (!kthread_should_stop()) {
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
if (cpumask_weight(cpus_ptr)) {
cpumask_t local_cpus;
cpumask_copy(&local_cpus, cpus_ptr);
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
for_each_cpu(cpu, &local_cpus)
cpu_drain_rq(cpu);
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
cpumask_andnot(cpus_ptr, cpus_ptr, &local_cpus);
}
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
set_current_state(TASK_RUNNING);
}
return 0;
}
void restrict_cpus_and_freq(struct cpumask *cpus)
{
struct cpumask restrict_cpus;
int cpu = 0;
cpumask_copy(&restrict_cpus, cpus);
if (cpumask_intersects(cpus, cpu_partial_halt_mask) &&
!cpumask_intersects(cpus, cpu_halt_mask) &&
is_state1()) {
for_each_cpu(cpu, cpus)
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
sysctl_max_freq_partial_halt;
} else {
for_each_cpu(cpu, cpus) {
cpumask_or(&restrict_cpus, &restrict_cpus, &(cpu_cluster(cpu)->cpus));
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
FREQ_QOS_MAX_DEFAULT_VALUE;
}
}
update_smart_freq_capacities();
}
struct task_struct *walt_drain_thread;
static int halt_cpus(struct cpumask *cpus, enum pause_type type)
{
int cpu;
int ret = 0;
u64 start_time = 0;
struct halt_cpu_state *halt_cpu_state;
unsigned long flags;
if (trace_halt_cpus_enabled())
start_time = sched_clock();
trace_halt_cpus_start(cpus, 1);
/* add the cpus to the halt mask */
for_each_cpu(cpu, cpus) {
if (cpu == cpumask_first(system_32bit_el0_cpumask())) {
ret = -EINVAL;
goto out;
}
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (type == HALT)
cpumask_set_cpu(cpu, cpu_halt_mask);
else
cpumask_set_cpu(cpu, cpu_partial_halt_mask);
/* guarantee mask written at this time */
wmb();
}
restrict_cpus_and_freq(cpus);
/* migrate tasks off the cpu */
if (type == HALT) {
/* signal and wakeup the drain kthread */
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
cpumask_or(&drain_data.cpus_to_drain, &drain_data.cpus_to_drain, cpus);
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
wake_up_process(walt_drain_thread);
}
out:
trace_halt_cpus(cpus, start_time, 1, ret);
return ret;
}
/* start the cpus again, and kick them to balance */
static int start_cpus(struct cpumask *cpus, enum pause_type type)
{
u64 start_time = sched_clock();
struct halt_cpu_state *halt_cpu_state;
int cpu;
trace_halt_cpus_start(cpus, 0);
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
/* guarantee the halt state is updated */
wmb();
if (type == HALT)
cpumask_clear_cpu(cpu, cpu_halt_mask);
else
cpumask_clear_cpu(cpu, cpu_partial_halt_mask);
/* kick the cpu so it can pull tasks
* after the mask has been cleared.
*/
walt_smp_call_newidle_balance(cpu);
}
restrict_cpus_and_freq(cpus);
trace_halt_cpus(cpus, start_time, 0, 0);
return 0;
}
/* update client for cpus in yield/halt mask */
static void update_clients(struct cpumask *cpus, bool halt, enum pause_client client,
enum pause_type type)
{
int cpu;
struct halt_cpu_state *halt_cpu_state;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (halt)
halt_cpu_state->client_vote_mask[type] |= client;
else
halt_cpu_state->client_vote_mask[type] &= ~client;
}
}
/* remove cpus that are already halted */
static void update_halt_cpus(struct cpumask *cpus, enum pause_type type)
{
int cpu;
struct halt_cpu_state *halt_cpu_state;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (halt_cpu_state->client_vote_mask[type])
cpumask_clear_cpu(cpu, cpus);
}
}
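/*
 * A cpu can be halted by several clients at once; client votes accumulate
 * in client_vote_mask[] and the cpu is only started again once every vote
 * for that pause type has been cleared.
 */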
/* cpus will be modified */
static int walt_halt_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
int ret = 0;
cpumask_t requested_cpus;
unsigned long flags;
raw_spin_lock_irqsave(&halt_lock, flags);
cpumask_copy(&requested_cpus, cpus);
/* remove cpus that are already halted */
update_halt_cpus(cpus, type);
if (cpumask_empty(cpus)) {
update_clients(&requested_cpus, true, client, type);
goto unlock;
}
ret = halt_cpus(cpus, type);
if (ret < 0)
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
cpumask_pr_args(&requested_cpus));
else
update_clients(&requested_cpus, true, client, type);
unlock:
raw_spin_unlock_irqrestore(&halt_lock, flags);
return ret;
}
int walt_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_halt_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_pause_cpus);
int walt_partial_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_halt_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_pause_cpus);
/* cpus will be modified */
static int walt_start_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
int ret = 0;
cpumask_t requested_cpus;
unsigned long flags;
raw_spin_lock_irqsave(&halt_lock, flags);
cpumask_copy(&requested_cpus, cpus);
update_clients(&requested_cpus, false, client, type);
/* remove cpus that should still be halted */
update_halt_cpus(cpus, type);
ret = start_cpus(cpus, type);
if (ret < 0) {
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
cpumask_pr_args(&requested_cpus));
/* restore/increment ref counts in case of error */
update_clients(&requested_cpus, true, client, type);
}
raw_spin_unlock_irqrestore(&halt_lock, flags);
return ret;
}
int walt_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_start_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_resume_cpus);
int walt_partial_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_start_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_resume_cpus);
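/*
 * Usage sketch for the exported pause/resume API above, illustration only
 * and therefore kept under #if 0: PAUSE_CORE_CTL is assumed to be one of
 * the pause_client values from walt.h, so substitute whichever client
 * identity applies. The mask argument can be modified by the callee,
 * hence the caller rebuilds its own copy before resuming.
 */
#if 0	/* illustrative example, not compiled */
static int example_pause_then_resume(void)
{
	cpumask_t cpus;
	int ret;

	cpumask_clear(&cpus);
	cpumask_set_cpu(2, &cpus);
	cpumask_set_cpu(3, &cpus);

	ret = walt_pause_cpus(&cpus, PAUSE_CORE_CTL);
	if (ret)
		return ret;	/* e.g. -EAGAIN while WALT is disabled */

	/* CPUs 2-3 stay halted until every client drops its vote */

	cpumask_clear(&cpus);
	cpumask_set_cpu(2, &cpus);
	cpumask_set_cpu(3, &cpus);
	return walt_resume_cpus(&cpus, PAUSE_CORE_CTL);
}
#endif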
/* return true if the requested client has fully halted one of the cpus */
bool cpus_halted_by_client(struct cpumask *cpus, enum pause_client client)
{
struct halt_cpu_state *halt_cpu_state;
int cpu;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if ((bool)(halt_cpu_state->client_vote_mask[HALT] & client))
return true;
}
return false;
}
static void android_rvh_get_nohz_timer_target(void *unused, int *cpu, bool *done)
{
int i, default_cpu = -1;
struct sched_domain *sd;
cpumask_t active_unhalted;
*done = true;
cpumask_andnot(&active_unhalted, cpu_active_mask, cpu_halt_mask);
if (housekeeping_cpu(*cpu, HK_TYPE_TIMER) && !cpu_halted(*cpu)) {
if (!available_idle_cpu(*cpu))
return;
default_cpu = *cpu;
}
/*
* find first cpu halted by core control and try to avoid
* affecting externally halted cpus.
*/
if (!cpumask_weight(&active_unhalted)) {
cpumask_t tmp_pause, tmp_part_pause, tmp_halt, *tmp;
cpumask_and(&tmp_part_pause, cpu_active_mask, &cpus_part_paused_by_us);
cpumask_and(&tmp_pause, cpu_active_mask, &cpus_paused_by_us);
cpumask_and(&tmp_halt, cpu_active_mask, cpu_halt_mask);
tmp = cpumask_weight(&tmp_part_pause) ? &tmp_part_pause :
cpumask_weight(&tmp_pause) ? &tmp_pause : &tmp_halt;
for_each_cpu(i, tmp) {
if ((*cpu == i) && cpumask_weight(tmp) > 1)
continue;
*cpu = i;
return;
}
}
rcu_read_lock();
for_each_domain(*cpu, sd) {
for_each_cpu_and(i, sched_domain_span(sd),
housekeeping_cpumask(HK_TYPE_TIMER)) {
if (*cpu == i)
continue;
if (!available_idle_cpu(i) && !cpu_halted(i)) {
*cpu = i;
goto unlock;
}
}
}
if (default_cpu == -1) {
for_each_cpu_and(i, &active_unhalted,
housekeeping_cpumask(HK_TYPE_TIMER)) {
if (*cpu == i)
continue;
if (!available_idle_cpu(i)) {
*cpu = i;
goto unlock;
}
}
/* choose any active unhalted cpu */
default_cpu = cpumask_any(&active_unhalted);
if (unlikely(default_cpu >= nr_cpu_ids))
goto unlock;
}
*cpu = default_cpu;
unlock:
rcu_read_unlock();
}
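/*
 * Target choice above: a busy, unhalted housekeeping *cpu is kept as is;
 * when every active CPU is halted, the search prefers CPUs we partially
 * paused, then CPUs we paused, then any halted CPU; otherwise the domain
 * walk looks for a busy housekeeping CPU that is not halted, and the
 * final fallback is an idle housekeeping CPU or any active unhalted CPU.
 */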
/**
* android_rvh_set_cpus_allowed_by_task: disallow cpus that are halted
*
* NOTES: may be called if migration is disabled for the task
* if per-cpu-kthread, must not deliberately return an invalid cpu
* if !per-cpu-kthread, may return an invalid cpu (reject dest_cpu)
* must not change cpu in the in_execve 32-bit task case
*/
static void android_rvh_set_cpus_allowed_by_task(void *unused,
const struct cpumask *cpu_valid_mask,
const struct cpumask *new_mask,
struct task_struct *p,
unsigned int *dest_cpu)
{
if (unlikely(walt_disabled))
return;
/* allow kthreads to change affinity regardless of halt status of dest_cpu */
if (p->flags & PF_KTHREAD)
return;
if (cpu_halted(*dest_cpu) && !p->migration_disabled) {
cpumask_t allowed_cpus;
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve))
return;
/* remove halted cpus from the valid mask, and store locally */
cpumask_andnot(&allowed_cpus, cpu_valid_mask, cpu_halt_mask);
cpumask_and(&allowed_cpus, &allowed_cpus, new_mask);
/* do not modify dest_cpu if there are no cpus to choose from */
if (!cpumask_empty(&allowed_cpus))
*dest_cpu = cpumask_any_and_distribute(&allowed_cpus, new_mask);
}
}
/**
* android_rvh_rto_next_cpu: disallow halted cpus for irq work functions
*/
static void android_rvh_rto_next_cpu(void *unused, int rto_cpu, struct cpumask *rto_mask, int *cpu)
{
cpumask_t allowed_cpus;
if (unlikely(walt_disabled))
return;
if (cpu_halted(*cpu)) {
/* remove halted cpus from the valid mask, and store locally */
cpumask_andnot(&allowed_cpus, rto_mask, cpu_halt_mask);
*cpu = cpumask_next(rto_cpu, &allowed_cpus);
}
}
/**
* android_rvh_is_cpu_allowed: disallow cpus that are halted
*
* NOTE: this function will not be called if migration is disabled for the task.
*/
static void android_rvh_is_cpu_allowed(void *unused, struct task_struct *p, int cpu, bool *allowed)
{
if (unlikely(walt_disabled))
return;
if (cpumask_test_cpu(cpu, cpu_halt_mask)) {
cpumask_t cpus_allowed;
/* default reject for any halted cpu */
*allowed = false;
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve)) {
/* 32bit task in execve. allow this cpu. */
*allowed = true;
return;
}
/*
* for cfs threads, active cpus in the affinity are allowed
* but halted cpus are not allowed
*/
cpumask_and(&cpus_allowed, cpu_active_mask, p->cpus_ptr);
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_halt_mask);
if (!(p->flags & PF_KTHREAD)) {
if (cpumask_empty(&cpus_allowed)) {
/*
* All affined cpus are inactive or halted.
* Allow this cpu for user threads
*/
*allowed = true;
}
return;
}
/* for kthreads, dying cpus are not allowed */
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_dying_mask);
if (cpumask_empty(&cpus_allowed)) {
/*
* All affined cpus inactive or halted or dying.
* Allow this cpu for kthreads
*/
*allowed = true;
}
}
}
void walt_halt_init(void)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
walt_drain_thread = kthread_run(try_drain_rqs, &drain_data, "halt_drain_rqs");
if (IS_ERR(walt_drain_thread)) {
pr_err("Error creating walt drain thread\n");
return;
}
sched_setscheduler_nocheck(walt_drain_thread, SCHED_FIFO, &param);
register_trace_android_rvh_get_nohz_timer_target(android_rvh_get_nohz_timer_target, NULL);
register_trace_android_rvh_set_cpus_allowed_by_task(
android_rvh_set_cpus_allowed_by_task, NULL);
register_trace_android_rvh_rto_next_cpu(android_rvh_rto_next_cpu, NULL);
register_trace_android_rvh_is_cpu_allowed(android_rvh_is_cpu_allowed, NULL);
}
#endif /* CONFIG_HOTPLUG_CPU */

1193
kernel/sched/walt/walt_lb.c Normal file

File diff suppressed because it is too large

431
kernel/sched/walt/walt_rt.c Normal file
View File

@@ -0,0 +1,431 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/sched.h>
#include "walt.h"
#include "trace.h"
static DEFINE_PER_CPU(cpumask_var_t, walt_local_cpu_mask);
DEFINE_PER_CPU(u64, rt_task_arrival_time) = 0;
static bool long_running_rt_task_trace_rgstrd;
static void rt_task_arrival_marker(void *unused, bool preempt,
struct task_struct *prev, struct task_struct *next,
unsigned int prev_state)
{
unsigned int cpu = raw_smp_processor_id();
if (next->policy == SCHED_FIFO && next != cpu_rq(cpu)->stop)
per_cpu(rt_task_arrival_time, cpu) = rq_clock_task(this_rq());
else
per_cpu(rt_task_arrival_time, cpu) = 0;
}
static void long_running_rt_task_notifier(void *unused, struct rq *rq)
{
struct task_struct *curr = rq->curr;
unsigned int cpu = raw_smp_processor_id();
if (!sysctl_sched_long_running_rt_task_ms)
return;
if (!per_cpu(rt_task_arrival_time, cpu))
return;
if (per_cpu(rt_task_arrival_time, cpu) && curr->policy != SCHED_FIFO) {
/*
* It is possible that the scheduling policy for the current
* task might get changed after task arrival time stamp is
* noted during sched_switch of RT task. To avoid such false
* positives, reset arrival time stamp.
*/
per_cpu(rt_task_arrival_time, cpu) = 0;
return;
}
/*
* Since we are called from the main tick, rq clock task must have
* been updated very recently. Use it directly, instead of
* update_rq_clock_task() to avoid warnings.
*/
if (rq->clock_task -
per_cpu(rt_task_arrival_time, cpu)
> sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC) {
printk_deferred("RT task %s (%d) runtime > %u now=%llu task arrival time=%llu runtime=%llu\n",
curr->comm, curr->pid,
sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC,
rq->clock_task,
per_cpu(rt_task_arrival_time, cpu),
rq->clock_task -
per_cpu(rt_task_arrival_time, cpu));
BUG();
}
}
int sched_long_running_rt_task_ms_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
static DEFINE_MUTEX(mutex);
mutex_lock(&mutex);
ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
if (sysctl_sched_long_running_rt_task_ms > 0 &&
sysctl_sched_long_running_rt_task_ms < 800)
sysctl_sched_long_running_rt_task_ms = 800;
if (write && !long_running_rt_task_trace_rgstrd) {
register_trace_sched_switch(rt_task_arrival_marker, NULL);
register_trace_android_vh_scheduler_tick(long_running_rt_task_notifier, NULL);
long_running_rt_task_trace_rgstrd = true;
}
mutex_unlock(&mutex);
return ret;
}
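/*
 * Semantics of the knob above: the first write registers the sched_switch
 * and scheduler_tick hooks (they stay inert while the value is 0), values
 * between 1 and 799 are raised to a minimum of 800 ms, and once armed a
 * SCHED_FIFO task seen on-CPU longer than the threshold is reported via
 * printk_deferred() and then triggers BUG(). The exact /proc/sys path is
 * determined by the sysctl registration elsewhere in this module.
 */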
static void walt_rt_energy_aware_wake_cpu(struct task_struct *task, struct cpumask *lowest_mask,
int ret, int *best_cpu)
{
int cpu;
unsigned long util, best_cpu_util = ULONG_MAX;
unsigned long best_cpu_util_cum = ULONG_MAX;
unsigned long util_cum;
unsigned long tutil = task_util(task);
unsigned int best_idle_exit_latency = UINT_MAX;
unsigned int cpu_idle_exit_latency = UINT_MAX;
bool boost_on_big = rt_boost_on_big();
int cluster;
int order_index = (boost_on_big && num_sched_clusters > 1) ? 1 : 0;
int end_index = 0;
bool best_cpu_lt = true;
if (unlikely(walt_disabled))
return;
if (!ret)
return; /* No targets found */
rcu_read_lock();
if (soc_feat(SOC_ENABLE_SILVER_RT_SPREAD_BIT) && order_index == 0)
end_index = 1;
for (cluster = 0; cluster < num_sched_clusters; cluster++) {
for_each_cpu_and(cpu, lowest_mask, &cpu_array[order_index][cluster]) {
bool lt;
trace_sched_cpu_util(cpu, lowest_mask);
if (!cpu_active(cpu))
continue;
if (cpu_halted(cpu))
continue;
if (sched_cpu_high_irqload(cpu))
continue;
if (__cpu_overutilized(cpu, tutil))
continue;
util = cpu_util(cpu);
lt = (walt_low_latency_task(cpu_rq(cpu)->curr) ||
walt_nr_rtg_high_prio(cpu));
/*
* When the best is suitable and the current is not,
* skip it
*/
if (lt && !best_cpu_lt)
continue;
/*
* Either both are suitable or unsuitable; load takes
* precedence.
*/
if (!(best_cpu_lt ^ lt) && (util > best_cpu_util))
continue;
/*
* If the previous CPU has same load, keep it as
* best_cpu.
*/
if (best_cpu_util == util && *best_cpu == task_cpu(task))
continue;
/*
* If candidate CPU is the previous CPU, select it.
* Otherwise, if its load is same with best_cpu and in
* a shallower C-state, select it. If all above
* conditions are same, select the least cumulative
* window demand CPU.
*/
cpu_idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(cpu));
util_cum = cpu_util_cum(cpu);
if (cpu != task_cpu(task) && best_cpu_util == util) {
if (best_idle_exit_latency < cpu_idle_exit_latency)
continue;
if (best_idle_exit_latency == cpu_idle_exit_latency &&
best_cpu_util_cum < util_cum)
continue;
}
best_idle_exit_latency = cpu_idle_exit_latency;
best_cpu_util_cum = util_cum;
best_cpu_util = util;
*best_cpu = cpu;
best_cpu_lt = lt;
}
if (cluster < end_index) {
if (*best_cpu == -1 || !available_idle_cpu(*best_cpu))
continue;
}
if (*best_cpu != -1)
break;
}
rcu_read_unlock();
}
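/*
 * Candidate filtering and tie-breaking in the scan above: a CPU must be
 * active, unhalted, not under high IRQ load and not overutilized by the
 * task's demand; among survivors, low-latency/RTG suitability is compared
 * first, then lower utilization wins, the task's previous CPU is kept on
 * equal load, and remaining ties fall back to shallower idle exit latency
 * and lower cumulative window demand. The cluster walk starts from a
 * different order when rt_boost_on_big() applies, and on silver-spread
 * parts a missing or non-idle pick in the first cluster lets the search
 * continue to the next one.
 */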
#ifdef CONFIG_UCLAMP_TASK
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned int min_cap;
unsigned int max_cap;
unsigned int cpu_cap;
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
max_cap = uclamp_eff_value(p, UCLAMP_MAX);
cpu_cap = capacity_orig_of(cpu);
return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
return true;
}
#endif
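/*
 * Worked example for the uclamp variant above: a task with
 * UCLAMP_MIN = 300 and UCLAMP_MAX = 1024 fits a CPU of original capacity
 * 512, because min(300, 1024) = 300 <= 512; a CPU is rejected only when
 * both effective clamps exceed its capacity. Without CONFIG_UCLAMP_TASK
 * every CPU is considered to fit.
 */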
/*
* walt specific should_honor_rt_sync (see rt.c). this will honor
* the sync flag regardless of whether the current waker is cfs or rt
*/
static inline bool walt_should_honor_rt_sync(struct rq *rq, struct task_struct *p,
bool sync)
{
return sync &&
p->prio <= rq->rt.highest_prio.next &&
rq->rt.rt_nr_running <= 2;
}
enum rt_fastpaths {
NONE = 0,
NON_WAKEUP,
SYNC_WAKEUP,
CLUSTER_PACKING_FASTPATH,
};
static void walt_select_task_rq_rt(void *unused, struct task_struct *task, int cpu,
int sd_flag, int wake_flags, int *new_cpu)
{
struct task_struct *curr;
struct rq *rq, *this_cpu_rq;
bool may_not_preempt;
bool sync = !!(wake_flags & WF_SYNC);
int ret, target = -1, this_cpu;
struct cpumask *lowest_mask = NULL;
int packing_cpu = -1;
int fastpath = NONE;
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
struct walt_task_struct *wts;
if (unlikely(walt_disabled))
return;
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) {
fastpath = NON_WAKEUP;
goto out;
}
this_cpu = raw_smp_processor_id();
this_cpu_rq = cpu_rq(this_cpu);
wts = (struct walt_task_struct *) task->android_vendor_data1;
/*
* Respect the sync flag as long as the task can run on this CPU.
*/
if (sysctl_sched_sync_hint_enable && cpu_active(this_cpu) && !cpu_halted(this_cpu) &&
cpumask_test_cpu(this_cpu, task->cpus_ptr) &&
cpumask_test_cpu(this_cpu, &wts->reduce_mask) &&
walt_should_honor_rt_sync(this_cpu_rq, task, sync)) {
fastpath = SYNC_WAKEUP;
*new_cpu = this_cpu;
goto out;
}
*new_cpu = cpu; /* previous CPU as back up */
rq = cpu_rq(cpu);
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
/*
* If the current task on @p's runqueue is a softirq task,
* it may run without preemption for a time that is
* ill-suited for a waiting RT task. Therefore, try to
* wake this RT task on another runqueue.
*
* Otherwise, just let it ride on the affined RQ and the
* post-schedule router will push the preempted task away
*
* This test is optimistic, if we get it wrong the load-balancer
* will have to sort it out.
*
* We take into account the capacity of the CPU to ensure it fits the
* requirement of the task - which is only important on heterogeneous
* systems like big.LITTLE.
*/
may_not_preempt = cpu_busy_with_softirqs(cpu);
lowest_mask = this_cpu_cpumask_var_ptr(walt_local_cpu_mask);
/*
* If we're on an asym system, ensure we consider the different capacities
* of the CPUs when searching for the lowest_mask.
*/
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri, task,
lowest_mask, walt_rt_task_fits_capacity);
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
if (packing_cpu >= 0) {
while (packing_cpu < WALT_NR_CPUS) {
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
cpu_active(packing_cpu) &&
!cpu_halted(packing_cpu) &&
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 1))
break;
packing_cpu++;
}
if (packing_cpu < WALT_NR_CPUS) {
fastpath = CLUSTER_PACKING_FASTPATH;
*new_cpu = packing_cpu;
goto unlock;
}
}
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
if (!cpumask_empty(&lowest_mask_reduced))
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, &target);
if (target == -1)
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, &target);
/*
* If cpu is non-preemptible, prefer remote cpu
* even if it's running a higher-prio task.
* Otherwise: Don't bother moving it if the destination CPU is
* not running a lower priority task.
*/
if (target != -1 &&
(may_not_preempt || task->prio < cpu_rq(target)->rt.highest_prio.curr))
*new_cpu = target;
/* if backup or chosen cpu is halted, pick something else */
if (cpu_halted(*new_cpu)) {
cpumask_t non_halted;
/* choose the lowest-order, unhalted, allowed CPU */
cpumask_andnot(&non_halted, task->cpus_ptr, cpu_halt_mask);
target = cpumask_first(&non_halted);
if (target < nr_cpu_ids)
*new_cpu = target;
}
unlock:
rcu_read_unlock();
out:
trace_sched_select_task_rt(task, fastpath, *new_cpu, lowest_mask);
}
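/*
 * Placement order implemented above: non-wakeup balance requests keep the
 * previous CPU; a sync wake-up stays on the waking CPU when that CPU is
 * usable for the task; otherwise a qualifying cluster-packing CPU is
 * taken, then the energy-aware search runs against the task's reduce_mask
 * before the full lowest_mask, the previous CPU remains the fallback when
 * the target would not be preferable, and a halted final choice is
 * replaced by the lowest-numbered unhalted CPU allowed by the affinity.
 */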
static void walt_rt_find_lowest_rq(void *unused, struct task_struct *task,
struct cpumask *lowest_mask, int ret, int *best_cpu)
{
int packing_cpu = -1;
int fastpath = 0;
struct walt_task_struct *wts;
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
if (unlikely(walt_disabled))
return;
wts = (struct walt_task_struct *) task->android_vendor_data1;
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
if (packing_cpu >= 0) {
while (packing_cpu < WALT_NR_CPUS) {
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
cpu_active(packing_cpu) &&
!cpu_halted(packing_cpu) &&
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 2))
break;
packing_cpu++;
}
if (packing_cpu < WALT_NR_CPUS) {
fastpath = CLUSTER_PACKING_FASTPATH;
*best_cpu = packing_cpu;
goto out;
}
}
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
if (!cpumask_empty(&lowest_mask_reduced))
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, best_cpu);
if (*best_cpu == -1)
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, best_cpu);
/*
* Walt was not able to find a non-halted best cpu. Ensure that
* find_lowest_rq doesn't use a halted cpu going forward, but
* does a best effort itself to find a good CPU.
*/
if (*best_cpu == -1)
cpumask_andnot(lowest_mask, lowest_mask, cpu_halt_mask);
out:
trace_sched_rt_find_lowest_rq(task, fastpath, *best_cpu, lowest_mask);
}
void walt_rt_init(void)
{
unsigned int i;
for_each_possible_cpu(i) {
if (!(zalloc_cpumask_var_node(&per_cpu(walt_local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i)))) {
pr_err("walt_local_cpu_mask alloc failed for cpu%d\n", i);
return;
}
}
register_trace_android_rvh_select_task_rq_rt(walt_select_task_rq_rt, NULL);
register_trace_android_rvh_find_lowest_rq(walt_rt_find_lowest_rq, NULL);
}

161
kernel/sched/walt/walt_tp.c Normal file
View File

@@ -0,0 +1,161 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <linux/cpu.h>
#include <linux/tracepoint.h>
#include <trace/hooks/sched.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
#include "perf_trace_counters.h"
unsigned int sysctl_sched_dynamic_tp_enable;
#define USE_CPUHP_STATE CPUHP_AP_ONLINE_DYN
DEFINE_PER_CPU(u32, cntenset_val);
DEFINE_PER_CPU(unsigned long, previous_ccnt);
DEFINE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DEFINE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
DEFINE_PER_CPU(u32, old_pid);
DEFINE_PER_CPU(u32, hotplug_flag);
DEFINE_PER_CPU(u64, prev_time);
static int tracectr_cpu_hotplug_coming_up(unsigned int cpu)
{
per_cpu(hotplug_flag, cpu) = 1;
return 0;
}
static void setup_prev_cnts(u32 cpu, u32 cnten_val)
{
int i;
if (cnten_val & CC)
per_cpu(previous_ccnt, cpu) =
read_sysreg(pmccntr_el0);
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read value */
per_cpu(previous_l1_cnts[i], cpu) =
read_sysreg(pmxevcntr_el0);
}
}
}
void tracectr_notifier(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next,
unsigned int prev_state)
{
u32 cnten_val;
int current_pid;
u32 cpu = task_cpu(next);
u64 now;
if (!trace_sched_switch_with_ctrs_enabled())
return;
current_pid = next->pid;
if (per_cpu(old_pid, cpu) != -1) {
cnten_val = read_sysreg(pmcntenset_el0);
per_cpu(cntenset_val, cpu) = cnten_val;
/* Disable all the counters that were enabled */
write_sysreg(cnten_val, pmcntenclr_el0);
if (per_cpu(hotplug_flag, cpu) == 1) {
per_cpu(hotplug_flag, cpu) = 0;
setup_prev_cnts(cpu, cnten_val);
} else {
trace_sched_switch_with_ctrs(preempt, prev, next);
now = sched_clock();
if ((now - per_cpu(prev_time, cpu)) > NSEC_PER_SEC) {
trace_sched_switch_ctrs_cfg(cpu);
per_cpu(prev_time, cpu) = now;
}
}
/* Enable all the counters that were disabled */
write_sysreg(cnten_val, pmcntenset_el0);
}
per_cpu(old_pid, cpu) = current_pid;
}
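/*
 * Counter handling in the notifier above: the currently enabled PMU
 * counters are disabled across the read so the per-switch deltas come
 * from a stable snapshot and are re-enabled afterwards; a CPU that has
 * just come online (hotplug_flag set) only re-baselines its saved counts
 * instead of emitting a trace event for that first switch, and the
 * per-counter configuration is re-traced at most once per second.
 */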
static void register_sched_switch_ctrs(void)
{
int cpu, rc;
for_each_possible_cpu(cpu)
per_cpu(old_pid, cpu) = -1;
rc = cpuhp_setup_state_nocalls(USE_CPUHP_STATE, "tracectr_cpu_hotplug",
tracectr_cpu_hotplug_coming_up, NULL);
if (rc >= 0)
register_trace_sched_switch(tracectr_notifier, NULL);
}
static void unregister_sched_switch_ctrs(void)
{
unregister_trace_sched_switch(tracectr_notifier, NULL);
cpuhp_remove_state_nocalls(USE_CPUHP_STATE);
}
const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
{
#ifdef CONFIG_SMP
return rd ? rd->span : NULL;
#else
return NULL;
#endif
}
static void sched_overutilized(void *data, struct root_domain *rd,
bool overutilized)
{
if (trace_sched_overutilized_enabled()) {
char span[SPAN_SIZE];
cpumap_print_to_pagebuf(false, span, sched_trace_rd_span(rd));
trace_sched_overutilized(overutilized, span);
}
}
static void walt_register_dynamic_tp_events(void)
{
register_trace_sched_overutilized_tp(sched_overutilized, NULL);
register_sched_switch_ctrs();
}
static void walt_unregister_dynamic_tp_events(void)
{
unregister_trace_sched_overutilized_tp(sched_overutilized, NULL);
unregister_sched_switch_ctrs();
}
int sched_dynamic_tp_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
static DEFINE_MUTEX(mutex);
int ret = 0;
unsigned int *val = (unsigned int *)table->data;
unsigned int old_val;
mutex_lock(&mutex);
old_val = sysctl_sched_dynamic_tp_enable;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write || (old_val == sysctl_sched_dynamic_tp_enable))
goto done;
if (*val)
walt_register_dynamic_tp_events();
else
walt_unregister_dynamic_tp_events();
done:
mutex_unlock(&mutex);
return ret;
}
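/*
 * Toggle semantics of the handler above: writing a value different from
 * the current one either registers (non-zero) or unregisters (zero) the
 * overutilized tracepoint and the per-CPU PMU sched_switch counters;
 * rewriting the same value, or a read, leaves the registration untouched.
 * The sysctl entry itself is assumed to be declared with the other WALT
 * knobs elsewhere in this module.
 */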