Add Samsung-specific changes

2025-08-11 14:29:00 +02:00
parent c66122e619
commit 4d134a1294
2688 changed files with 1127995 additions and 11475 deletions


@@ -33,3 +33,4 @@ obj-y += fair.o
obj-y += build_policy.o
obj-y += build_utility.o
obj-$(CONFIG_ANDROID_VENDOR_HOOKS) += vendor_hooks.o
obj-$(CONFIG_SCHED_WALT) += walt/

kernel/sched/walt/Kconfig (new file, 41 lines)

@@ -0,0 +1,41 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# QTI WALT based scheduler
#
menu "QTI WALT based scheduler features"
config SCHED_WALT
tristate "Support window based load tracking"
depends on SMP
help
This feature will allow the scheduler to maintain a tunable window
based set of metrics for tasks and runqueues. These metrics can be
used to guide task placement as well as task frequency requirements
for cpufreq governors.
config SCHED_WALT_DEBUG
tristate "WALT debug module"
depends on SCHED_WALT
select TRACE_PREEMPT_TOGGLE
select TRACE_IRQFLAGS
help
This module provides the means of debugging long preempt and
irq disable sections, which helps in identifying scheduling
latencies. The module relies on preemptirq trace hooks and
prints the stacktrace to ftrace upon long preempt and irq
disable events. Sysctl knobs are available for the user to
configure the thresholds.
This module can also be used to crash the system to catch
issues in scenarios like RT throttling and sleeping while in
atomic context.
config SCHED_CONSERVATIVE_BOOST_LPM_BIAS
bool "Enable LPM bias if conservative boost is enabled"
default n
help
This feature will allow the scheduler to disable low power
modes on a cpu if conservative boost is active. The cpu
will not enter low power mode for a hysteresis time period,
which can be configured from userspace.
endmenu


@@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
KCOV_INSTRUMENT := n
KCSAN_SANITIZE := n
obj-$(CONFIG_SCHED_WALT) += sched-walt.o
sched-walt-$(CONFIG_SCHED_WALT) := walt.o boost.o sched_avg.o walt_halt.o core_ctl.o trace.o input-boost.o sysctl.o cpufreq_walt.o fixup.o walt_lb.o walt_rt.o walt_cfs.o walt_tp.o walt_config.o walt_cpufreq_cycle_cntr_driver.o walt_gclk_cycle_counter_driver.o walt_cycles.o debugfs.o pipeline.o smart_freq.o mvp_locking.o
obj-$(CONFIG_SCHED_WALT_DEBUG) += sched-walt-debug.o
sched-walt-debug-$(CONFIG_SCHED_WALT_DEBUG) := walt_debug.o preemptirq_long.o

kernel/sched/walt/boost.c (new file, 359 lines)

@@ -0,0 +1,359 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/of.h>
#include "walt.h"
#include "trace.h"
/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with a higher capacity than the ones where they would have normally
* ended up given their load characteristics. Any entity enabling
* boost is responsible for disabling it as well.
*/
unsigned int sched_boost_type;
enum sched_boost_policy boost_policy;
static DEFINE_MUTEX(boost_mutex);
void walt_init_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] = false;
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = false;
}
void walt_init_topapp_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = true;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
void walt_init_foreground_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
void walt_init_foregroundboost_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
/*
* Scheduler boost type and boost policy might at first seem unrelated,
* however, there exists a connection between them that will allow us
* to use them interchangeably during placement decisions. We'll explain
* the connection here in one possible way so that the implications are
* clear when looking at placement policies.
*
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
* neither be none nor RESTRAINED.
*/
static void set_boost_policy(int type)
{
if (type == NO_BOOST || type == RESTRAINED_BOOST) {
boost_policy = SCHED_BOOST_NONE;
return;
}
if (hmp_capable()) {
boost_policy = SCHED_BOOST_ON_BIG;
return;
}
boost_policy = SCHED_BOOST_ON_ALL;
}
static bool verify_boost_params(int type)
{
return type >= BALANCE_BOOST_DISABLE && type <= BALANCE_BOOST;
}
static void sched_no_boost_nop(void)
{
}
static void sched_full_throttle_boost_enter(void)
{
core_ctl_set_boost(true);
walt_enable_frequency_aggregation(true);
}
static void sched_full_throttle_boost_exit(void)
{
core_ctl_set_boost(false);
walt_enable_frequency_aggregation(false);
}
static void sched_conservative_boost_enter(void)
{
}
static void sched_conservative_boost_exit(void)
{
}
static void sched_restrained_boost_enter(void)
{
walt_enable_frequency_aggregation(true);
}
static void sched_restrained_boost_exit(void)
{
walt_enable_frequency_aggregation(false);
}
static void sched_storage_boost_enter(void)
{
core_ctl_set_boost(true);
}
static void sched_storage_boost_exit(void)
{
core_ctl_set_boost(false);
}
static void sched_balance_boost_enter(void)
{
core_ctl_set_boost(true);
}
static void sched_balance_boost_exit(void)
{
core_ctl_set_boost(false);
}
struct sched_boost_data {
int refcount;
void (*enter)(void);
void (*exit)(void);
};
static struct sched_boost_data sched_boosts[] = {
[NO_BOOST] = {
.refcount = 0,
.enter = sched_no_boost_nop,
.exit = sched_no_boost_nop,
},
[FULL_THROTTLE_BOOST] = {
.refcount = 0,
.enter = sched_full_throttle_boost_enter,
.exit = sched_full_throttle_boost_exit,
},
[CONSERVATIVE_BOOST] = {
.refcount = 0,
.enter = sched_conservative_boost_enter,
.exit = sched_conservative_boost_exit,
},
[RESTRAINED_BOOST] = {
.refcount = 0,
.enter = sched_restrained_boost_enter,
.exit = sched_restrained_boost_exit,
},
[STORAGE_BOOST] = {
.refcount = 0,
.enter = sched_storage_boost_enter,
.exit = sched_storage_boost_exit,
},
[BALANCE_BOOST] = {
.refcount = 0,
.enter = sched_balance_boost_enter,
.exit = sched_balance_boost_exit,
},
};
#define SCHED_BOOST_START FULL_THROTTLE_BOOST
#define SCHED_BOOST_END (BALANCE_BOOST + 1)
static int sched_effective_boost(void)
{
int i;
/*
* The boosts are sorted in descending order by
* priority.
*/
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++) {
if (sched_boosts[i].refcount >= 1)
return i;
}
return NO_BOOST;
}
static void sched_boost_disable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
if (sb->refcount <= 0)
return;
sb->refcount--;
if (sb->refcount)
return;
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
/*
* This boost's refcount becomes zero, so it must
* be disabled. Disable it first and then apply
* the next boost.
*/
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_enable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
sb->refcount++;
if (sb->refcount != 1)
return;
/*
* This is the first enable request for this boost type.
* Take this new request and find the next boost
* by aggregating all the enabled boosts. If there
* is a change, disable the previous boost and enable
* the next boost.
*/
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_disable_all(void)
{
int i;
int prev_boost = sched_boost_type;
if (prev_boost != NO_BOOST) {
sched_boosts[prev_boost].exit();
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++)
sched_boosts[i].refcount = 0;
}
}
static void _sched_set_boost(int type)
{
if (type == 0)
sched_boost_disable_all();
else if (type > 0)
sched_boost_enable(type);
else
sched_boost_disable(-type);
/*
* sysctl_sched_boost holds the boost request from
* user space which could be different from the
* effectively enabled boost. Update the effective
* boost here.
*/
sched_boost_type = sched_effective_boost();
sysctl_sched_boost = sched_boost_type;
set_boost_policy(sysctl_sched_boost);
trace_sched_set_boost(sysctl_sched_boost);
}
int sched_set_boost(int type)
{
int ret = 0;
if (unlikely(walt_disabled))
return -EAGAIN;
mutex_lock(&boost_mutex);
if (verify_boost_params(type))
_sched_set_boost(type);
else
ret = -EINVAL;
mutex_unlock(&boost_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(sched_set_boost);
int sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
unsigned int *data = (unsigned int *)table->data;
mutex_lock(&boost_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto done;
if (verify_boost_params(*data))
_sched_set_boost(*data);
else
ret = -EINVAL;
done:
mutex_unlock(&boost_mutex);
return ret;
}
void walt_boost_init(void)
{
/* force call the callbacks for default boost */
sched_set_boost(FULL_THROTTLE_BOOST);
}
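
The refcounting in sched_boost_enable()/sched_boost_disable() implies a usage contract for callers of sched_set_boost(): a positive type takes a reference on that boost, the negated type drops it, and 0 drops every active boost at once. A minimal sketch of a hypothetical in-kernel client of this API follows; the example_* functions are illustrative only and not part of this commit.

/* Illustrative only: pairing a boost request with its release. */
static int example_storage_burst_begin(void)
{
	/* Takes a reference on STORAGE_BOOST; returns -EAGAIN if WALT is disabled. */
	return sched_set_boost(STORAGE_BOOST);
}

static void example_storage_burst_end(void)
{
	/* The negated type releases the reference taken above. */
	sched_set_boost(-STORAGE_BOOST);
}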

kernel/sched/walt/core_ctl.c (new file, 1921 lines): diff suppressed because it is too large

File diff suppressed because it is too large.


@@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/debugfs.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "trace.h"
unsigned int debugfs_walt_features;
static struct dentry *debugfs_walt;
void walt_register_debugfs(void)
{
debugfs_walt = debugfs_create_dir("walt", NULL);
debugfs_create_u32("walt_features", 0644, debugfs_walt, &debugfs_walt_features);
}

kernel/sched/walt/fixup.c (new file, 185 lines)

@@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2021-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/cpufreq.h>
#include <trace/hooks/topology.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "walt.h"
unsigned int cpuinfo_max_freq_cached;
char sched_lib_name[LIB_PATH_LENGTH];
char sched_lib_task[LIB_PATH_LENGTH];
unsigned int sched_lib_mask_force;
static bool is_sched_lib_based_app(pid_t pid)
{
const char *name = NULL;
char *libname, *lib_list;
struct vm_area_struct *vma;
char path_buf[LIB_PATH_LENGTH];
char *tmp_lib_name;
bool found = false;
struct task_struct *p;
struct mm_struct *mm;
if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0)
return false;
tmp_lib_name = kmalloc(LIB_PATH_LENGTH, GFP_KERNEL);
if (!tmp_lib_name)
return false;
rcu_read_lock();
p = pid ? get_pid_task(find_vpid(pid), PIDTYPE_PID) : get_task_struct(current);
rcu_read_unlock();
if (!p) {
kfree(tmp_lib_name);
return false;
}
mm = get_task_mm(p);
if (mm) {
MA_STATE(mas, &mm->mm_mt, 0, 0);
down_read(&mm->mmap_lock);
mas_for_each(&mas, vma, ULONG_MAX) {
if (vma->vm_file && vma->vm_flags & VM_EXEC) {
name = d_path(&vma->vm_file->f_path,
path_buf, LIB_PATH_LENGTH);
if (IS_ERR(name))
goto release_sem;
strscpy(tmp_lib_name, sched_lib_name, LIB_PATH_LENGTH);
lib_list = tmp_lib_name;
while ((libname = strsep(&lib_list, ","))) {
libname = skip_spaces(libname);
if (strnstr(name, libname,
strnlen(name, LIB_PATH_LENGTH))) {
found = true;
goto release_sem;
}
}
}
}
release_sem:
up_read(&mm->mmap_lock);
mmput(mm);
}
put_task_struct(p);
kfree(tmp_lib_name);
return found;
}
bool is_sched_lib_task(void)
{
if (strnlen(sched_lib_task, LIB_PATH_LENGTH) == 0)
return false;
if (strnstr(current->comm, sched_lib_task, strnlen(current->comm, LIB_PATH_LENGTH)))
return true;
return false;
}
static char cpu_cap_fixup_target[TASK_COMM_LEN];
static int proc_cpu_capacity_fixup_target_show(struct seq_file *m, void *data)
{
seq_printf(m, "%s\n", cpu_cap_fixup_target);
return 0;
}
static int proc_cpu_capacity_fixup_target_open(struct inode *inode,
struct file *file)
{
return single_open(file, proc_cpu_capacity_fixup_target_show, NULL);
}
static ssize_t proc_cpu_capacity_fixup_target_write(struct file *file,
const char __user *buf, size_t count, loff_t *offs)
{
char temp[TASK_COMM_LEN] = {0, };
int len = 0;
if (!count)
return -EINVAL;
len = (count >= TASK_COMM_LEN) ? TASK_COMM_LEN - 1 : count;
if (copy_from_user(temp, buf, len))
return -EFAULT;
if (temp[len - 1] == '\n')
temp[len - 1] = '\0';
strlcpy(cpu_cap_fixup_target, temp, TASK_COMM_LEN);
return count;
}
static const struct proc_ops proc_cpu_capacity_fixup_target_op = {
.proc_open = proc_cpu_capacity_fixup_target_open,
.proc_write = proc_cpu_capacity_fixup_target_write,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
static void android_rvh_show_max_freq(void *unused, struct cpufreq_policy *policy,
unsigned int *max_freq)
{
int curr_len = 0;
if (!cpuinfo_max_freq_cached)
return;
curr_len = strnlen(current->comm, TASK_COMM_LEN);
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
*max_freq = cpuinfo_max_freq_cached;
return;
}
}
if (!(BIT(policy->cpu) & sched_lib_mask_force))
return;
if (is_sched_lib_based_app(current->pid) || is_sched_lib_task())
*max_freq = cpuinfo_max_freq_cached << 1;
}
static void android_rvh_cpu_capacity_show(void *unused,
unsigned long *capacity, int cpu)
{
int curr_len = 0;
curr_len = strnlen(current->comm, TASK_COMM_LEN);
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
*capacity = SCHED_CAPACITY_SCALE;
return;
}
}
if (!soc_sched_lib_name_capacity)
return;
if ((is_sched_lib_based_app(current->pid) || is_sched_lib_task()) &&
cpu < soc_sched_lib_name_capacity)
*capacity = 100;
}
void walt_fixup_init(void)
{
if (!proc_create("cpu_capacity_fixup_target",
0660, NULL, &proc_cpu_capacity_fixup_target_op))
pr_err("Failed to register 'cpu_capacity_fixup_target'\n");
register_trace_android_rvh_show_max_freq(android_rvh_show_max_freq, NULL);
register_trace_android_rvh_cpu_capacity_show(android_rvh_cpu_capacity_show, NULL);
}
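
For reference, the matching rule that is_sched_lib_based_app() applies to each executable mapping boils down to the sketch below. It is illustrative only and not part of this commit; sched_lib_name is the comma separated list configured from userspace and LIB_PATH_LENGTH is the same buffer size used above.

/* Illustrative only: does any entry of the comma separated list match the path? */
static bool example_sched_lib_match(const char *vma_path, char *list)
{
	char *entry;

	while ((entry = strsep(&list, ","))) {
		entry = skip_spaces(entry);
		if (strnstr(vma_path, entry, strnlen(vma_path, LIB_PATH_LENGTH)))
			return true;
	}
	return false;
}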


@@ -0,0 +1,300 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013-2015,2017,2019-2021, The Linux Foundation. All rights reserved.
*/
#define pr_fmt(fmt) "input-boost: " fmt
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/input.h>
#include <linux/time.h>
#include <linux/sysfs.h>
#include <linux/pm_qos.h>
#include "walt.h"
#define input_boost_attr_rw(_name) \
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, show_##_name, store_##_name)
#define show_one(file_name) \
static ssize_t show_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
{ \
return scnprintf(buf, PAGE_SIZE, "%u\n", file_name); \
}
#define store_one(file_name) \
static ssize_t store_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, \
const char *buf, size_t count) \
{ \
\
sscanf(buf, "%u", &file_name); \
return count; \
}
struct cpu_sync {
int cpu;
unsigned int input_boost_min;
unsigned int input_boost_freq;
};
static DEFINE_PER_CPU(struct cpu_sync, sync_info);
static struct workqueue_struct *input_boost_wq;
static struct work_struct input_boost_work;
static bool sched_boost_active;
static struct delayed_work input_boost_rem;
static u64 last_input_time;
#define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC)
static DEFINE_PER_CPU(struct freq_qos_request, qos_req);
static void boost_adjust_notify(struct cpufreq_policy *policy)
{
unsigned int cpu = policy->cpu;
struct cpu_sync *s = &per_cpu(sync_info, cpu);
unsigned int ib_min = s->input_boost_min;
struct freq_qos_request *req = &per_cpu(qos_req, cpu);
int ret;
pr_debug("CPU%u policy min before boost: %u kHz\n",
cpu, policy->min);
pr_debug("CPU%u boost min: %u kHz\n", cpu, ib_min);
ret = freq_qos_update_request(req, ib_min);
if (ret < 0)
pr_err("Failed to update freq constraint in boost_adjust: %d\n",
ib_min);
pr_debug("CPU%u policy min after boost: %u kHz\n", cpu, policy->min);
}
static void update_policy_online(void)
{
unsigned int i;
struct cpufreq_policy *policy;
struct cpumask online_cpus;
/* Re-evaluate policy to trigger adjust notifier for online CPUs */
cpus_read_lock();
online_cpus = *cpu_online_mask;
for_each_cpu(i, &online_cpus) {
policy = cpufreq_cpu_get(i);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, i);
return;
}
cpumask_andnot(&online_cpus, &online_cpus,
policy->related_cpus);
boost_adjust_notify(policy);
}
cpus_read_unlock();
}
static void do_input_boost_rem(struct work_struct *work)
{
unsigned int i, ret;
struct cpu_sync *i_sync_info;
/* Reset the input_boost_min for all CPUs in the system */
pr_debug("Resetting input boost min for all CPUs\n");
for_each_possible_cpu(i) {
i_sync_info = &per_cpu(sync_info, i);
i_sync_info->input_boost_min = 0;
}
/* Update policies for all online CPUs */
update_policy_online();
if (sched_boost_active) {
ret = sched_set_boost(0);
if (ret)
pr_err("input-boost: sched boost disable failed\n");
sched_boost_active = false;
}
}
static void do_input_boost(struct work_struct *work)
{
unsigned int cpu, ret;
struct cpu_sync *i_sync_info;
cancel_delayed_work_sync(&input_boost_rem);
if (sched_boost_active) {
sched_set_boost(0);
sched_boost_active = false;
}
/* Set the input_boost_min for all CPUs in the system */
pr_debug("Setting input boost min for all CPUs\n");
for_each_possible_cpu(cpu) {
i_sync_info = &per_cpu(sync_info, cpu);
i_sync_info->input_boost_min = sysctl_input_boost_freq[cpu];
}
/* Update policies for all online CPUs */
update_policy_online();
/* Enable scheduler boost to migrate tasks to big cluster */
if (sysctl_sched_boost_on_input > 0) {
ret = sched_set_boost(sysctl_sched_boost_on_input);
if (ret)
pr_err("input-boost: sched boost enable failed\n");
else
sched_boost_active = true;
}
queue_delayed_work(input_boost_wq, &input_boost_rem,
msecs_to_jiffies(sysctl_input_boost_ms));
}
static void inputboost_input_event(struct input_handle *handle,
unsigned int type, unsigned int code, int value)
{
u64 now;
int cpu;
int enabled = 0;
for_each_possible_cpu(cpu) {
if (sysctl_input_boost_freq[cpu] > 0) {
enabled = 1;
break;
}
}
if (!enabled)
return;
now = ktime_to_us(ktime_get());
if (now - last_input_time < MIN_INPUT_INTERVAL)
return;
if (work_pending(&input_boost_work))
return;
queue_work(input_boost_wq, &input_boost_work);
last_input_time = ktime_to_us(ktime_get());
}
static int inputboost_input_connect(struct input_handler *handler,
struct input_dev *dev, const struct input_device_id *id)
{
struct input_handle *handle;
int error;
handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
handle->dev = dev;
handle->handler = handler;
handle->name = "cpufreq";
error = input_register_handle(handle);
if (error)
goto err2;
error = input_open_device(handle);
if (error)
goto err1;
return 0;
err1:
input_unregister_handle(handle);
err2:
kfree(handle);
return error;
}
static void inputboost_input_disconnect(struct input_handle *handle)
{
input_close_device(handle);
input_unregister_handle(handle);
kfree(handle);
}
static const struct input_device_id inputboost_ids[] = {
/* multi-touch touchscreen */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT_MASK(EV_ABS) },
.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
BIT_MASK(ABS_MT_POSITION_X) |
BIT_MASK(ABS_MT_POSITION_Y)
},
},
/* touchpad */
{
.flags = INPUT_DEVICE_ID_MATCH_KEYBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
.absbit = { [BIT_WORD(ABS_X)] =
BIT_MASK(ABS_X) | BIT_MASK(ABS_Y)
},
},
/* Keypad */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
.evbit = { BIT_MASK(EV_KEY) },
},
{ },
};
static struct input_handler inputboost_input_handler = {
.event = inputboost_input_event,
.connect = inputboost_input_connect,
.disconnect = inputboost_input_disconnect,
.name = "input-boost",
.id_table = inputboost_ids,
};
struct kobject *input_boost_kobj;
int input_boost_init(void)
{
int cpu, ret;
struct cpu_sync *s;
struct cpufreq_policy *policy;
struct freq_qos_request *req;
input_boost_wq = alloc_workqueue("inputboost_wq", WQ_HIGHPRI, 0);
if (!input_boost_wq)
return -EFAULT;
INIT_WORK(&input_boost_work, do_input_boost);
INIT_DELAYED_WORK(&input_boost_rem, do_input_boost_rem);
for_each_possible_cpu(cpu) {
s = &per_cpu(sync_info, cpu);
s->cpu = cpu;
req = &per_cpu(qos_req, cpu);
policy = cpufreq_cpu_get(cpu);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, cpu);
return -ESRCH;
}
ret = freq_qos_add_request(&policy->constraints, req,
FREQ_QOS_MIN, policy->min);
if (ret < 0) {
pr_err("%s: Failed to add freq constraint (%d)\n",
__func__, ret);
return ret;
}
}
return input_register_handler(&inputboost_input_handler);
}
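
The boost itself is a plain cpufreq QoS operation: input_boost_init() adds one FREQ_QOS_MIN request per CPU, do_input_boost() raises it to sysctl_input_boost_freq, and do_input_boost_rem() relaxes it again. A condensed sketch of that pattern using the standard freq_qos API is shown below; example_min_freq_boost is a hypothetical helper, not part of this commit.

/* Illustrative only: per-CPU minimum-frequency boost via freq_qos. */
static int example_min_freq_boost(struct cpufreq_policy *policy,
				  struct freq_qos_request *req,
				  unsigned int boost_khz)
{
	int ret;

	ret = freq_qos_add_request(&policy->constraints, req,
				   FREQ_QOS_MIN, policy->min);
	if (ret < 0)
		return ret;

	/* Raise the frequency floor for the boost period... */
	freq_qos_update_request(req, boost_khz);

	/* ...and later drop it back so the governor is unconstrained again. */
	freq_qos_update_request(req, 0);
	return 0;
}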


@@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/dtask.h>
#include "../../locking/mutex.h"
#include "walt.h"
static void android_vh_alter_mutex_list_add(void *unused, struct mutex *lock,
struct mutex_waiter *waiter, struct list_head *list,
bool *already_on_list)
{
struct walt_task_struct *wts_waiter =
(struct walt_task_struct *)current->android_vendor_data1;
struct mutex_waiter *pos = NULL;
struct mutex_waiter *n = NULL;
struct list_head *head = list;
struct walt_task_struct *wts;
if (unlikely(walt_disabled))
return;
if (!lock || !waiter || !list)
return;
if (!is_mvp(wts_waiter))
return;
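/*
 * Walk the current waiters and queue this MVP waiter just ahead of the
 * first non-MVP waiter, so MVP tasks acquire the mutex before regular
 * waiters while keeping FIFO order among MVP waiters themselves.
 */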
list_for_each_entry_safe(pos, n, head, list) {
wts = (struct walt_task_struct *)
((struct task_struct *)(pos->task)->android_vendor_data1);
if (!is_mvp(wts)) {
list_add(&waiter->list, pos->list.prev);
*already_on_list = true;
break;
}
}
}
void walt_mvp_lock_ordering_init(void)
{
register_trace_android_vh_alter_mutex_list_add(android_vh_alter_mutex_list_add, NULL);
}


@@ -0,0 +1,239 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2013-2014, 2017, 2021, The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM perf_trace_counters
#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _PERF_TRACE_COUNTERS_H_
/* Ctr index for PMCNTENSET/CLR */
#define CC 0x80000000
#define C0 0x1
#define C1 0x2
#define C2 0x4
#define C3 0x8
#define C4 0x10
#define C5 0x20
#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5)
#define TYPE_MASK 0xFFFF
#define NUM_L1_CTRS 6
#define NUM_AMU_CTRS 3
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/tracepoint.h>
DECLARE_PER_CPU(u32, cntenset_val);
DECLARE_PER_CPU(unsigned long, previous_ccnt);
DECLARE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DECLARE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
unsigned int state;
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
/*
* Preemption ignores task state, therefore preempted tasks are always
* RUNNING (we will not have dequeued if state != RUNNING).
*/
if (preempt)
return TASK_REPORT_MAX;
/*
* task_state_index() uses fls() and returns a value in the 0-8 range.
* Decrement it by 1 (except for the TASK_RUNNING state, i.e. 0) before using
* it in the left shift operation to get the correct task->state
* mapping.
*/
state = task_state_index(p);
return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */
TRACE_EVENT(sched_switch_with_ctrs,
TP_PROTO(bool preempt,
struct task_struct *prev,
struct task_struct *next),
TP_ARGS(preempt, prev, next),
TP_STRUCT__entry(
__field(pid_t, prev_pid)
__field(pid_t, next_pid)
__array(char, prev_comm, TASK_COMM_LEN)
__array(char, next_comm, TASK_COMM_LEN)
__field(long, prev_state)
__field(unsigned long, cctr)
__field(unsigned long, ctr0)
__field(unsigned long, ctr1)
__field(unsigned long, ctr2)
__field(unsigned long, ctr3)
__field(unsigned long, ctr4)
__field(unsigned long, ctr5)
__field(unsigned long, amu0)
__field(unsigned long, amu1)
__field(unsigned long, amu2)
),
TP_fast_assign(
u32 cpu = smp_processor_id();
u32 i;
u32 cnten_val;
unsigned long total_ccnt = 0;
unsigned long total_cnt = 0;
unsigned long amu_cnt = 0;
unsigned long delta_l1_cnts[NUM_L1_CTRS] = {0};
unsigned long delta_amu_cnts[NUM_AMU_CTRS] = {0};
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
__entry->prev_pid = prev->pid;
__entry->next_pid = next->pid;
cnten_val = per_cpu(cntenset_val, cpu);
if (cnten_val & CC) {
/* Read value */
total_ccnt = read_sysreg(pmccntr_el0);
__entry->cctr = total_ccnt -
per_cpu(previous_ccnt, cpu);
per_cpu(previous_ccnt, cpu) = total_ccnt;
}
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read value */
total_cnt = read_sysreg(pmxevcntr_el0);
delta_l1_cnts[i] = total_cnt -
per_cpu(previous_l1_cnts[i], cpu);
per_cpu(previous_l1_cnts[i], cpu) =
total_cnt;
} else
delta_l1_cnts[i] = 0;
}
if (IS_ENABLED(CONFIG_ARM64_AMU_EXTN)) {
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
delta_amu_cnts[0] = amu_cnt -
per_cpu(previous_amu_cnts[0], cpu);
per_cpu(previous_amu_cnts[0], cpu) = amu_cnt;
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
delta_amu_cnts[1] = amu_cnt -
per_cpu(previous_amu_cnts[1], cpu);
per_cpu(previous_amu_cnts[1], cpu) = amu_cnt;
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
delta_amu_cnts[2] = amu_cnt -
per_cpu(previous_amu_cnts[2], cpu);
per_cpu(previous_amu_cnts[2], cpu) = amu_cnt;
}
__entry->ctr0 = delta_l1_cnts[0];
__entry->ctr1 = delta_l1_cnts[1];
__entry->ctr2 = delta_l1_cnts[2];
__entry->ctr3 = delta_l1_cnts[3];
__entry->ctr4 = delta_l1_cnts[4];
__entry->ctr5 = delta_l1_cnts[5];
__entry->amu0 = delta_amu_cnts[0];
__entry->amu1 = delta_amu_cnts[1];
__entry->amu2 = delta_amu_cnts[2];
),
TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s next_pid=%d CCNTR=%lu CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu, CYC: %lu, INST: %lu, STALL: %lu",
__entry->prev_comm, __entry->prev_pid,
(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
{ TASK_INTERRUPTIBLE, "S" },
{ TASK_UNINTERRUPTIBLE, "D" },
{ __TASK_STOPPED, "T" },
{ __TASK_TRACED, "t" },
{ EXIT_DEAD, "X" },
{ EXIT_ZOMBIE, "Z" },
{ TASK_PARKED, "P" },
{ TASK_DEAD, "I" }) :
"R",
__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
__entry->next_comm,
__entry->next_pid,
__entry->cctr,
__entry->ctr0, __entry->ctr1,
__entry->ctr2, __entry->ctr3,
__entry->ctr4, __entry->ctr5,
__entry->amu0, __entry->amu1,
__entry->amu2)
);
TRACE_EVENT(sched_switch_ctrs_cfg,
TP_PROTO(int cpu),
TP_ARGS(cpu),
TP_STRUCT__entry(
__field(int, cpu)
__field(unsigned long, ctr0)
__field(unsigned long, ctr1)
__field(unsigned long, ctr2)
__field(unsigned long, ctr3)
__field(unsigned long, ctr4)
__field(unsigned long, ctr5)
),
TP_fast_assign(
u32 i;
u32 cnten_val;
u32 ctr_type[NUM_L1_CTRS] = {0};
cnten_val = per_cpu(cntenset_val, cpu);
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read type */
ctr_type[i] = read_sysreg(pmxevtyper_el0)
& TYPE_MASK;
} else
ctr_type[i] = 0;
}
__entry->cpu = cpu;
__entry->ctr0 = ctr_type[0];
__entry->ctr1 = ctr_type[1];
__entry->ctr2 = ctr_type[2];
__entry->ctr3 = ctr_type[3];
__entry->ctr4 = ctr_type[4];
__entry->ctr5 = ctr_type[5];
),
TP_printk("cpu=%d CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu",
__entry->cpu,
__entry->ctr0, __entry->ctr1,
__entry->ctr2, __entry->ctr3,
__entry->ctr4, __entry->ctr5)
);
#endif
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../kernel/sched/walt
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE perf_trace_counters
#include <trace/define_trace.h>


@@ -0,0 +1,762 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "walt.h"
#include "trace.h"
static DEFINE_RAW_SPINLOCK(pipeline_lock);
static struct walt_task_struct *pipeline_wts[WALT_NR_CPUS];
int pipeline_nr;
static DEFINE_RAW_SPINLOCK(heavy_lock);
static struct walt_task_struct *heavy_wts[MAX_NR_PIPELINE];
bool pipeline_pinning;
static inline int pipeline_demand(struct walt_task_struct *wts)
{
return scale_time_to_util(wts->coloc_demand);
}
int add_pipeline(struct walt_task_struct *wts)
{
int i, pos = -1, ret = -ENOSPC;
unsigned long flags;
int max_nr_pipeline = cpumask_weight(&cpus_for_pipeline);
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&pipeline_lock, flags);
for (i = 0; i < max_nr_pipeline; i++) {
if (wts == pipeline_wts[i]) {
ret = 0;
goto out;
}
if (pipeline_wts[i] == NULL)
pos = i;
}
if (pos != -1) {
pipeline_wts[pos] = wts;
pipeline_nr++;
ret = 0;
}
out:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
return ret;
}
int remove_pipeline(struct walt_task_struct *wts)
{
int i, j, ret = 0;
unsigned long flags;
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&pipeline_lock, flags);
for (i = 0; i < WALT_NR_CPUS; i++) {
if (wts == pipeline_wts[i]) {
wts->low_latency &= ~WALT_LOW_LATENCY_PIPELINE_BIT;
pipeline_wts[i] = NULL;
pipeline_nr--;
for (j = i; j < WALT_NR_CPUS - 1; j++) {
pipeline_wts[j] = pipeline_wts[j + 1];
pipeline_wts[j + 1] = NULL;
}
goto out;
}
}
out:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
return ret;
}
int remove_heavy(struct walt_task_struct *wts)
{
int i, j, ret = 0;
unsigned long flags;
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&heavy_lock, flags);
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (wts == heavy_wts[i]) {
wts->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i] = NULL;
have_heavy_list--;
for (j = i; j < MAX_NR_PIPELINE - 1; j++) {
heavy_wts[j] = heavy_wts[j + 1];
heavy_wts[j + 1] = NULL;
}
goto out;
}
}
out:
raw_spin_unlock_irqrestore(&heavy_lock, flags);
return ret;
}
void remove_special_task(void)
{
unsigned long flags;
raw_spin_lock_irqsave(&heavy_lock, flags);
/*
* Although the pipeline special task designation is removed,
* if the task is not dead (i.e. this function was called from sysctl context)
* the task will continue to enjoy pipeline privileges until the next update in
* find_heaviest_topapp().
*/
pipeline_special_task = NULL;
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
void set_special_task(struct task_struct *pipeline_special_local)
{
unsigned long flags;
raw_spin_lock_irqsave(&heavy_lock, flags);
pipeline_special_task = pipeline_special_local;
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
/* always set unisolation for max cluster, for pipeline tasks */
static inline void pipeline_set_unisolation(bool set, int flag)
{
static bool unisolation_state;
struct walt_sched_cluster *cluster;
static unsigned int enable_pipeline_unisolation;
if (!set)
enable_pipeline_unisolation &= ~(1 << flag);
else
enable_pipeline_unisolation |= (1 << flag);
if (unisolation_state && !enable_pipeline_unisolation) {
unisolation_state = false;
for_each_sched_cluster(cluster) {
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
core_ctl_set_cluster_boost(cluster->id, false);
}
} else if (!unisolation_state && enable_pipeline_unisolation) {
unisolation_state = true;
for_each_sched_cluster(cluster) {
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
core_ctl_set_cluster_boost(cluster->id, true);
}
}
}
/*
* sysctl_sched_heavy_nr or sysctl_sched_pipeline_util_thres can change at any moment in time.
* as a result, the ability to set/clear unisolation state for a particular type of pipeline, is
* hindered. Detect a transition and reset the unisolation state of the pipeline method no longer
* in use.
*/
static inline void pipeline_reset_unisolation_state(void)
{
static bool last_auto_pipeline;
if ((sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres) && !last_auto_pipeline) {
pipeline_set_unisolation(false, MANUAL_PIPELINE);
last_auto_pipeline = true;
} else if (!sysctl_sched_heavy_nr &&
!sysctl_sched_pipeline_util_thres && last_auto_pipeline) {
pipeline_set_unisolation(false, AUTO_PIPELINE);
last_auto_pipeline = false;
}
}
static inline bool should_pipeline_pin_special(void)
{
if (!pipeline_special_task)
return false;
if (!heavy_wts[MAX_NR_PIPELINE - 1])
return false;
if (pipeline_demand(heavy_wts[0]) <= sysctl_pipeline_special_task_util_thres)
return true;
if (pipeline_demand(heavy_wts[1]) <= sysctl_pipeline_non_special_task_util_thres)
return true;
if (pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_low_pct, 100)))
return false;
if (!pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_high_pct, 100)))
return false;
return true;
}
cpumask_t last_available_big_cpus = CPU_MASK_NONE;
int have_heavy_list;
u32 total_util;
bool find_heaviest_topapp(u64 window_start)
{
struct walt_related_thread_group *grp;
struct walt_task_struct *wts;
unsigned long flags;
static u64 last_rearrange_ns;
int i, j, start;
struct walt_task_struct *heavy_wts_to_drop[MAX_NR_PIPELINE];
if (num_sched_clusters < 2)
return false;
/* lazy enabling/disabling, up to 100 ms, for colocation or heavy_nr change */
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
if (!grp || (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres) ||
sched_boost_type) {
if (have_heavy_list) {
raw_spin_lock_irqsave(&heavy_lock, flags);
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i]) {
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i]->pipeline_cpu = -1;
heavy_wts[i] = NULL;
}
}
raw_spin_unlock_irqrestore(&heavy_lock, flags);
have_heavy_list = 0;
pipeline_set_unisolation(false, AUTO_PIPELINE);
}
return false;
}
if (last_rearrange_ns && (window_start < (last_rearrange_ns + 100 * MSEC_TO_NSEC)))
return false;
last_rearrange_ns = window_start;
raw_spin_lock_irqsave(&grp->lock, flags);
raw_spin_lock(&heavy_lock);
/* remember the old ones in _to_drop[] */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
heavy_wts_to_drop[i] = heavy_wts[i];
heavy_wts[i] = NULL;
}
/* Assign user specified one (if exists) to slot 0*/
if (pipeline_special_task) {
heavy_wts[0] = (struct walt_task_struct *)
pipeline_special_task->android_vendor_data1;
start = 1;
} else {
start = 0;
}
/*
* Ensure that heavy_wts either contains the top 3 top-app tasks,
* or the user defined heavy task followed by the top 2 top-app tasks
*/
list_for_each_entry(wts, &grp->tasks, grp_list) {
struct walt_task_struct *to_be_placed_wts = wts;
/* if the task hasn't seen action recently, skip it */
if (wts->mark_start < window_start - (sched_ravg_window * 2))
continue;
/* skip the user defined task as it's already part of the list */
if (pipeline_special_task && (wts == heavy_wts[0]))
continue;
for (i = start; i < MAX_NR_PIPELINE; i++) {
if (!heavy_wts[i]) {
heavy_wts[i] = to_be_placed_wts;
break;
} else if (pipeline_demand(to_be_placed_wts) >=
pipeline_demand(heavy_wts[i])) {
struct walt_task_struct *tmp;
tmp = heavy_wts[i];
heavy_wts[i] = to_be_placed_wts;
to_be_placed_wts = tmp;
}
}
}
/*
* Determine how many of the top three pipeline tasks should be kept as heavy.
* If the "sched_heavy_nr" node is set, the util threshold is ignored.
*/
total_util = 0;
if (sysctl_sched_heavy_nr) {
for (i = sysctl_sched_heavy_nr; i < MAX_NR_PIPELINE; i++)
heavy_wts[i] = NULL;
} else {
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i])
total_util += pipeline_demand(heavy_wts[i]);
}
if (total_util < sysctl_sched_pipeline_util_thres)
heavy_wts[MAX_NR_PIPELINE - 1] = NULL;
}
/* reset heavy for tasks that are no longer heavy */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
bool reset = true;
if (!heavy_wts_to_drop[i])
continue;
for (j = 0; j < MAX_NR_PIPELINE; j++) {
if (!heavy_wts[j])
continue;
if (heavy_wts_to_drop[i] == heavy_wts[j]) {
reset = false;
break;
}
}
if (reset) {
heavy_wts_to_drop[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts_to_drop[i]->pipeline_cpu = -1;
}
if (heavy_wts[i]) {
heavy_wts[i]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
}
}
if (heavy_wts[MAX_NR_PIPELINE - 1])
pipeline_set_unisolation(true, AUTO_PIPELINE);
else
pipeline_set_unisolation(false, AUTO_PIPELINE);
raw_spin_unlock(&heavy_lock);
raw_spin_unlock_irqrestore(&grp->lock, flags);
return true;
}
void assign_heaviest_topapp(bool found_topapp)
{
int i;
struct walt_task_struct *wts;
if (!found_topapp)
return;
raw_spin_lock(&heavy_lock);
/* start with non-prime cpus chosen for this chipset (e.g. golds) */
cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
cpumask_andnot(&last_available_big_cpus, &last_available_big_cpus, cpu_halt_mask);
/*
* Ensure the special task is only pinned if there are 3 auto pipeline tasks and
* certain demand conditions hold between the special pipeline task and the
* largest non-special pipeline task.
*/
if (should_pipeline_pin_special()) {
pipeline_pinning = true;
heavy_wts[0]->pipeline_cpu =
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
heavy_wts[0]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
if (cpumask_test_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus))
cpumask_clear_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus);
} else {
pipeline_pinning = false;
}
for (i = 0; i < MAX_NR_PIPELINE; i++) {
wts = heavy_wts[i];
if (!wts)
continue;
if (i == 0 && pipeline_pinning)
continue;
if (wts->pipeline_cpu != -1) {
if (cpumask_test_cpu(wts->pipeline_cpu, &last_available_big_cpus))
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
else
/* avoid assigning two pipelines to same cpu */
wts->pipeline_cpu = -1;
}
}
have_heavy_list = 0;
/* assign cpus and heavy status to the new heavy */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
wts = heavy_wts[i];
if (!wts)
continue;
if (wts->pipeline_cpu == -1) {
wts->pipeline_cpu = cpumask_last(&last_available_big_cpus);
if (wts->pipeline_cpu >= nr_cpu_ids) {
/* drop from heavy if it can't be assigned */
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i]->pipeline_cpu = -1;
heavy_wts[i] = NULL;
} else {
/*
* clear the cpu from the available list of pipeline cpus,
* as pipeline_cpu is now assigned for this task.
*/
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
}
}
if (wts->pipeline_cpu >= 0)
have_heavy_list++;
}
if (trace_sched_pipeline_tasks_enabled()) {
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i] != NULL)
trace_sched_pipeline_tasks(AUTO_PIPELINE, i, heavy_wts[i],
have_heavy_list, total_util, pipeline_pinning);
}
}
raw_spin_unlock(&heavy_lock);
}
static inline void swap_pipeline_with_prime_locked(struct walt_task_struct *prime_wts,
struct walt_task_struct *other_wts)
{
if (prime_wts && other_wts) {
if (pipeline_demand(prime_wts) < pipeline_demand(other_wts)) {
int cpu;
cpu = other_wts->pipeline_cpu;
other_wts->pipeline_cpu = prime_wts->pipeline_cpu;
prime_wts->pipeline_cpu = cpu;
trace_sched_pipeline_swapped(other_wts, prime_wts);
}
} else if (!prime_wts && other_wts) {
/* if the prime preferred task died, promote the gold task to prime; assumes 1 prime */
other_wts->pipeline_cpu =
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
trace_sched_pipeline_swapped(other_wts, prime_wts);
}
}
#define WINDOW_HYSTERESIS 4
static inline bool delay_rearrange(u64 window_start, int pipeline_type, bool force)
{
static u64 last_rearrange_ns[MAX_PIPELINE_TYPES];
if (!force && last_rearrange_ns[pipeline_type] &&
(window_start < (last_rearrange_ns[pipeline_type] +
(sched_ravg_window*WINDOW_HYSTERESIS))))
return true;
last_rearrange_ns[pipeline_type] = window_start;
return false;
}
static inline void find_prime_and_max_tasks(struct walt_task_struct **wts_list,
struct walt_task_struct **prime_wts,
struct walt_task_struct **other_wts)
{
int i;
int max_demand = 0;
for (i = 0; i < MAX_NR_PIPELINE; i++) {
struct walt_task_struct *wts = wts_list[i];
if (wts == NULL)
continue;
if (wts->pipeline_cpu < 0)
continue;
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
if (prime_wts)
*prime_wts = wts;
} else if (other_wts && pipeline_demand(wts) > max_demand) {
max_demand = pipeline_demand(wts);
*other_wts = wts;
}
}
}
static inline bool is_prime_worthy(struct walt_task_struct *wts)
{
struct task_struct *p;
if (wts == NULL)
return false;
if (num_sched_clusters < 2)
return true;
p = wts_to_ts(wts);
/*
* Assume the first row of cpu arrays represents the order of clusters
* in magnitude of capacities, where the last column represents prime,
* and the second to last column represents golds
*/
return !task_fits_max(p, cpumask_last(&cpu_array[0][num_sched_clusters - 2]));
}
void rearrange_heavy(u64 window_start, bool force)
{
struct walt_task_struct *prime_wts = NULL;
struct walt_task_struct *other_wts = NULL;
unsigned long flags;
if (num_sched_clusters < 2)
return;
raw_spin_lock_irqsave(&heavy_lock, flags);
/*
* TODO: As primes are isolated under have_heavy_list < 3, and pipeline misfits are also
* disabled, setting the prime worthy task's pipeline_cpu as CPU7 could lead to the
* pipeline_cpu selection being ignored until the next run of find_heaviest_topapp(),
* and furthermore remove the task's current gold pipeline_cpu, which could cause the
* task to start bouncing around on the golds, and ultimately lead to suboptimal behavior.
*/
if (have_heavy_list <= 2) {
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
if (prime_wts && !is_prime_worthy(prime_wts)) {
int assign_cpu;
/* demote prime_wts, it is not worthy */
assign_cpu = cpumask_first(&last_available_big_cpus);
if (assign_cpu < nr_cpu_ids) {
prime_wts->pipeline_cpu = assign_cpu;
cpumask_clear_cpu(assign_cpu, &last_available_big_cpus);
prime_wts = NULL;
}
/* if no pipeline cpu available to assign, leave task on prime */
}
if (!prime_wts && is_prime_worthy(other_wts)) {
/* promote other_wts to prime, it is worthy */
swap_pipeline_with_prime_locked(NULL, other_wts);
}
goto out;
}
if (pipeline_pinning)
goto out;
if (delay_rearrange(window_start, AUTO_PIPELINE, force))
goto out;
if (!soc_feat(SOC_ENABLE_PIPELINE_SWAPPING_BIT) && !force)
goto out;
/* swap prime for have_heavy_list >= 3 */
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
swap_pipeline_with_prime_locked(prime_wts, other_wts);
out:
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
void rearrange_pipeline_preferred_cpus(u64 window_start)
{
unsigned long flags;
struct walt_task_struct *wts;
bool set_unisolation = false;
u32 max_demand = 0;
struct walt_task_struct *prime_wts = NULL;
struct walt_task_struct *other_wts = NULL;
static int assign_cpu = -1;
static bool last_set_unisolation;
int i;
if (sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres)
return;
if (num_sched_clusters < 2)
return;
if (!pipeline_nr || sched_boost_type)
goto out;
if (delay_rearrange(window_start, MANUAL_PIPELINE, false))
goto out;
raw_spin_lock_irqsave(&pipeline_lock, flags);
set_unisolation = true;
for (i = 0; i < WALT_NR_CPUS; i++) {
wts = pipeline_wts[i];
if (!wts)
continue;
if (!wts->grp)
wts->pipeline_cpu = -1;
/*
* assumes that if one pipeline task doesn't have a preferred cpu set,
* none of the other pipeline tasks have one either
*/
if (wts->pipeline_cpu == -1) {
assign_cpu = cpumask_next_and(assign_cpu,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
/* reset and rotate the cpus */
assign_cpu = cpumask_next_and(-1,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
wts->pipeline_cpu = -1;
else
wts->pipeline_cpu = assign_cpu;
}
if (wts->pipeline_cpu != -1) {
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
/* assumes just one prime */
prime_wts = wts;
} else if (pipeline_demand(wts) > max_demand) {
max_demand = pipeline_demand(wts);
other_wts = wts;
}
}
}
if (pipeline_nr <= 2) {
set_unisolation = false;
if (prime_wts && !is_prime_worthy(prime_wts)) {
/* demote prime_wts, it is not worthy */
assign_cpu = cpumask_next_and(assign_cpu,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
/* reset and rotate the cpus */
assign_cpu = cpumask_next_and(-1,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
prime_wts->pipeline_cpu = -1;
else
prime_wts->pipeline_cpu = assign_cpu;
prime_wts = NULL;
}
if (!prime_wts && is_prime_worthy(other_wts)) {
/* promote other_wts to prime, it is worthy */
swap_pipeline_with_prime_locked(NULL, other_wts);
set_unisolation = true;
}
if (prime_wts)
set_unisolation = true;
goto release_lock;
}
/* swap prime for pipeline_nr >= 3 */
swap_pipeline_with_prime_locked(prime_wts, other_wts);
if (trace_sched_pipeline_tasks_enabled()) {
for (i = 0; i < WALT_NR_CPUS; i++) {
if (pipeline_wts[i] != NULL)
trace_sched_pipeline_tasks(MANUAL_PIPELINE, i, pipeline_wts[i],
pipeline_nr, 0, 0);
}
}
release_lock:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
out:
if (set_unisolation ^ last_set_unisolation) {
pipeline_set_unisolation(set_unisolation, MANUAL_PIPELINE);
last_set_unisolation = set_unisolation;
}
}
bool pipeline_check(struct walt_rq *wrq)
{
/* found_topapp should force rearrangement */
bool found_topapp = find_heaviest_topapp(wrq->window_start);
rearrange_pipeline_preferred_cpus(wrq->window_start);
pipeline_reset_unisolation_state();
return found_topapp;
}
void pipeline_rearrange(struct walt_rq *wrq, bool found_topapp)
{
assign_heaviest_topapp(found_topapp);
rearrange_heavy(wrq->window_start, found_topapp);
}
bool enable_load_sync(int cpu)
{
if (!cpumask_test_cpu(cpu, &pipeline_sync_cpus))
return false;
if (!pipeline_in_progress())
return false;
/*
* Under manual pipeline, only load sync between the pipeline_sync_cpus if at least one
* of the CPUs userspace has allocated for pipeline tasks belongs to the
* pipeline_sync_cpus.
*/
if (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres &&
!cpumask_intersects(&pipeline_sync_cpus, &cpus_for_pipeline))
return false;
/* Ensure to load sync only if there are 3 auto pipeline tasks */
if (have_heavy_list)
return have_heavy_list == MAX_NR_PIPELINE;
/*
* If auto pipeline is disabled, manual must be on. Ensure to load sync under manual
* pipeline only if there are 3 or more pipeline tasks
*/
return pipeline_nr >= MAX_NR_PIPELINE;
}
/*
* pipeline_fits_smaller_cpus evaluates if a pipeline task should be treated as a misfit.
* There are three possible outcomes:
* - ret -1: Continue evaluation with task_fits_max().
* - ret 0: Task should be treated as a misfit (does not fit on smaller CPUs).
* - ret 1: Task cannot be treated as a misfit (fits on smaller CPUs).
*
* If the task is assigned a pipeline CPU which is a prime CPU, ret should be 0, indicating
* the task is a misfit.
* If the number of pipeline tasks is 2 or fewer, continue evaluation of task_fits_max().
* If the number of pipeline tasks is 3 or more, ret should be 1, indicating the task fits on the
* smaller CPUs and is not a misfit.
*/
int pipeline_fits_smaller_cpus(struct task_struct *p)
{
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
unsigned int pipeline_cpu = wts->pipeline_cpu;
if (pipeline_cpu == -1)
return -1;
if (cpumask_test_cpu(pipeline_cpu, &cpu_array[0][num_sched_clusters-1]))
return 0;
if (have_heavy_list) {
if (have_heavy_list == MAX_NR_PIPELINE)
return 1;
else
return -1;
}
if (pipeline_nr >= MAX_NR_PIPELINE)
return 1;
else
return -1;
}
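
The heavy-task selection in find_heaviest_topapp() is a bounded insertion into a fixed-size array: each candidate either fills an empty slot or displaces a lighter entry, and the displaced entry keeps competing for the remaining slots. A standalone sketch of that step is shown below; example_insert_top_n is illustrative only, not part of this commit.

/* Illustrative only: keep the n heaviest tasks seen so far, heaviest first. */
static void example_insert_top_n(struct walt_task_struct **top, int n,
				 struct walt_task_struct *cand)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!top[i]) {
			top[i] = cand;
			return;
		}
		if (pipeline_demand(cand) >= pipeline_demand(top[i])) {
			struct walt_task_struct *tmp = top[i];

			top[i] = cand;
			cand = tmp;
		}
	}
}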


@@ -0,0 +1,176 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021 The Linux Foundation. All rights reserved.
*/
#include <linux/ftrace.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <trace/hooks/preemptirq.h>
#define CREATE_TRACE_POINTS
#include "preemptirq_long.h"
#define IRQSOFF_SENTINEL 0x0fffDEAD
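/*
 * Writing this sentinel value to irqsoff_dmesg_output_enabled or
 * irqsoff_crash_sentinel_value (see the sysctl table below) opts in to
 * dmesg output or to a deliberate crash when the thresholds are exceeded.
 */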
static unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000;
static unsigned int sysctl_irqsoff_tracing_threshold_ns = 5000000;
static unsigned int sysctl_irqsoff_dmesg_output_enabled;
static unsigned int sysctl_irqsoff_crash_sentinel_value;
static unsigned int sysctl_irqsoff_crash_threshold_ns = 10000000;
static unsigned int half_million = 500000;
static unsigned int one_hundred_million = 100000000;
static unsigned int one_million = 1000000;
static DEFINE_PER_CPU(u64, irq_disabled_ts);
/*
* Preemption disable tracking requires additional context
* to rule out false positives. See the comment in
* test_preempt_disable_long() for more details.
*/
struct preempt_store {
u64 ts;
int pid;
unsigned long ncsw;
};
static DEFINE_PER_CPU(struct preempt_store, the_ps);
static void note_irq_disable(void *u1, unsigned long u2, unsigned long u3)
{
if (is_idle_task(current))
return;
/*
* We just have to note down the time stamp here. We
* use stacktrace trigger feature to print the stacktrace.
*/
this_cpu_write(irq_disabled_ts, sched_clock());
}
static void test_irq_disable_long(void *u1, unsigned long ip, unsigned long parent_ip)
{
u64 ts = this_cpu_read(irq_disabled_ts);
if (!ts)
return;
this_cpu_write(irq_disabled_ts, 0);
ts = sched_clock() - ts;
if (ts > sysctl_irqsoff_tracing_threshold_ns) {
trace_irq_disable_long(ts, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
if (sysctl_irqsoff_dmesg_output_enabled == IRQSOFF_SENTINEL)
printk_deferred("irqs off exceeds thresh delta=%llu C:(%ps<-%ps<-%ps<-%ps)\n",
ts, (void *)CALLER_ADDR2,
(void *)CALLER_ADDR3,
(void *)CALLER_ADDR4,
(void *)CALLER_ADDR5);
}
if (sysctl_irqsoff_crash_sentinel_value == IRQSOFF_SENTINEL &&
ts > sysctl_irqsoff_crash_threshold_ns) {
printk_deferred("delta=%llu(ns) > crash_threshold=%u(ns) Task=%s\n",
ts, sysctl_irqsoff_crash_threshold_ns,
current->comm);
BUG_ON(1);
}
}
static void note_preempt_disable(void *u1, unsigned long u2, unsigned long u3)
{
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
ps->ts = sched_clock();
ps->pid = current->pid;
ps->ncsw = current->nvcsw + current->nivcsw;
}
static void test_preempt_disable_long(void *u1, unsigned long ip,
unsigned long parent_ip)
{
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
u64 delta = 0;
if (!ps->ts)
return;
/*
* schedule() calls __schedule() with preemption disabled.
* If we had entered idle and are exiting idle now, we would think
* preemption was disabled the whole time. Detect this by
* checking whether preemption stayed disabled across the same
* task. There is a possibility that the same task is scheduled
* after idle. To rule out this possibility, compare the
* context switch count also.
*/
if (ps->pid == current->pid && (ps->ncsw == current->nvcsw +
current->nivcsw))
delta = sched_clock() - ps->ts;
ps->ts = 0;
if (delta > sysctl_preemptoff_tracing_threshold_ns)
trace_preempt_disable_long(delta, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
}
static struct ctl_table preemptirq_long_table[] = {
{
.procname = "preemptoff_tracing_threshold_ns",
.data = &sysctl_preemptoff_tracing_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_tracing_threshold_ns",
.data = &sysctl_irqsoff_tracing_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = &half_million,
.extra2 = &one_hundred_million,
},
{
.procname = "irqsoff_dmesg_output_enabled",
.data = &sysctl_irqsoff_dmesg_output_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_crash_sentinel_value",
.data = &sysctl_irqsoff_crash_sentinel_value,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_crash_threshold_ns",
.data = &sysctl_irqsoff_crash_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = &one_million,
.extra2 = &one_hundred_million,
},
};
int preemptirq_long_init(void)
{
if (!register_sysctl("preemptirq", preemptirq_long_table)) {
pr_err("Fail to register sysctl table\n");
return -EPERM;
}
register_trace_android_rvh_irqs_disable(note_irq_disable, NULL);
register_trace_android_rvh_irqs_enable(test_irq_disable_long, NULL);
register_trace_android_rvh_preempt_disable(note_preempt_disable, NULL);
register_trace_android_rvh_preempt_enable(test_preempt_disable_long,
NULL);
return 0;
}


@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021 The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM preemptirq_long
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#if !defined(_TRACE_PREEMPTIRQ_LONG_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PREEMPTIRQ_LONG_H
#include <linux/tracepoint.h>
/* reference preemptirq_template */
DECLARE_EVENT_CLASS(preemptirq_long_template,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip),
TP_STRUCT__entry(
__field(u64, delta)
__field(unsigned long, caller_offs)
__field(unsigned long, parent_offs)
__field(unsigned long, pparent_offs)
__field(unsigned long, ppparent_offs)
),
TP_fast_assign(
__entry->delta = delta;
__entry->caller_offs = ip;
__entry->parent_offs = parent_ip;
__entry->pparent_offs = pparent_ip;
__entry->ppparent_offs = ppparent_ip;
),
TP_printk("delta=%llu(ns) caller=%ps <- %ps <- %ps <- %ps",
__entry->delta, (void *)__entry->caller_offs,
(void *)__entry->parent_offs, (void *)__entry->pparent_offs,
(void *)__entry->ppparent_offs)
);
DEFINE_EVENT(preemptirq_long_template, irq_disable_long,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
DEFINE_EVENT(preemptirq_long_template, preempt_disable_long,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
#endif /* _TRACE_PREEMPTIRQ_LONG_H */
/* This part must be outside protection */
#include <trace/define_trace.h>


@@ -0,0 +1,397 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
/*
* Scheduler hook for average runqueue determination
*/
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "walt.h"
#include "trace.h"
static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(int, last_time_cpu);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr_trailblazer_prod_sum);
static DEFINE_PER_CPU(u64, nr);
static DEFINE_PER_CPU(u64, nr_max);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;
static DEFINE_PER_CPU(atomic64_t, busy_hyst_end_time) = ATOMIC64_INIT(0);
static DEFINE_PER_CPU(u64, hyst_time);
static DEFINE_PER_CPU(u64, coloc_hyst_busy);
static DEFINE_PER_CPU(u64, coloc_hyst_time);
static DEFINE_PER_CPU(u64, util_hyst_time);
static DEFINE_PER_CPU(u64, smart_freq_legacy_reason_hyst_ns);
#define NR_THRESHOLD_PCT 40
#define NR_THRESHOLD_TRAIL_PCT 80
#define MAX_RTGB_TIME (sysctl_sched_coloc_busy_hyst_max_ms * NSEC_PER_MSEC)
struct sched_avg_stats stats[WALT_NR_CPUS];
unsigned int cstats_util_pct[MAX_CLUSTERS];
u8 smart_freq_legacy_reason_hyst_ms[LEGACY_SMART_FREQ][WALT_NR_CPUS];
/**
* sched_get_cluster_util_pct
 * @return: provide the percentage of this cluster that was used in the
* previous window.
*
* This routine may be called any number of times as needed during
* a window, but will always return the same result until window
* rollover.
*/
unsigned int sched_get_cluster_util_pct(struct walt_sched_cluster *cluster)
{
unsigned int cluster_util_pct = 0;
if (cluster->id < MAX_CLUSTERS)
cluster_util_pct = cstats_util_pct[cluster->id];
return cluster_util_pct;
}
bool trailblazer_state;
/**
* sched_get_nr_running_avg
 * @return: Average nr_running and nr_big_tasks values since the last poll.
 *          The averages are scaled by 100 to provide two decimal places
 *          of precision.
*
* Obtains the average nr_running value since the last poll.
* This function may not be called concurrently with itself.
*
* It is assumed that this function is called at most once per window
* rollover.
*/
struct sched_avg_stats *sched_get_nr_running_avg(void)
{
int cpu;
u64 curr_time = sched_clock();
u64 period = curr_time - last_get_time;
u64 tmp_nr, tmp_misfit, tmp_trailblazer;
bool any_hyst_time = false;
struct walt_sched_cluster *cluster;
bool trailblazer_cpu = false;
if (unlikely(walt_disabled))
return NULL;
if (!period)
goto done;
/* read and reset nr_running counts */
for_each_possible_cpu(cpu) {
unsigned long flags;
u64 diff;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
if ((s64)diff < 0) {
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
WALT_PANIC(1);
}
tmp_nr = per_cpu(nr_prod_sum, cpu);
tmp_nr += per_cpu(nr, cpu) * diff;
tmp_nr = div64_u64((tmp_nr * 100), period);
tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
tmp_misfit += walt_big_tasks(cpu) * diff;
tmp_misfit = div64_u64((tmp_misfit * 100), period);
tmp_trailblazer = per_cpu(nr_trailblazer_prod_sum, cpu);
tmp_trailblazer += walt_trailblazer_tasks(cpu) * diff;
tmp_trailblazer = div64_u64((tmp_trailblazer * 100), period);
/*
 * NR_THRESHOLD_PCT rounds the scaled value so that a task is only
 * counted if it ran for at least (100 - NR_THRESHOLD_PCT)% of the
 * last window, compensating for any over-estimation.
*/
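/*
 * e.g. with NR_THRESHOLD_PCT = 40, tmp_nr == 160 (average 1.6 runnable)
 * rounds up to stats[cpu].nr == 2, while tmp_nr == 150 rounds down to 1.
 */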
stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
100);
stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
NR_THRESHOLD_PCT), 100);
trailblazer_cpu |= (int)div64_u64((tmp_trailblazer +
NR_THRESHOLD_TRAIL_PCT), 100);
stats[cpu].nr_max = per_cpu(nr_max, cpu);
stats[cpu].nr_scaled = tmp_nr;
trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
stats[cpu].nr_misfit, stats[cpu].nr_max,
stats[cpu].nr_scaled, trailblazer_cpu);
per_cpu(last_time, cpu) = curr_time;
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
per_cpu(nr_prod_sum, cpu) = 0;
per_cpu(nr_big_prod_sum, cpu) = 0;
per_cpu(nr_trailblazer_prod_sum, cpu) = 0;
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
trailblazer_state = trailblazer_cpu;
/* collect cluster load stats */
for_each_sched_cluster(cluster) {
unsigned int num_cpus = cpumask_weight(&cluster->cpus);
unsigned int sum_util_pct = 0;
/* load is already scaled, see freq_policy_load/prev_runnable_sum */
for_each_cpu(cpu, &cluster->cpus) {
struct rq *rq = cpu_rq(cpu);
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
/*
 * compute this cpu's utilization as a percentage of its
 * capacity, and sum it across all cpus
 */
sum_util_pct +=
(wrq->util * 100) / arch_scale_cpu_capacity(cpu);
}
/* calculate the average per-cpu utilization */
cstats_util_pct[cluster->id] = sum_util_pct / num_cpus;
}
for_each_possible_cpu(cpu) {
if (per_cpu(coloc_hyst_time, cpu)) {
any_hyst_time = true;
break;
}
}
if (any_hyst_time && get_rtgb_active_time() >= MAX_RTGB_TIME)
sched_update_hyst_times();
last_get_time = curr_time;
done:
return &stats[0];
}
EXPORT_SYMBOL_GPL(sched_get_nr_running_avg);
void sched_update_hyst_times(void)
{
bool rtgb_active;
int cpu;
unsigned long cpu_cap, coloc_busy_pct;
rtgb_active = is_rtgb_active() && (sched_boost_type != CONSERVATIVE_BOOST)
&& (get_rtgb_active_time() < MAX_RTGB_TIME);
for_each_possible_cpu(cpu) {
cpu_cap = arch_scale_cpu_capacity(cpu);
coloc_busy_pct = sysctl_sched_coloc_busy_hyst_cpu_busy_pct[cpu];
per_cpu(hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_busy_hyst_enable_cpus) ?
sysctl_sched_busy_hyst : 0;
per_cpu(coloc_hyst_time, cpu) = ((BIT(cpu)
& sysctl_sched_coloc_busy_hyst_enable_cpus)
&& rtgb_active) ?
sysctl_sched_coloc_busy_hyst_cpu[cpu] : 0;
per_cpu(coloc_hyst_busy, cpu) = mult_frac(cpu_cap,
coloc_busy_pct, 100);
per_cpu(util_hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_util_busy_hyst_enable_cpus) ?
sysctl_sched_util_busy_hyst_cpu[cpu] : 0;
}
}
#define BUSY_NR_RUN 3
#define BUSY_LOAD_FACTOR 10
static inline void update_busy_hyst_end_time(int cpu, int enq,
unsigned long prev_nr_run, u64 curr_time)
{
bool nr_run_trigger = false;
bool load_trigger = false, coloc_load_trigger = false;
u64 agg_hyst_time, total_util = 0;
bool util_load_trigger = false;
int i;
bool hyst_trigger, coloc_trigger;
bool dequeue = (enq < 0);
if (is_max_possible_cluster_cpu(cpu) && is_obet)
return;
if (!per_cpu(hyst_time, cpu) && !per_cpu(coloc_hyst_time, cpu) &&
!per_cpu(util_hyst_time, cpu) && !per_cpu(smart_freq_legacy_reason_hyst_ns, cpu))
return;
if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
nr_run_trigger = true;
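/*
 * On dequeue, keep the busy hysteresis armed while utilization is above
 * ~1/BUSY_LOAD_FACTOR (10%) of the cpu's original capacity.
 */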
if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
capacity_orig_of(cpu))
load_trigger = true;
if (dequeue && cpu_util(cpu) > per_cpu(coloc_hyst_busy, cpu))
coloc_load_trigger = true;
if (dequeue) {
for_each_possible_cpu(i) {
total_util += cpu_util(i);
if (total_util >= sysctl_sched_util_busy_hyst_cpu_util[cpu]) {
util_load_trigger = true;
break;
}
}
}
coloc_trigger = nr_run_trigger || coloc_load_trigger;
#if IS_ENABLED(CONFIG_SCHED_CONSERVATIVE_BOOST_LPM_BIAS)
hyst_trigger = nr_run_trigger || load_trigger || (sched_boost_type == CONSERVATIVE_BOOST);
#else
hyst_trigger = nr_run_trigger || load_trigger;
#endif
agg_hyst_time = max(max(hyst_trigger ? per_cpu(hyst_time, cpu) : 0,
coloc_trigger ? per_cpu(coloc_hyst_time, cpu) : 0),
util_load_trigger ? per_cpu(util_hyst_time, cpu) : 0);
agg_hyst_time = max(agg_hyst_time, per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
if (agg_hyst_time) {
atomic64_set(&per_cpu(busy_hyst_end_time, cpu),
curr_time + agg_hyst_time);
trace_sched_busy_hyst_time(cpu, agg_hyst_time, prev_nr_run,
cpu_util(cpu), per_cpu(hyst_time, cpu),
per_cpu(coloc_hyst_time, cpu),
per_cpu(util_hyst_time, cpu),
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
}
}
int sched_busy_hyst_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
if (table->maxlen > (sizeof(unsigned int) * num_possible_cpus()))
table->maxlen = sizeof(unsigned int) * num_possible_cpus();
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
sched_update_hyst_times();
return ret;
}
/**
* sched_update_nr_prod
 * @cpu: The cpu whose nr_running average is being updated.
 * @enq: positive for an enqueue, negative for a dequeue, zero for a
 *       misfit update on this CPU.
* @return: N/A
*
* Update average with latest nr_running value for CPU
*/
void sched_update_nr_prod(int cpu, int enq)
{
u64 diff;
u64 curr_time;
unsigned long flags, nr_running;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
nr_running = per_cpu(nr, cpu);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
if ((s64)diff < 0) {
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
WALT_PANIC(1);
}
per_cpu(last_time, cpu) = curr_time;
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
per_cpu(nr, cpu) = cpu_rq(cpu)->nr_running + enq;
if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
/* Don't update hyst time for misfit tasks */
if (enq)
update_busy_hyst_end_time(cpu, enq, nr_running, curr_time);
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
per_cpu(nr_trailblazer_prod_sum, cpu) += (u64) walt_trailblazer_tasks(cpu) * diff;
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
/*
* Returns the CPU utilization % in the last window.
*/
unsigned int sched_get_cpu_util_pct(int cpu)
{
struct rq *rq = cpu_rq(cpu);
u64 util;
unsigned long capacity, flags;
unsigned int busy;
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
raw_spin_lock_irqsave(&rq->__lock, flags);
capacity = capacity_orig_of(cpu);
util = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;
util = scale_time_to_util(util);
raw_spin_unlock_irqrestore(&rq->__lock, flags);
util = (util >= capacity) ? capacity : util;
busy = div64_ul((util * 100), capacity);
return busy;
}
int sched_lpm_disallowed_time(int cpu, u64 *timeout)
{
u64 now = sched_clock();
u64 bias_end_time = atomic64_read(&per_cpu(busy_hyst_end_time, cpu));
if (unlikely(walt_disabled))
return -EAGAIN;
if (unlikely(is_reserved(cpu))) {
*timeout = 10 * NSEC_PER_MSEC;
return 0; /* shallowest c-state */
}
if (now < bias_end_time) {
*timeout = bias_end_time - now;
return 0; /* shallowest c-state */
}
return INT_MAX; /* don't care */
}
EXPORT_SYMBOL_GPL(sched_lpm_disallowed_time);
void update_smart_freq_legacy_reason_hyst_time(struct walt_sched_cluster *cluster)
{
int cpu, i;
u8 max_hyst_ms;
for_each_cpu(cpu, &cluster->cpus) {
max_hyst_ms = 0;
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
if (cluster->smart_freq_info->cluster_active_reason & BIT(i))
max_hyst_ms =
max(smart_freq_legacy_reason_hyst_ms[i][cpu],
max_hyst_ms);
}
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu) = max_hyst_ms * NSEC_PER_MSEC;
}
}

View File

@@ -0,0 +1,589 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/tick.h>
#include "walt.h"
#include "trace.h"
#include <trace/events/power.h>
bool smart_freq_init_done;
char reason_dump[1024];
static DEFINE_MUTEX(freq_reason_mutex);
int sched_smart_freq_legacy_dump_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = -EINVAL, pos = 0, i, j;
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
for (j = 0; j < num_sched_clusters; j++) {
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%llu:%d\n", j, i,
default_freq_config[j].legacy_reason_config[i].freq_allowed,
default_freq_config[j].legacy_reason_config[i].hyst_ns,
!!(default_freq_config[j].smart_freq_participation_mask &
BIT(i)));
}
}
ret = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&freq_reason_mutex);
return ret;
}
int sched_smart_freq_ipc_dump_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = -EINVAL, pos = 0, i, j;
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
for (j = 0; j < num_sched_clusters; j++) {
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%lu:%llu:%d\n", j, i,
default_freq_config[j].ipc_reason_config[i].ipc,
default_freq_config[j].ipc_reason_config[i].freq_allowed,
default_freq_config[j].ipc_reason_config[i].hyst_ns,
!!(default_freq_config[j].smart_freq_ipc_participation_mask &
BIT(i)));
}
}
ret = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&freq_reason_mutex);
return ret;
}
int sched_smart_freq_ipc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
int cluster_id = -1;
unsigned long no_reason_freq;
int i;
unsigned int *data = (unsigned int *)table->data;
int val[SMART_FMAX_IPC_MAX];
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(int) * SMART_FMAX_IPC_MAX,
.mode = table->mode,
};
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
if (!write) {
tmp.data = table->data;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
goto unlock;
}
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (ret)
goto unlock;
ret = -EINVAL;
if (data == &sysctl_ipc_freq_levels_cluster0[0])
cluster_id = 0;
if (data == &sysctl_ipc_freq_levels_cluster1[0])
cluster_id = 1;
if (data == &sysctl_ipc_freq_levels_cluster2[0])
cluster_id = 2;
if (data == &sysctl_ipc_freq_levels_cluster3[0])
cluster_id = 3;
if (cluster_id == -1)
goto unlock;
if (val[0] < 0)
goto unlock;
no_reason_freq = val[0];
/* Make sure all reasons freq are larger than NO_REASON */
/* IPC/freq should be in increasing order */
for (i = 1; i < SMART_FMAX_IPC_MAX; i++) {
if (val[i] < val[i-1])
goto unlock;
}
default_freq_config[cluster_id].legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed =
no_reason_freq;
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
default_freq_config[cluster_id].ipc_reason_config[i].freq_allowed = val[i];
data[i] = val[i];
}
ret = 0;
unlock:
mutex_unlock(&freq_reason_mutex);
return ret;
}
/* return highest ipc of the cluster */
unsigned int get_cluster_ipc_level_freq(int curr_cpu, u64 time)
{
int cpu, winning_cpu, cpu_ipc_level = 0, index = 0;
struct walt_sched_cluster *cluster = cpu_cluster(curr_cpu);
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
if (!smart_freq_init_done)
return 0;
for_each_cpu(cpu, &cluster->cpus) {
cpu_ipc_level = per_cpu(ipc_level, cpu);
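/*
 * No IPC update on this cpu for ~8 ms: treat it as tickless and
 * ignore its stale IPC level.
 */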
if ((time - per_cpu(last_ipc_update, cpu)) > 7999999ULL) {
cpu_ipc_level = 0;
per_cpu(tickless_mode, cpu) = true;
} else {
per_cpu(tickless_mode, cpu) = false;
}
if (cpu_ipc_level >= index) {
winning_cpu = cpu;
index = cpu_ipc_level;
}
}
smart_freq_info->cluster_ipc_level = index;
trace_ipc_freq(cluster->id, winning_cpu, index,
smart_freq_info->ipc_reason_config[index].freq_allowed,
time, per_cpu(ipc_deactivate_ns, winning_cpu), curr_cpu,
per_cpu(ipc_cnt, curr_cpu));
return smart_freq_info->ipc_reason_config[index].freq_allowed;
}
static inline bool has_internal_freq_limit_changed(struct walt_sched_cluster *cluster)
{
unsigned int internal_freq, ipc_freq;
int i;
struct smart_freq_cluster_info *smci = cluster->smart_freq_info;
internal_freq = cluster->walt_internal_freq_limit;
cluster->walt_internal_freq_limit = cluster->max_freq;
for (i = 0; i < MAX_FREQ_CAP; i++)
cluster->walt_internal_freq_limit = min(freq_cap[i][cluster->id],
cluster->walt_internal_freq_limit);
ipc_freq = smci->ipc_reason_config[smci->cluster_ipc_level].freq_allowed;
cluster->walt_internal_freq_limit = max(ipc_freq,
cluster->walt_internal_freq_limit);
return cluster->walt_internal_freq_limit != internal_freq;
}
void update_smart_freq_capacities_one_cluster(struct walt_sched_cluster *cluster)
{
int cpu;
if (!smart_freq_init_done)
return;
if (has_internal_freq_limit_changed(cluster)) {
for_each_cpu(cpu, &cluster->cpus)
update_cpu_capacity_helper(cpu);
}
}
void update_smart_freq_capacities(void)
{
struct walt_sched_cluster *cluster;
if (!smart_freq_init_done)
return;
for_each_sched_cluster(cluster)
update_smart_freq_capacities_one_cluster(cluster);
}
/*
* Update the active smart freq reason for the cluster.
*/
static void smart_freq_update_one_cluster(struct walt_sched_cluster *cluster,
uint32_t current_reasons, u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
{
uint32_t current_reason, cluster_active_reason;
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
unsigned long max_cap =
smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
int max_reason = NO_REASON_SMART_FREQ, i;
unsigned long old_freq_cap = freq_cap[SMART_FREQ][cluster->id];
struct rq *rq;
char smart_freq[25] = {0};
char smart_freq_reason[25] = {0};
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
current_reason = current_reasons & BIT(i);
cluster_active_reason = smart_freq_info->cluster_active_reason & BIT(i);
if (current_reason) {
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
smart_freq_info->cluster_active_reason |= BIT(i);
if (i == TRAILBLAZER_SMART_FREQ)
trail_active = true;
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
sustain_active = true;
} else if (cluster_active_reason) {
if (!smart_freq_info->legacy_reason_status[i].deactivate_ns)
smart_freq_info->legacy_reason_status[i].deactivate_ns = wallclock;
}
if (cluster_active_reason) {
/*
* For reasons with deactivation hysteresis, check here if we have
* crossed the hysteresis time and then deactivate the reason.
 * We rely on the scheduler tick path to call this function,
 * so a reason is only deactivated at a tick boundary.
*/
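/*
 * e.g. with hyst_ns = 1 s, a reason last asserted at time T keeps its
 * frequency vote until the first tick after roughly T + 1 s.
 */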
if (smart_freq_info->legacy_reason_status[i].deactivate_ns) {
u64 delta = wallclock -
smart_freq_info->legacy_reason_status[i].deactivate_ns;
if (delta >= smart_freq_info->legacy_reason_config[i].hyst_ns) {
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
smart_freq_info->cluster_active_reason &= ~BIT(i);
if (i == TRAILBLAZER_SMART_FREQ)
trail_active = false;
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
sustain_active = false;
continue;
}
}
if (max_cap < smart_freq_info->legacy_reason_config[i].freq_allowed) {
max_cap = smart_freq_info->legacy_reason_config[i].freq_allowed;
max_reason = i;
}
}
}
if (enable_logging) {
snprintf(smart_freq, sizeof(smart_freq), "smart_fmax_%d", cluster->id);
trace_clock_set_rate(smart_freq, max_cap, raw_smp_processor_id());
snprintf(smart_freq_reason, sizeof(smart_freq_reason), "legacy_reason_%d", cluster->id);
trace_clock_set_rate(smart_freq_reason, max_reason, raw_smp_processor_id());
}
trace_sched_freq_uncap(cluster->id, nr_big, wakeup_ctr_sum, current_reasons,
smart_freq_info->cluster_active_reason, max_cap, max_reason);
if (old_freq_cap == max_cap)
return;
freq_cap[SMART_FREQ][cluster->id] = max_cap;
rq = cpu_rq(cpumask_first(&cluster->cpus));
/*
* cpufreq smart freq doesn't call get_util for the cpu, hence
* invoking callback without rq lock is safe.
*/
waltgov_run_callback(rq, WALT_CPUFREQ_SMART_FREQ_BIT);
}
#define UNCAP_THRES 300000000
#define UTIL_THRESHOLD 90
static bool thres_based_uncap(u64 window_start, struct walt_sched_cluster *cluster)
{
int cpu;
bool cluster_high_load = false, sustained_load = false;
unsigned long freq_capacity, tgt_cap;
unsigned long tgt_freq =
cluster->smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
struct walt_rq *wrq;
freq_capacity = arch_scale_cpu_capacity(cpumask_first(&cluster->cpus));
tgt_cap = mult_frac(freq_capacity, tgt_freq, cluster->max_possible_freq);
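/*
 * Illustrative numbers: a 1024-capacity cluster whose NO_REASON fmax is
 * 80% of max_possible_freq gives tgt_cap ~= 819; a cpu counts as high
 * load once its util reaches 90% (UTIL_THRESHOLD) of that (~737), and
 * sustained_load is reported after 300 ms (UNCAP_THRES) of continuous
 * high load.
 */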
for_each_cpu(cpu, &cluster->cpus) {
wrq = &per_cpu(walt_rq, cpu);
if (wrq->util >= mult_frac(tgt_cap, UTIL_THRESHOLD, 100)) {
cluster_high_load = true;
if (!cluster->found_ts)
cluster->found_ts = window_start;
else if ((window_start - cluster->found_ts) >= UNCAP_THRES)
sustained_load = true;
break;
}
}
if (!cluster_high_load)
cluster->found_ts = 0;
return sustained_load;
}
unsigned int big_task_cnt = 6;
#define WAKEUP_CNT 100
/*
 * The reason is a two-part bitmap:
 *   bits 15 -  0: reason type
 *   bits 31 - 16: changed state of the reason
 * This allows passing multiple reasons at once and avoids multiple calls.
*/
/*
* This will be called from irq work path only
*/
void smart_freq_update_reason_common(u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
{
struct walt_sched_cluster *cluster;
bool current_state;
uint32_t cluster_reasons;
int i;
int cluster_active_reason;
uint32_t cluster_participation_mask;
bool sustained_load = false;
if (!smart_freq_init_done)
return;
for_each_sched_cluster(cluster)
sustained_load |= thres_based_uncap(wallclock, cluster);
for_each_sched_cluster(cluster) {
cluster_reasons = 0;
i = cluster->id;
cluster_participation_mask =
cluster->smart_freq_info->smart_freq_participation_mask;
/*
* NO_REASON
*/
if (cluster_participation_mask & BIT(NO_REASON_SMART_FREQ))
cluster_reasons |= BIT(NO_REASON_SMART_FREQ);
/*
* BOOST
*/
if (cluster_participation_mask & BIT(BOOST_SMART_FREQ)) {
current_state = is_storage_boost() || is_full_throttle_boost();
if (current_state)
cluster_reasons |= BIT(BOOST_SMART_FREQ);
}
/*
* TRAILBLAZER
*/
if (cluster_participation_mask & BIT(TRAILBLAZER_SMART_FREQ)) {
current_state = trailblazer_state;
if (current_state)
cluster_reasons |= BIT(TRAILBLAZER_SMART_FREQ);
}
/*
* SBT
*/
if (cluster_participation_mask & BIT(SBT_SMART_FREQ)) {
current_state = prev_is_sbt;
if (current_state)
cluster_reasons |= BIT(SBT_SMART_FREQ);
}
/*
* BIG_TASKCNT
*/
if (cluster_participation_mask & BIT(BIG_TASKCNT_SMART_FREQ)) {
current_state = (nr_big >= big_task_cnt) &&
(wakeup_ctr_sum < WAKEUP_CNT);
if (current_state)
cluster_reasons |= BIT(BIG_TASKCNT_SMART_FREQ);
}
/*
* SUSTAINED_HIGH_UTIL
*/
if (cluster_participation_mask & BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ)) {
current_state = sustained_load;
if (current_state)
cluster_reasons |= BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ);
}
/*
* PIPELINE_60FPS_OR_LESSER
*/
if (cluster_participation_mask &
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window >= SCHED_RAVG_16MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ);
}
/*
* PIPELINE_90FPS
*/
if (cluster_participation_mask &
BIT(PIPELINE_90FPS_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window == SCHED_RAVG_12MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_90FPS_SMART_FREQ);
}
/*
* PIPELINE_120FPS_OR_GREATER
*/
if (cluster_participation_mask &
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window == SCHED_RAVG_8MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ);
}
/*
* THERMAL_ROTATION
*/
if (cluster_participation_mask & BIT(THERMAL_ROTATION_SMART_FREQ)) {
current_state = (oscillate_cpu != -1);
if (current_state)
cluster_reasons |= BIT(THERMAL_ROTATION_SMART_FREQ);
}
cluster_active_reason = cluster->smart_freq_info->cluster_active_reason;
/* update the reasons for all the clusters */
if (cluster_reasons || cluster_active_reason)
smart_freq_update_one_cluster(cluster, cluster_reasons, wallclock,
nr_big, wakeup_ctr_sum);
}
}
/* Common config for 4 cluster system */
struct smart_freq_cluster_info default_freq_config[MAX_CLUSTERS];
void smart_freq_init(const char *name)
{
struct walt_sched_cluster *cluster;
int i = 0, j;
for_each_sched_cluster(cluster) {
cluster->smart_freq_info = &default_freq_config[i];
cluster->smart_freq_info->smart_freq_participation_mask = BIT(NO_REASON_SMART_FREQ);
cluster->smart_freq_info->cluster_active_reason = 0;
cluster->smart_freq_info->min_cycles = 100;
cluster->smart_freq_info->smart_freq_ipc_participation_mask = 0;
freq_cap[SMART_FREQ][cluster->id] = FREQ_QOS_MAX_DEFAULT_VALUE;
memset(cluster->smart_freq_info->legacy_reason_status, 0,
sizeof(struct smart_freq_legacy_reason_status) *
LEGACY_SMART_FREQ);
memset(cluster->smart_freq_info->legacy_reason_config, 0,
sizeof(struct smart_freq_legacy_reason_config) *
LEGACY_SMART_FREQ);
memset(cluster->smart_freq_info->ipc_reason_config, 0,
sizeof(struct smart_freq_ipc_reason_config) *
SMART_FMAX_IPC_MAX);
for (j = 0; j < LEGACY_SMART_FREQ; j++) {
cluster->smart_freq_info->legacy_reason_config[j].freq_allowed =
FREQ_QOS_MAX_DEFAULT_VALUE;
}
for (j = 0; j < SMART_FMAX_IPC_MAX; j++) {
cluster->smart_freq_info->ipc_reason_config[j].freq_allowed =
FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster0[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster1[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster2[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster3[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
}
i++;
}
if (!strcmp(name, "SUN")) {
for_each_sched_cluster(cluster) {
if (cluster->id == 0) {
/* Legacy */
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
2400000;
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
300000000;
cluster->smart_freq_info->smart_freq_participation_mask |=
BIT(BOOST_SMART_FREQ) |
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
BIT(BIG_TASKCNT_SMART_FREQ) |
BIT(TRAILBLAZER_SMART_FREQ) |
BIT(SBT_SMART_FREQ) |
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
BIT(PIPELINE_90FPS_SMART_FREQ) |
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
BIT(THERMAL_ROTATION_SMART_FREQ);
/* IPC */
cluster->smart_freq_info->ipc_reason_config[0].ipc = 120;
cluster->smart_freq_info->ipc_reason_config[1].ipc = 180;
cluster->smart_freq_info->ipc_reason_config[2].ipc = 220;
cluster->smart_freq_info->ipc_reason_config[3].ipc = 260;
cluster->smart_freq_info->ipc_reason_config[4].ipc = 300;
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
BIT(IPC_E);
cluster->smart_freq_info->min_cycles = 5806080;
} else if (cluster->id == 1) {
/* Legacy */
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
3513600;
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
300000000;
cluster->smart_freq_info->smart_freq_participation_mask |=
BIT(BOOST_SMART_FREQ) |
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
BIT(BIG_TASKCNT_SMART_FREQ) |
BIT(TRAILBLAZER_SMART_FREQ) |
BIT(SBT_SMART_FREQ) |
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
BIT(PIPELINE_90FPS_SMART_FREQ) |
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
BIT(THERMAL_ROTATION_SMART_FREQ);
/* IPC */
cluster->smart_freq_info->ipc_reason_config[0].ipc = 220;
cluster->smart_freq_info->ipc_reason_config[1].ipc = 260;
cluster->smart_freq_info->ipc_reason_config[2].ipc = 280;
cluster->smart_freq_info->ipc_reason_config[3].ipc = 320;
cluster->smart_freq_info->ipc_reason_config[4].ipc = 400;
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
BIT(IPC_E);
cluster->smart_freq_info->min_cycles = 7004160;
}
}
}
smart_freq_init_done = true;
update_smart_freq_capacities();
}

1964
kernel/sched/walt/sysctl.c Normal file

File diff suppressed because it is too large

84
kernel/sched/walt/trace.c Normal file
View File

@@ -0,0 +1,84 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#include "walt.h"
static inline void __window_data(u32 *dst, u32 *src)
{
if (src)
memcpy(dst, src, nr_cpu_ids * sizeof(u32));
else
memset(dst, 0, nr_cpu_ids * sizeof(u32));
}
struct trace_seq;
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
{
int i;
const char *ret = p->buffer + seq_buf_used(&p->seq);
for (i = 0; i < buf_len; i++)
trace_seq_printf(p, "%u ", buf[i]);
trace_seq_putc(p, 0);
return ret;
}
static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new)
{
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
if (curr)
if (new)
return wrq->nt_curr_runnable_sum;
else
return wrq->curr_runnable_sum;
else
if (new)
return wrq->nt_prev_runnable_sum;
else
return wrq->prev_runnable_sum;
}
static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new)
{
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
if (curr)
if (new)
return wrq->grp_time.nt_curr_runnable_sum;
else
return wrq->grp_time.curr_runnable_sum;
else
if (new)
return wrq->grp_time.nt_prev_runnable_sum;
else
return wrq->grp_time.prev_runnable_sum;
}
static inline s64
__get_update_sum(struct rq *rq, enum migrate_types migrate_type,
bool src, bool new, bool curr)
{
switch (migrate_type) {
case RQ_TO_GROUP:
if (src)
return __rq_update_sum(rq, curr, new);
else
return __grp_update_sum(rq, curr, new);
case GROUP_TO_RQ:
if (src)
return __grp_update_sum(rq, curr, new);
else
return __rq_update_sum(rq, curr, new);
default:
WARN_ON_ONCE(1);
return -EINVAL;
}
}
#define CREATE_TRACE_POINTS
#include "trace.h"

2060
kernel/sched/walt/trace.h Normal file

File diff suppressed because it is too large

5683
kernel/sched/walt/walt.c Normal file

File diff suppressed because it is too large

1545
kernel/sched/walt/walt.h Normal file

File diff suppressed because it is too large

1532
kernel/sched/walt/walt_cfs.c Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,207 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "walt.h"
#include "trace.h"
#include <soc/qcom/socinfo.h>
unsigned long __read_mostly soc_flags;
unsigned int trailblazer_floor_freq[MAX_CLUSTERS];
cpumask_t asym_cap_sibling_cpus;
cpumask_t pipeline_sync_cpus;
int oscillate_period_ns;
int soc_sched_lib_name_capacity;
#define PIPELINE_BUSY_THRESH_8MS_WINDOW 7
#define PIPELINE_BUSY_THRESH_12MS_WINDOW 11
#define PIPELINE_BUSY_THRESH_16MS_WINDOW 15
void walt_config(void)
{
int i, j, cpu;
const char *name = socinfo_get_id_string();
sysctl_sched_group_upmigrate_pct = 100;
sysctl_sched_group_downmigrate_pct = 95;
sysctl_sched_task_unfilter_period = 100000000;
sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG;
sysctl_sched_ravg_window_nr_ticks = (HZ / NR_WINDOWS_PER_SEC);
sched_load_granule = DEFAULT_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;
sysctl_sched_coloc_busy_hyst_enable_cpus = 112;
sysctl_sched_util_busy_hyst_enable_cpus = 255;
sysctl_sched_coloc_busy_hyst_max_ms = 5000;
sched_ravg_window = DEFAULT_SCHED_RAVG_WINDOW;
sysctl_input_boost_ms = 40;
sysctl_sched_min_task_util_for_boost = 51;
sysctl_sched_min_task_util_for_uclamp = 51;
sysctl_sched_min_task_util_for_colocation = 35;
sysctl_sched_many_wakeup_threshold = WALT_MANY_WAKEUP_DEFAULT;
sysctl_walt_rtg_cfs_boost_prio = 99; /* disabled by default */
sysctl_sched_sync_hint_enable = 1;
sysctl_sched_skip_sp_newly_idle_lb = 1;
sysctl_sched_hyst_min_coloc_ns = 80000000;
sysctl_sched_idle_enough = SCHED_IDLE_ENOUGH_DEFAULT;
sysctl_sched_cluster_util_thres_pct = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
sysctl_em_inflate_pct = 100;
sysctl_em_inflate_thres = 1024;
sysctl_max_freq_partial_halt = FREQ_QOS_MAX_DEFAULT_VALUE;
asym_cap_sibling_cpus = CPU_MASK_NONE;
pipeline_sync_cpus = CPU_MASK_NONE;
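/* by default, give every legacy smart freq reason other than the first a 4 ms hysteresis */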
for_each_possible_cpu(cpu) {
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
if (i)
smart_freq_legacy_reason_hyst_ms[i][cpu] = 4;
else
smart_freq_legacy_reason_hyst_ms[i][cpu] = 0;
}
}
for (i = 0; i < MAX_MARGIN_LEVELS; i++) {
sysctl_sched_capacity_margin_up_pct[i] = 95; /* ~5% margin */
sysctl_sched_capacity_margin_dn_pct[i] = 85; /* ~15% margin */
sysctl_sched_early_up[i] = 1077;
sysctl_sched_early_down[i] = 1204;
}
for (i = 0; i < WALT_NR_CPUS; i++) {
sysctl_sched_coloc_busy_hyst_cpu[i] = 39000000;
sysctl_sched_coloc_busy_hyst_cpu_busy_pct[i] = 10;
sysctl_sched_util_busy_hyst_cpu[i] = 5000000;
sysctl_sched_util_busy_hyst_cpu_util[i] = 15;
sysctl_input_boost_freq[i] = 0;
}
for (i = 0; i < MAX_CLUSTERS; i++) {
sysctl_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
high_perf_cluster_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_sched_idle_enough_clust[i] = SCHED_IDLE_ENOUGH_DEFAULT;
sysctl_sched_cluster_util_thres_pct_clust[i] = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
trailblazer_floor_freq[i] = 0;
for (j = 0; j < MAX_CLUSTERS; j++) {
load_sync_util_thres[i][j] = 0;
load_sync_low_pct[i][j] = 0;
load_sync_high_pct[i][j] = 0;
}
}
for (i = 0; i < MAX_FREQ_CAP; i++) {
for (j = 0; j < MAX_CLUSTERS; j++)
freq_cap[i][j] = FREQ_QOS_MAX_DEFAULT_VALUE;
}
sysctl_sched_lrpb_active_ms[0] = PIPELINE_BUSY_THRESH_8MS_WINDOW;
sysctl_sched_lrpb_active_ms[1] = PIPELINE_BUSY_THRESH_12MS_WINDOW;
sysctl_sched_lrpb_active_ms[2] = PIPELINE_BUSY_THRESH_16MS_WINDOW;
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
soc_feat_set(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
soc_feat_set(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
soc_feat_set(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
soc_feat_set(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
soc_feat_set(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
sysctl_pipeline_special_task_util_thres = 100;
sysctl_pipeline_non_special_task_util_thres = 200;
sysctl_pipeline_pin_thres_low_pct = 50;
sysctl_pipeline_pin_thres_high_pct = 60;
/* return if socinfo is not available */
if (!name)
return;
if (!strcmp(name, "SUN")) {
sysctl_sched_suppress_region2 = 1;
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
soc_feat_unset(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
soc_feat_unset(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
trailblazer_floor_freq[0] = 1000000;
debugfs_walt_features |= WALT_FEAT_TRAILBLAZER_BIT;
debugfs_walt_features |= WALT_FEAT_UCLAMP_FREQ_BIT;
soc_feat_unset(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
soc_feat_set(SOC_ENABLE_FT_BOOST_TO_ALL);
oscillate_period_ns = 8000000;
soc_feat_set(SOC_ENABLE_EXPERIMENT3);
/*G + P*/
cpumask_copy(&pipeline_sync_cpus, cpu_possible_mask);
soc_sched_lib_name_capacity = 2;
soc_feat_unset(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
sysctl_cluster01_load_sync[0] = 350;
sysctl_cluster01_load_sync[1] = 100;
sysctl_cluster01_load_sync[2] = 100;
sysctl_cluster10_load_sync[0] = 512;
sysctl_cluster10_load_sync[1] = 90;
sysctl_cluster10_load_sync[2] = 90;
load_sync_util_thres[0][1] = sysctl_cluster01_load_sync[0];
load_sync_low_pct[0][1] = sysctl_cluster01_load_sync[1];
load_sync_high_pct[0][1] = sysctl_cluster01_load_sync[2];
load_sync_util_thres[1][0] = sysctl_cluster10_load_sync[0];
load_sync_low_pct[1][0] = sysctl_cluster10_load_sync[1];
load_sync_high_pct[1][0] = sysctl_cluster10_load_sync[2];
sysctl_cluster01_load_sync_60fps[0] = 400;
sysctl_cluster01_load_sync_60fps[1] = 60;
sysctl_cluster01_load_sync_60fps[2] = 100;
sysctl_cluster10_load_sync_60fps[0] = 500;
sysctl_cluster10_load_sync_60fps[1] = 70;
sysctl_cluster10_load_sync_60fps[2] = 90;
load_sync_util_thres_60fps[0][1] = sysctl_cluster01_load_sync_60fps[0];
load_sync_low_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[1];
load_sync_high_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[2];
load_sync_util_thres_60fps[1][0] = sysctl_cluster10_load_sync_60fps[0];
load_sync_low_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[1];
load_sync_high_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[2];
/* CPU0 needs a 9 ms bias for all legacy smart freq reasons */
for (i = 1; i < LEGACY_SMART_FREQ; i++)
smart_freq_legacy_reason_hyst_ms[i][0] = 9;
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
for (i = 1; i < LEGACY_SMART_FREQ; i++)
smart_freq_legacy_reason_hyst_ms[i][cpu] = 2;
}
for_each_possible_cpu(cpu) {
smart_freq_legacy_reason_hyst_ms[PIPELINE_60FPS_OR_LESSER_SMART_FREQ][cpu] =
1;
}
soc_feat_unset(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
} else if (!strcmp(name, "PINEAPPLE")) {
soc_feat_set(SOC_ENABLE_SILVER_RT_SPREAD_BIT);
soc_feat_set(SOC_ENABLE_BOOST_TO_NEXT_CLUSTER_BIT);
/* T + G */
cpumask_or(&asym_cap_sibling_cpus,
&asym_cap_sibling_cpus, &cpu_array[0][1]);
cpumask_or(&asym_cap_sibling_cpus,
&asym_cap_sibling_cpus, &cpu_array[0][2]);
/*
* Treat Golds and Primes as candidates for load sync under pipeline usecase.
 * However, the prime cluster (the only single-CPU cluster) may not be
 * present on some parts, so guard this setting by ensuring all 4
 * clusters are present.
*/
if (num_sched_clusters == 4) {
cpumask_or(&pipeline_sync_cpus,
&pipeline_sync_cpus, &cpu_array[0][2]);
cpumask_or(&pipeline_sync_cpus,
&pipeline_sync_cpus, &cpu_array[0][3]);
}
sysctl_cluster23_load_sync[0] = 350;
sysctl_cluster23_load_sync[1] = 100;
sysctl_cluster23_load_sync[2] = 100;
sysctl_cluster32_load_sync[0] = 512;
sysctl_cluster32_load_sync[1] = 90;
sysctl_cluster32_load_sync[2] = 90;
load_sync_util_thres[2][3] = sysctl_cluster23_load_sync[0];
load_sync_low_pct[2][3] = sysctl_cluster23_load_sync[1];
load_sync_high_pct[2][3] = sysctl_cluster23_load_sync[2];
load_sync_util_thres[3][2] = sysctl_cluster32_load_sync[0];
load_sync_low_pct[3][2] = sysctl_cluster32_load_sync[1];
load_sync_high_pct[3][2] = sysctl_cluster32_load_sync[2];
}
smart_freq_init(name);
}

View File

@@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/topology.h>
#include "walt.h"
#define CYCLE_CNTR_OFFSET(core_id, acc_count) \
(acc_count ? ((core_id + 1) * 4) : 0)
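/*
 * e.g. with an accumulative counter (the qcom,epss case), core 2 reads its
 * 32-bit count at byte offset (2 + 1) * 4 = 12 past reg_cycle_cntr;
 * non-accumulative hardware exposes a single counter at offset 0.
 */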
struct cpufreq_counter {
u64 total_cycle_counter;
u32 prev_cycle_counter;
spinlock_t lock;
};
static struct cpufreq_counter walt_cpufreq_counter[NR_CPUS];
struct walt_cpufreq_soc_data {
u32 reg_enable;
u32 reg_cycle_cntr;
bool accumulative_counter;
};
struct walt_cpufreq_data {
void __iomem *base;
const struct walt_cpufreq_soc_data *soc_data;
};
static struct walt_cpufreq_data cpufreq_data[MAX_CLUSTERS];
u64 walt_cpufreq_get_cpu_cycle_counter(int cpu, u64 wc)
{
const struct walt_cpufreq_soc_data *soc_data;
struct cpufreq_counter *cpu_counter;
struct walt_cpufreq_data *data;
u64 cycle_counter_ret;
unsigned long flags;
u16 offset;
u32 val;
data = &cpufreq_data[cpu_cluster(cpu)->id];
soc_data = data->soc_data;
cpu_counter = &walt_cpufreq_counter[cpu];
spin_lock_irqsave(&cpu_counter->lock, flags);
offset = CYCLE_CNTR_OFFSET(topology_core_id(cpu),
soc_data->accumulative_counter);
val = readl_relaxed(data->base +
soc_data->reg_cycle_cntr + offset);
if (val < cpu_counter->prev_cycle_counter) {
/* Handle counter overflow */
cpu_counter->total_cycle_counter += UINT_MAX -
cpu_counter->prev_cycle_counter + val;
cpu_counter->prev_cycle_counter = val;
} else {
cpu_counter->total_cycle_counter += val -
cpu_counter->prev_cycle_counter;
cpu_counter->prev_cycle_counter = val;
}
cycle_counter_ret = cpu_counter->total_cycle_counter;
spin_unlock_irqrestore(&cpu_counter->lock, flags);
pr_debug("CPU %u, core-id 0x%x, offset %u cycle_counts=%llu\n",
cpu, topology_core_id(cpu), offset, cycle_counter_ret);
return cycle_counter_ret;
}
static int walt_cpufreq_cycle_cntr_driver_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
struct of_phandle_args args;
struct device_node *cpu_np;
void __iomem *base;
int ret = -ENODEV, index, cpu;
struct walt_sched_cluster *cluster;
for_each_sched_cluster(cluster) {
cpu = cluster_first_cpu(cluster);
cpu_np = of_cpu_device_node_get(cpu);
if (!cpu_np)
return -EINVAL;
ret = of_parse_phandle_with_args(cpu_np, "qcom,freq-domain",
"#freq-domain-cells", 0, &args);
of_node_put(cpu_np);
if (ret)
return ret;
index = args.args[0];
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
base = devm_ioremap(dev, res->start, resource_size(res));
if (!base) {
dev_err(dev, "failed to map resource %pR\n", res);
return -ENOMEM;
}
cpufreq_data[cluster->id].soc_data = of_device_get_match_data(&pdev->dev);
cpufreq_data[cluster->id].base = base;
/* HW should be in enabled state to proceed */
if (!(readl_relaxed(base + cpufreq_data[cluster->id].soc_data->reg_enable) & 0x1)) {
dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index);
return -ENODEV;
}
}
if (!walt_get_cycle_counts_cb) {
for_each_possible_cpu(cpu)
spin_lock_init(&walt_cpufreq_counter[cpu].lock);
walt_get_cycle_counts_cb = walt_cpufreq_get_cpu_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
return 0;
}
return ret;
}
static int walt_cpufreq_cycle_cntr_driver_remove(struct platform_device *pdev)
{
return 0;
}
static const struct walt_cpufreq_soc_data hw_soc_data = {
.reg_enable = 0x0,
.reg_cycle_cntr = 0x9c0,
.accumulative_counter = false,
};
static const struct walt_cpufreq_soc_data epss_soc_data = {
.reg_enable = 0x0,
.reg_cycle_cntr = 0x3c4,
.accumulative_counter = true,
};
static const struct of_device_id walt_cpufreq_cycle_cntr_match[] = {
{ .compatible = "qcom,cycle-cntr-hw", .data = &hw_soc_data },
{ .compatible = "qcom,epss", .data = &epss_soc_data },
{}
};
static struct platform_driver walt_cpufreq_cycle_cntr_driver = {
.driver = {
.name = "walt-cpufreq-cycle-cntr",
.of_match_table = walt_cpufreq_cycle_cntr_match
},
.probe = walt_cpufreq_cycle_cntr_driver_probe,
.remove = walt_cpufreq_cycle_cntr_driver_remove,
};
int walt_cpufreq_cycle_cntr_driver_register(void)
{
return platform_driver_register(&walt_cpufreq_cycle_cntr_driver);
}

View File

@@ -0,0 +1,115 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <trace/hooks/cpufreq.h>
#include "walt.h"
struct walt_cpu_cycle {
rwlock_t lock;
u64 cycles;
u64 last_time_ns;
unsigned int cur_freq_khz;
unsigned int mult_fact;
};
static DEFINE_PER_CPU(struct walt_cpu_cycle, walt_cc);
static u64 walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
{
unsigned long flags;
u64 delta;
u64 ret;
/*
* freq is in KHz. so multiply by 1000.
* time in nsec. so divide by NSEC_PER_SEC.
*
* cycles = (freq * 1000) * (t/10^9)
* = (freq * t)/10^6
*
*/
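/*
 * mult_fact is roughly freq_khz * 2^20 / 10^6 (see
 * walt_cpufreq_transition()), so (delta * mult_fact) >> 20 is
 * delta_ns * freq_khz / 10^6. e.g. 1 ms at 1 GHz (1000000 KHz):
 * 10^6 * 10^6 / 10^6 = 10^6 cycles.
 */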
read_lock_irqsave(&wcc->lock, flags);
delta = wc - wcc->last_time_ns;
ret = wcc->cycles + ((delta * wcc->mult_fact) >> 20);
read_unlock_irqrestore(&wcc->lock, flags);
return ret;
}
static void update_walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
{
unsigned long flags;
u64 delta;
/*
* freq is in KHz. so multiply by 1000.
* time in nsec. so divide by NSEC_PER_SEC.
*
* cycles = (freq * 1000) * (t/10^9)
* = (freq * t)/10^6
*
*/
write_lock_irqsave(&wcc->lock, flags);
delta = wc - wcc->last_time_ns;
wcc->cycles += (delta * wcc->mult_fact) >> 20;
wcc->last_time_ns = wc;
write_unlock_irqrestore(&wcc->lock, flags);
}
u64 walt_cpu_cycle_counter(int cpu, u64 wc)
{
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, cpu);
u64 cycles;
cycles = walt_compute_cpu_cycles(wcc, wc);
return cycles;
}
static void walt_cpufreq_transition(void *unused, struct cpufreq_policy *policy)
{
int i;
struct walt_cpu_cycle *wcc;
u64 wc;
unsigned int mult_fact;
wc = sched_clock();
for_each_cpu(i, policy->related_cpus) {
wcc = &per_cpu(walt_cc, i);
update_walt_compute_cpu_cycles(wcc, wc);
wcc->cur_freq_khz = policy->cur;
}
mult_fact = (policy->cur << SCHED_CAPACITY_SHIFT)/1000;
mult_fact = (mult_fact << SCHED_CAPACITY_SHIFT)/1000;
for_each_cpu(i, policy->related_cpus) {
wcc = &per_cpu(walt_cc, i);
wcc->mult_fact = mult_fact;
}
}
void walt_cycle_counter_init(void)
{
int i;
for_each_possible_cpu(i) {
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, i);
rwlock_init(&wcc->lock);
wcc->cur_freq_khz = cpufreq_quick_get(i);
wcc->last_time_ns = 0;
wcc->cycles = 0;
wcc->mult_fact = (wcc->cur_freq_khz << SCHED_CAPACITY_SHIFT)/1000;
wcc->mult_fact = (wcc->mult_fact << SCHED_CAPACITY_SHIFT)/1000;
}
walt_get_cycle_counts_cb = walt_cpu_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
register_trace_android_rvh_cpufreq_transition(walt_cpufreq_transition, NULL);
}

View File

@@ -0,0 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "walt_debug.h"
static void android_rvh_schedule_bug(void *unused, void *unused2)
{
BUG();
}
static int __init walt_debug_init(void)
{
int ret;
ret = preemptirq_long_init();
if (ret)
return ret;
register_trace_android_rvh_schedule_bug(android_rvh_schedule_bug, NULL);
return 0;
}
module_init(walt_debug_init);
MODULE_DESCRIPTION("QTI WALT Debug Module");
MODULE_LICENSE("GPL v2");

View File

@@ -0,0 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
int preemptirq_long_init(void);

View File

@@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include "walt.h"
struct gclk_counter {
u64 total_cycle_counter;
u64 prev_cycle_counter;
spinlock_t lock;
};
static struct gclk_counter walt_gclk_counter[MAX_CLUSTERS];
struct walt_ncc_data {
void __iomem *base;
};
static struct walt_ncc_data ncc_data[MAX_CLUSTERS];
u64 walt_get_ncc_gclk_cycle_counter(int cpu, u64 wc)
{
struct gclk_counter *ncc_counter;
struct walt_ncc_data *data;
u64 cycle_counter_ret;
unsigned long flags;
int index;
u64 val;
index = topology_cluster_id(cpu);
data = &ncc_data[index];
ncc_counter = &walt_gclk_counter[index];
spin_lock_irqsave(&ncc_counter->lock, flags);
val = readq_relaxed(data->base);
if (val < ncc_counter->prev_cycle_counter) {
/*
 * Handle counter overflow. Unlikely to occur for a 64-bit
 * counter, but handled for completeness.
 */
ncc_counter->total_cycle_counter += U64_MAX -
ncc_counter->prev_cycle_counter + val;
ncc_counter->prev_cycle_counter = val;
} else {
ncc_counter->total_cycle_counter += val -
ncc_counter->prev_cycle_counter;
ncc_counter->prev_cycle_counter = val;
}
cycle_counter_ret = ncc_counter->total_cycle_counter;
spin_unlock_irqrestore(&ncc_counter->lock, flags);
pr_debug("CPU %u, cluster-id %d\n", cpu, index);
return cycle_counter_ret;
}
static int walt_gclk_cycle_counter_driver_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
void __iomem *base;
int ret = -ENODEV, index;
struct walt_sched_cluster *cluster;
for_each_sched_cluster(cluster) {
index = topology_cluster_id(cpumask_first(&cluster->cpus));
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
if (!devm_request_mem_region(dev, res->start, resource_size(res), res->name)) {
dev_err(dev, "failed to request resource %pR\n", res);
return -EBUSY;
}
base = devm_ioremap(dev, res->start, resource_size(res));
if (!base) {
dev_err(dev, "failed to map resource %pR\n", res);
return -ENOMEM;
}
ncc_data[index].base = base;
}
if (!walt_get_cycle_counts_cb) {
for (int i = 0; i < MAX_CLUSTERS; i++)
spin_lock_init(&walt_gclk_counter[i].lock);
walt_get_cycle_counts_cb = walt_get_ncc_gclk_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
return 0;
}
return ret;
}
static int walt_gclk_cycle_counter_driver_remove(struct platform_device *pdev)
{
return 0;
}
static const struct of_device_id walt_gclk_cycle_counter_match[] = {
{ .compatible = "qcom,gclk" },
{}
};
static struct platform_driver walt_gclk_cycle_counter_driver = {
.driver = {
.name = "walt-gclk-cycle-counter",
.of_match_table = walt_gclk_cycle_counter_match
},
.probe = walt_gclk_cycle_counter_driver_probe,
.remove = walt_gclk_cycle_counter_driver_remove,
};
int walt_gclk_cycle_counter_driver_register(void)
{
return platform_driver_register(&walt_gclk_cycle_counter_driver);
}

View File

@@ -0,0 +1,713 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/sched/isolation.h>
#include <trace/hooks/sched.h>
#include <walt.h>
#include "trace.h"
#ifdef CONFIG_HOTPLUG_CPU
enum pause_type {
HALT,
PARTIAL_HALT,
MAX_PAUSE_TYPE
};
/* masks of cpus that are halted or partially halted */
struct cpumask __cpu_halt_mask;
struct cpumask __cpu_partial_halt_mask;
/* spin lock to allow calling from non-preemptible context */
static DEFINE_RAW_SPINLOCK(halt_lock);
struct halt_cpu_state {
u8 client_vote_mask[MAX_PAUSE_TYPE];
};
static DEFINE_PER_CPU(struct halt_cpu_state, halt_state);
static DEFINE_RAW_SPINLOCK(walt_drain_pending_lock);
/* the amount of time allowed for enqueue operations that happen
* just after a halt operation.
*/
#define WALT_HALT_CHECK_THRESHOLD_NS 400000
/*
* Remove a task from the runqueue and pretend that it's migrating. This
* should prevent migrations for the detached task and disallow further
* changes to tsk_cpus_allowed.
*/
void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
walt_lockdep_assert_rq(rq, p);
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(rq, p, 0);
list_add(&p->se.group_node, tasks);
}
void attach_tasks_core(struct list_head *tasks, struct rq *rq)
{
struct task_struct *p;
walt_lockdep_assert_rq(rq, NULL);
while (!list_empty(tasks)) {
p = list_first_entry(tasks, struct task_struct, se.group_node);
list_del_init(&p->se.group_node);
BUG_ON(task_rq(p) != rq);
activate_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
}
}
/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
*
 * Called with rq->__lock held even though we're in stop_machine() and
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*
* The function will skip CPU pinned kthreads.
*/
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
LIST_HEAD(percpu_kthreads);
unsigned int num_pinned_kthreads = 1;
struct rq_flags orf = *rf;
int dest_cpu;
/*
* Fudge the rq selection such that the below task selection loop
* doesn't get stuck on the currently eligible stop task.
*
* We're currently inside stop_machine() and the rq is either stuck
* in the stop_machine_cpu_stop() loop, or we're executing this code,
* either way we should never end up calling schedule() until we're
* done here.
*/
rq->stop = NULL;
/*
* put_prev_task() and pick_next_task() sched
* class method both need to have an up-to-date
* value of rq->clock[_task]
*/
update_rq_clock(rq);
#ifdef CONFIG_SCHED_DEBUG
/* note the clock update in orf */
orf.clock_update_flags |= RQCF_UPDATED;
#endif
for (;;) {
/*
* There's this thread running, bail when that's the only
* remaining thread:
*/
if (rq->nr_running == 1)
break;
next = pick_migrate_task(rq);
/*
* Argh ... no iterator for tasks, we need to remove the
* kthread from the run-queue to continue.
*/
if (is_per_cpu_kthread(next)) {
detach_one_task_core(next, rq, &percpu_kthreads);
num_pinned_kthreads += 1;
continue;
}
/*
* Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->__lock, such that holding either
* stabilizes the mask.
*
* Drop rq->__lock is not quite as disastrous as it usually is
* because !cpu_active at this point, which means load-balance
* will not interfere. Also, stop-machine.
*/
rq_unlock(rq, rf);
raw_spin_lock(&next->pi_lock);
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
/*
* Since we're inside stop-machine, _nothing_ should have
* changed the task, WARN if weird stuff happened, because in
* that case the above rq->__lock drop is a fail too.
*/
if (task_rq(next) != rq || !task_on_rq_queued(next)) {
raw_spin_unlock(&next->pi_lock);
continue;
}
/* Find suitable destination for @next */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
if (cpu_of(rq) != dest_cpu && !is_migration_disabled(next)) {
/* only perform a required migration */
rq = __migrate_task(rq, rf, next, dest_cpu);
if (rq != dead_rq) {
rq_unlock(rq, rf);
rq = dead_rq;
*rf = orf;
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
}
} else {
detach_one_task_core(next, rq, &percpu_kthreads);
num_pinned_kthreads += 1;
}
raw_spin_unlock(&next->pi_lock);
}
if (num_pinned_kthreads > 1)
attach_tasks_core(&percpu_kthreads, rq);
rq->stop = stop;
}
void __balance_callbacks(struct rq *rq);
static int drain_rq_cpu_stop(void *data)
{
struct rq *rq = this_rq();
struct rq_flags rf;
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
rq_lock_irqsave(rq, &rf);
/* rq lock is pinned */
/* migrate tasks assumes that the lock is pinned, and will unlock/repin */
migrate_tasks(rq, &rf);
/* __balance_callbacks can unlock and relock the rq lock. unpin */
rq_unpin_lock(rq, &rf);
/*
* service any callbacks that were accumulated, prior to unlocking. such that
* any subsequent calls to rq_lock... will see an rq->balance_callback set to
* the default (0 or balance_push_callback);
*/
wrq->enqueue_counter = 0;
__balance_callbacks(rq);
if (wrq->enqueue_counter)
WALT_BUG(WALT_BUG_WALT, NULL, "cpu: %d task was re-enqueued", cpu_of(rq));
/* lock is no longer pinned, raw unlock using same flags as locking */
raw_spin_rq_unlock_irqrestore(rq, rf.flags);
return 0;
}
static int cpu_drain_rq(unsigned int cpu)
{
if (!cpu_online(cpu))
return 0;
if (available_idle_cpu(cpu))
return 0;
/* this will schedule, must not be in atomic context */
return stop_one_cpu(cpu, drain_rq_cpu_stop, NULL);
}
struct drain_thread_data {
cpumask_t cpus_to_drain;
};
static struct drain_thread_data drain_data = {
.cpus_to_drain = { CPU_BITS_NONE }
};
static int __ref try_drain_rqs(void *data)
{
cpumask_t *cpus_ptr = &((struct drain_thread_data *)data)->cpus_to_drain;
int cpu;
unsigned long flags;
while (!kthread_should_stop()) {
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
if (cpumask_weight(cpus_ptr)) {
cpumask_t local_cpus;
cpumask_copy(&local_cpus, cpus_ptr);
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
for_each_cpu(cpu, &local_cpus)
cpu_drain_rq(cpu);
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
cpumask_andnot(cpus_ptr, cpus_ptr, &local_cpus);
}
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
set_current_state(TASK_RUNNING);
}
return 0;
}
void restrict_cpus_and_freq(struct cpumask *cpus)
{
struct cpumask restrict_cpus;
int cpu = 0;
cpumask_copy(&restrict_cpus, cpus);
if (cpumask_intersects(cpus, cpu_partial_halt_mask) &&
!cpumask_intersects(cpus, cpu_halt_mask) &&
is_state1()) {
for_each_cpu(cpu, cpus)
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
sysctl_max_freq_partial_halt;
} else {
for_each_cpu(cpu, cpus) {
cpumask_or(&restrict_cpus, &restrict_cpus, &(cpu_cluster(cpu)->cpus));
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
FREQ_QOS_MAX_DEFAULT_VALUE;
}
}
update_smart_freq_capacities();
}
struct task_struct *walt_drain_thread;
static int halt_cpus(struct cpumask *cpus, enum pause_type type)
{
int cpu;
int ret = 0;
u64 start_time = 0;
struct halt_cpu_state *halt_cpu_state;
unsigned long flags;
if (trace_halt_cpus_enabled())
start_time = sched_clock();
trace_halt_cpus_start(cpus, 1);
/* add the cpus to the halt mask */
for_each_cpu(cpu, cpus) {
if (cpu == cpumask_first(system_32bit_el0_cpumask())) {
ret = -EINVAL;
goto out;
}
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (type == HALT)
cpumask_set_cpu(cpu, cpu_halt_mask);
else
cpumask_set_cpu(cpu, cpu_partial_halt_mask);
/* guarantee mask written at this time */
wmb();
}
restrict_cpus_and_freq(cpus);
/* migrate tasks off the cpu */
if (type == HALT) {
/* signal and wakeup the drain kthread */
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
cpumask_or(&drain_data.cpus_to_drain, &drain_data.cpus_to_drain, cpus);
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
wake_up_process(walt_drain_thread);
}
out:
trace_halt_cpus(cpus, start_time, 1, ret);
return ret;
}
/* start the cpus again, and kick them to balance */
static int start_cpus(struct cpumask *cpus, enum pause_type type)
{
u64 start_time = sched_clock();
struct halt_cpu_state *halt_cpu_state;
int cpu;
trace_halt_cpus_start(cpus, 0);
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
/* guarantee the halt state is updated */
wmb();
if (type == HALT)
cpumask_clear_cpu(cpu, cpu_halt_mask);
else
cpumask_clear_cpu(cpu, cpu_partial_halt_mask);
/* kick the cpu so it can pull tasks
* after the mask has been cleared.
*/
walt_smp_call_newidle_balance(cpu);
}
restrict_cpus_and_freq(cpus);
trace_halt_cpus(cpus, start_time, 0, 0);
return 0;
}
/* update client for cpus in yield/halt mask */
static void update_clients(struct cpumask *cpus, bool halt, enum pause_client client,
enum pause_type type)
{
int cpu;
struct halt_cpu_state *halt_cpu_state;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (halt)
halt_cpu_state->client_vote_mask[type] |= client;
else
halt_cpu_state->client_vote_mask[type] &= ~client;
}
}
/* remove cpus that are already halted */
static void update_halt_cpus(struct cpumask *cpus, enum pause_type type)
{
int cpu;
struct halt_cpu_state *halt_cpu_state;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (halt_cpu_state->client_vote_mask[type])
cpumask_clear_cpu(cpu, cpus);
}
}
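/*
 * A cpu can be halted by several clients at once; client votes accumulate
 * in client_vote_mask[] and the cpu is only started again once every vote
 * for that pause type has been cleared.
 */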
/* cpus will be modified */
static int walt_halt_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
int ret = 0;
cpumask_t requested_cpus;
unsigned long flags;
raw_spin_lock_irqsave(&halt_lock, flags);
cpumask_copy(&requested_cpus, cpus);
/* remove cpus that are already halted */
update_halt_cpus(cpus, type);
if (cpumask_empty(cpus)) {
update_clients(&requested_cpus, true, client, type);
goto unlock;
}
ret = halt_cpus(cpus, type);
if (ret < 0)
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
cpumask_pr_args(&requested_cpus));
else
update_clients(&requested_cpus, true, client, type);
unlock:
raw_spin_unlock_irqrestore(&halt_lock, flags);
return ret;
}
int walt_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_halt_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_pause_cpus);
int walt_partial_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_halt_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_pause_cpus);
/* cpus will be modified */
static int walt_start_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
int ret = 0;
cpumask_t requested_cpus;
unsigned long flags;
raw_spin_lock_irqsave(&halt_lock, flags);
cpumask_copy(&requested_cpus, cpus);
update_clients(&requested_cpus, false, client, type);
/* remove cpus that should still be halted */
update_halt_cpus(cpus, type);
ret = start_cpus(cpus, type);
if (ret < 0) {
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
cpumask_pr_args(&requested_cpus));
/* restore/increment ref counts in case of error */
update_clients(&requested_cpus, true, client, type);
}
raw_spin_unlock_irqrestore(&halt_lock, flags);
return ret;
}
int walt_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_start_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_resume_cpus);
int walt_partial_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_start_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_resume_cpus);
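/*
 * Usage sketch for the exported pause/resume API above, illustration only
 * and therefore kept under #if 0: PAUSE_CORE_CTL is assumed to be one of
 * the pause_client values from walt.h, so substitute whichever client
 * identity applies. The mask argument can be modified by the callee,
 * hence the caller rebuilds its own copy before resuming.
 */
#if 0	/* illustrative example, not compiled */
static int example_pause_then_resume(void)
{
	cpumask_t cpus;
	int ret;

	cpumask_clear(&cpus);
	cpumask_set_cpu(2, &cpus);
	cpumask_set_cpu(3, &cpus);

	ret = walt_pause_cpus(&cpus, PAUSE_CORE_CTL);
	if (ret)
		return ret;	/* e.g. -EAGAIN while WALT is disabled */

	/* CPUs 2-3 stay halted until every client drops its vote */

	cpumask_clear(&cpus);
	cpumask_set_cpu(2, &cpus);
	cpumask_set_cpu(3, &cpus);
	return walt_resume_cpus(&cpus, PAUSE_CORE_CTL);
}
#endif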
/* return true if the requested client has fully halted one of the cpus */
bool cpus_halted_by_client(struct cpumask *cpus, enum pause_client client)
{
struct halt_cpu_state *halt_cpu_state;
int cpu;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if ((bool)(halt_cpu_state->client_vote_mask[HALT] & client))
return true;
}
return false;
}
static void android_rvh_get_nohz_timer_target(void *unused, int *cpu, bool *done)
{
int i, default_cpu = -1;
struct sched_domain *sd;
cpumask_t active_unhalted;
*done = true;
cpumask_andnot(&active_unhalted, cpu_active_mask, cpu_halt_mask);
if (housekeeping_cpu(*cpu, HK_TYPE_TIMER) && !cpu_halted(*cpu)) {
if (!available_idle_cpu(*cpu))
return;
default_cpu = *cpu;
}
/*
* find first cpu halted by core control and try to avoid
* affecting externally halted cpus.
*/
if (!cpumask_weight(&active_unhalted)) {
cpumask_t tmp_pause, tmp_part_pause, tmp_halt, *tmp;
cpumask_and(&tmp_part_pause, cpu_active_mask, &cpus_part_paused_by_us);
cpumask_and(&tmp_pause, cpu_active_mask, &cpus_paused_by_us);
cpumask_and(&tmp_halt, cpu_active_mask, cpu_halt_mask);
tmp = cpumask_weight(&tmp_part_pause) ? &tmp_part_pause :
cpumask_weight(&tmp_pause) ? &tmp_pause : &tmp_halt;
for_each_cpu(i, tmp) {
if ((*cpu == i) && cpumask_weight(tmp) > 1)
continue;
*cpu = i;
return;
}
}
rcu_read_lock();
for_each_domain(*cpu, sd) {
for_each_cpu_and(i, sched_domain_span(sd),
housekeeping_cpumask(HK_TYPE_TIMER)) {
if (*cpu == i)
continue;
if (!available_idle_cpu(i) && !cpu_halted(i)) {
*cpu = i;
goto unlock;
}
}
}
if (default_cpu == -1) {
for_each_cpu_and(i, &active_unhalted,
housekeeping_cpumask(HK_TYPE_TIMER)) {
if (*cpu == i)
continue;
if (!available_idle_cpu(i)) {
*cpu = i;
goto unlock;
}
}
/* choose any active unhalted cpu */
default_cpu = cpumask_any(&active_unhalted);
if (unlikely(default_cpu >= nr_cpu_ids))
goto unlock;
}
*cpu = default_cpu;
unlock:
rcu_read_unlock();
}
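/*
 * Target choice above: a busy, unhalted housekeeping *cpu is kept as is;
 * when every active CPU is halted, the search prefers CPUs we partially
 * paused, then CPUs we paused, then any halted CPU; otherwise the domain
 * walk looks for a busy housekeeping CPU that is not halted, and the
 * final fallback is an idle housekeeping CPU or any active unhalted CPU.
 */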
/**
* android_rvh_set_cpus_allowed_by_task: disallow cpus that are halted
*
* NOTES: may be called if migration is disabled for the task
* if per-cpu-kthread, must not deliberately return an invalid cpu
* if !per-cpu-kthread, may return an invalid cpu (reject dest_cpu)
* must not change cpu in the in_execve 32-bit task case
*/
static void android_rvh_set_cpus_allowed_by_task(void *unused,
const struct cpumask *cpu_valid_mask,
const struct cpumask *new_mask,
struct task_struct *p,
unsigned int *dest_cpu)
{
if (unlikely(walt_disabled))
return;
/* allow kthreads to change affinity regardless of halt status of dest_cpu */
if (p->flags & PF_KTHREAD)
return;
if (cpu_halted(*dest_cpu) && !p->migration_disabled) {
cpumask_t allowed_cpus;
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve))
return;
/* remove halted cpus from the valid mask, and store locally */
cpumask_andnot(&allowed_cpus, cpu_valid_mask, cpu_halt_mask);
cpumask_and(&allowed_cpus, &allowed_cpus, new_mask);
/* do not modify dest_cpu if there are no cpus to choose from */
if (!cpumask_empty(&allowed_cpus))
*dest_cpu = cpumask_any_and_distribute(&allowed_cpus, new_mask);
}
}
/**
* android_rvh_rto_next_cpu: disallow halted cpus for irq work functions
*/
static void android_rvh_rto_next_cpu(void *unused, int rto_cpu, struct cpumask *rto_mask, int *cpu)
{
cpumask_t allowed_cpus;
if (unlikely(walt_disabled))
return;
if (cpu_halted(*cpu)) {
/* remove halted cpus from the valid mask, and store locally */
cpumask_andnot(&allowed_cpus, rto_mask, cpu_halt_mask);
*cpu = cpumask_next(rto_cpu, &allowed_cpus);
}
}
/**
* android_rvh_is_cpu_allowed: disallow cpus that are halted
*
* NOTE: this function will not be called if migration is disabled for the task.
*/
static void android_rvh_is_cpu_allowed(void *unused, struct task_struct *p, int cpu, bool *allowed)
{
if (unlikely(walt_disabled))
return;
if (cpumask_test_cpu(cpu, cpu_halt_mask)) {
cpumask_t cpus_allowed;
/* default reject for any halted cpu */
*allowed = false;
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve)) {
/* 32bit task in execve. allow this cpu. */
*allowed = true;
return;
}
/*
* for cfs threads, active cpus in the affinity are allowed
* but halted cpus are not allowed
*/
cpumask_and(&cpus_allowed, cpu_active_mask, p->cpus_ptr);
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_halt_mask);
if (!(p->flags & PF_KTHREAD)) {
if (cpumask_empty(&cpus_allowed)) {
/*
* All affined cpus are inactive or halted.
* Allow this cpu for user threads
*/
*allowed = true;
}
return;
}
/* for kthreads, dying cpus are not allowed */
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_dying_mask);
if (cpumask_empty(&cpus_allowed)) {
/*
* All affined cpus inactive or halted or dying.
* Allow this cpu for kthreads
*/
*allowed = true;
}
}
}
void walt_halt_init(void)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
walt_drain_thread = kthread_run(try_drain_rqs, &drain_data, "halt_drain_rqs");
if (IS_ERR(walt_drain_thread)) {
pr_err("Error creating walt drain thread\n");
return;
}
sched_setscheduler_nocheck(walt_drain_thread, SCHED_FIFO, &param);
register_trace_android_rvh_get_nohz_timer_target(android_rvh_get_nohz_timer_target, NULL);
register_trace_android_rvh_set_cpus_allowed_by_task(
android_rvh_set_cpus_allowed_by_task, NULL);
register_trace_android_rvh_rto_next_cpu(android_rvh_rto_next_cpu, NULL);
register_trace_android_rvh_is_cpu_allowed(android_rvh_is_cpu_allowed, NULL);
}
#endif /* CONFIG_HOTPLUG_CPU */

1193
kernel/sched/walt/walt_lb.c Normal file

File diff suppressed because it is too large

431
kernel/sched/walt/walt_rt.c Normal file
View File

@@ -0,0 +1,431 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/sched.h>
#include "walt.h"
#include "trace.h"
static DEFINE_PER_CPU(cpumask_var_t, walt_local_cpu_mask);
DEFINE_PER_CPU(u64, rt_task_arrival_time) = 0;
static bool long_running_rt_task_trace_rgstrd;
static void rt_task_arrival_marker(void *unused, bool preempt,
struct task_struct *prev, struct task_struct *next,
unsigned int prev_state)
{
unsigned int cpu = raw_smp_processor_id();
if (next->policy == SCHED_FIFO && next != cpu_rq(cpu)->stop)
per_cpu(rt_task_arrival_time, cpu) = rq_clock_task(this_rq());
else
per_cpu(rt_task_arrival_time, cpu) = 0;
}
static void long_running_rt_task_notifier(void *unused, struct rq *rq)
{
struct task_struct *curr = rq->curr;
unsigned int cpu = raw_smp_processor_id();
if (!sysctl_sched_long_running_rt_task_ms)
return;
if (!per_cpu(rt_task_arrival_time, cpu))
return;
if (per_cpu(rt_task_arrival_time, cpu) && curr->policy != SCHED_FIFO) {
/*
* It is possible that the scheduling policy for the current
* task might get changed after task arrival time stamp is
* noted during sched_switch of RT task. To avoid such false
* positives, reset arrival time stamp.
*/
per_cpu(rt_task_arrival_time, cpu) = 0;
return;
}
/*
* Since we are called from the main tick, rq clock task must have
* been updated very recently. Use it directly, instead of
* update_rq_clock_task() to avoid warnings.
*/
if (rq->clock_task -
per_cpu(rt_task_arrival_time, cpu)
> sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC) {
printk_deferred("RT task %s (%d) runtime > %u now=%llu task arrival time=%llu runtime=%llu\n",
curr->comm, curr->pid,
sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC,
rq->clock_task,
per_cpu(rt_task_arrival_time, cpu),
rq->clock_task -
per_cpu(rt_task_arrival_time, cpu));
BUG();
}
}
int sched_long_running_rt_task_ms_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
static DEFINE_MUTEX(mutex);
mutex_lock(&mutex);
ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
if (sysctl_sched_long_running_rt_task_ms > 0 &&
sysctl_sched_long_running_rt_task_ms < 800)
sysctl_sched_long_running_rt_task_ms = 800;
if (write && !long_running_rt_task_trace_rgstrd) {
register_trace_sched_switch(rt_task_arrival_marker, NULL);
register_trace_android_vh_scheduler_tick(long_running_rt_task_notifier, NULL);
long_running_rt_task_trace_rgstrd = true;
}
mutex_unlock(&mutex);
return ret;
}
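/*
 * Semantics of the knob above: the first write registers the sched_switch
 * and scheduler_tick hooks (they stay inert while the value is 0), values
 * between 1 and 799 are raised to a minimum of 800 ms, and once armed a
 * SCHED_FIFO task seen on-CPU longer than the threshold is reported via
 * printk_deferred() and then triggers BUG(). The exact /proc/sys path is
 * determined by the sysctl registration elsewhere in this module.
 */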
static void walt_rt_energy_aware_wake_cpu(struct task_struct *task, struct cpumask *lowest_mask,
int ret, int *best_cpu)
{
int cpu;
unsigned long util, best_cpu_util = ULONG_MAX;
unsigned long best_cpu_util_cum = ULONG_MAX;
unsigned long util_cum;
unsigned long tutil = task_util(task);
unsigned int best_idle_exit_latency = UINT_MAX;
unsigned int cpu_idle_exit_latency = UINT_MAX;
bool boost_on_big = rt_boost_on_big();
int cluster;
int order_index = (boost_on_big && num_sched_clusters > 1) ? 1 : 0;
int end_index = 0;
bool best_cpu_lt = true;
if (unlikely(walt_disabled))
return;
if (!ret)
return; /* No targets found */
rcu_read_lock();
if (soc_feat(SOC_ENABLE_SILVER_RT_SPREAD_BIT) && order_index == 0)
end_index = 1;
for (cluster = 0; cluster < num_sched_clusters; cluster++) {
for_each_cpu_and(cpu, lowest_mask, &cpu_array[order_index][cluster]) {
bool lt;
trace_sched_cpu_util(cpu, lowest_mask);
if (!cpu_active(cpu))
continue;
if (cpu_halted(cpu))
continue;
if (sched_cpu_high_irqload(cpu))
continue;
if (__cpu_overutilized(cpu, tutil))
continue;
util = cpu_util(cpu);
lt = (walt_low_latency_task(cpu_rq(cpu)->curr) ||
walt_nr_rtg_high_prio(cpu));
/*
* When the best is suitable and the current is not,
* skip it
*/
if (lt && !best_cpu_lt)
continue;
/*
* Either both are suitable or unsuitable; load takes
* precedence.
*/
if (!(best_cpu_lt ^ lt) && (util > best_cpu_util))
continue;
/*
* If the previous CPU has same load, keep it as
* best_cpu.
*/
if (best_cpu_util == util && *best_cpu == task_cpu(task))
continue;
/*
* If candidate CPU is the previous CPU, select it.
* Otherwise, if its load is same with best_cpu and in
* a shallower C-state, select it. If all above
* conditions are same, select the least cumulative
* window demand CPU.
*/
cpu_idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(cpu));
util_cum = cpu_util_cum(cpu);
if (cpu != task_cpu(task) && best_cpu_util == util) {
if (best_idle_exit_latency < cpu_idle_exit_latency)
continue;
if (best_idle_exit_latency == cpu_idle_exit_latency &&
best_cpu_util_cum < util_cum)
continue;
}
best_idle_exit_latency = cpu_idle_exit_latency;
best_cpu_util_cum = util_cum;
best_cpu_util = util;
*best_cpu = cpu;
best_cpu_lt = lt;
}
if (cluster < end_index) {
if (*best_cpu == -1 || !available_idle_cpu(*best_cpu))
continue;
}
if (*best_cpu != -1)
break;
}
rcu_read_unlock();
}
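/*
 * Candidate filtering and tie-breaking in the scan above: a CPU must be
 * active, unhalted, not under high IRQ load and not overutilized by the
 * task's demand; among survivors, low-latency/RTG suitability is compared
 * first, then lower utilization wins, the task's previous CPU is kept on
 * equal load, and remaining ties fall back to shallower idle exit latency
 * and lower cumulative window demand. The cluster walk starts from a
 * different order when rt_boost_on_big() applies, and on silver-spread
 * parts a missing or non-idle pick in the first cluster lets the search
 * continue to the next one.
 */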
#ifdef CONFIG_UCLAMP_TASK
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned int min_cap;
unsigned int max_cap;
unsigned int cpu_cap;
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
max_cap = uclamp_eff_value(p, UCLAMP_MAX);
cpu_cap = capacity_orig_of(cpu);
return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
return true;
}
#endif
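/*
 * Worked example for the uclamp variant above: a task with
 * UCLAMP_MIN = 300 and UCLAMP_MAX = 1024 fits a CPU of original capacity
 * 512, because min(300, 1024) = 300 <= 512; a CPU is rejected only when
 * both effective clamps exceed its capacity. Without CONFIG_UCLAMP_TASK
 * every CPU is considered to fit.
 */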
/*
* walt specific should_honor_rt_sync (see rt.c). this will honor
* the sync flag regardless of whether the current waker is cfs or rt
*/
static inline bool walt_should_honor_rt_sync(struct rq *rq, struct task_struct *p,
bool sync)
{
return sync &&
p->prio <= rq->rt.highest_prio.next &&
rq->rt.rt_nr_running <= 2;
}
enum rt_fastpaths {
NONE = 0,
NON_WAKEUP,
SYNC_WAKEUP,
CLUSTER_PACKING_FASTPATH,
};
static void walt_select_task_rq_rt(void *unused, struct task_struct *task, int cpu,
int sd_flag, int wake_flags, int *new_cpu)
{
struct task_struct *curr;
struct rq *rq, *this_cpu_rq;
bool may_not_preempt;
bool sync = !!(wake_flags & WF_SYNC);
int ret, target = -1, this_cpu;
struct cpumask *lowest_mask = NULL;
int packing_cpu = -1;
int fastpath = NONE;
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
struct walt_task_struct *wts;
if (unlikely(walt_disabled))
return;
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) {
fastpath = NON_WAKEUP;
goto out;
}
this_cpu = raw_smp_processor_id();
this_cpu_rq = cpu_rq(this_cpu);
wts = (struct walt_task_struct *) task->android_vendor_data1;
/*
* Respect the sync flag as long as the task can run on this CPU.
*/
if (sysctl_sched_sync_hint_enable && cpu_active(this_cpu) && !cpu_halted(this_cpu) &&
cpumask_test_cpu(this_cpu, task->cpus_ptr) &&
cpumask_test_cpu(this_cpu, &wts->reduce_mask) &&
walt_should_honor_rt_sync(this_cpu_rq, task, sync)) {
fastpath = SYNC_WAKEUP;
*new_cpu = this_cpu;
goto out;
}
*new_cpu = cpu; /* previous CPU as back up */
rq = cpu_rq(cpu);
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
/*
* If the current task on @p's runqueue is a softirq task,
* it may run without preemption for a time that is
* ill-suited for a waiting RT task. Therefore, try to
* wake this RT task on another runqueue.
*
* Otherwise, just let it ride on the affined RQ and the
* post-schedule router will push the preempted task away
*
* This test is optimistic, if we get it wrong the load-balancer
* will have to sort it out.
*
* We take into account the capacity of the CPU to ensure it fits the
* requirement of the task - which is only important on heterogeneous
* systems like big.LITTLE.
*/
may_not_preempt = cpu_busy_with_softirqs(cpu);
lowest_mask = this_cpu_cpumask_var_ptr(walt_local_cpu_mask);
/*
* If we're on an asym system, ensure we consider the different capacities
* of the CPUs when searching for the lowest_mask.
*/
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri, task,
lowest_mask, walt_rt_task_fits_capacity);
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
if (packing_cpu >= 0) {
while (packing_cpu < WALT_NR_CPUS) {
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
cpu_active(packing_cpu) &&
!cpu_halted(packing_cpu) &&
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 1))
break;
packing_cpu++;
}
if (packing_cpu < WALT_NR_CPUS) {
fastpath = CLUSTER_PACKING_FASTPATH;
*new_cpu = packing_cpu;
goto unlock;
}
}
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
if (!cpumask_empty(&lowest_mask_reduced))
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, &target);
if (target == -1)
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, &target);
/*
* If cpu is non-preemptible, prefer remote cpu
* even if it's running a higher-prio task.
* Otherwise: Don't bother moving it if the destination CPU is
* not running a lower priority task.
*/
if (target != -1 &&
(may_not_preempt || task->prio < cpu_rq(target)->rt.highest_prio.curr))
*new_cpu = target;
/* if backup or chosen cpu is halted, pick something else */
if (cpu_halted(*new_cpu)) {
cpumask_t non_halted;
/* choose the lowest-order, unhalted, allowed CPU */
cpumask_andnot(&non_halted, task->cpus_ptr, cpu_halt_mask);
target = cpumask_first(&non_halted);
if (target < nr_cpu_ids)
*new_cpu = target;
}
unlock:
rcu_read_unlock();
out:
trace_sched_select_task_rt(task, fastpath, *new_cpu, lowest_mask);
}
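/*
 * Placement order implemented above: non-wakeup balance requests keep the
 * previous CPU; a sync wake-up stays on the waking CPU when that CPU is
 * usable for the task; otherwise a qualifying cluster-packing CPU is
 * taken, then the energy-aware search runs against the task's reduce_mask
 * before the full lowest_mask, the previous CPU remains the fallback when
 * the target would not be preferable, and a halted final choice is
 * replaced by the lowest-numbered unhalted CPU allowed by the affinity.
 */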
static void walt_rt_find_lowest_rq(void *unused, struct task_struct *task,
struct cpumask *lowest_mask, int ret, int *best_cpu)
{
int packing_cpu = -1;
int fastpath = 0;
struct walt_task_struct *wts;
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
if (unlikely(walt_disabled))
return;
wts = (struct walt_task_struct *) task->android_vendor_data1;
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
if (packing_cpu >= 0) {
while (packing_cpu < WALT_NR_CPUS) {
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
cpu_active(packing_cpu) &&
!cpu_halted(packing_cpu) &&
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 2))
break;
packing_cpu++;
}
if (packing_cpu < WALT_NR_CPUS) {
fastpath = CLUSTER_PACKING_FASTPATH;
*best_cpu = packing_cpu;
goto out;
}
}
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
if (!cpumask_empty(&lowest_mask_reduced))
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, best_cpu);
if (*best_cpu == -1)
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, best_cpu);
/*
* Walt was not able to find a non-halted best cpu. Ensure that
* find_lowest_rq doesn't use a halted cpu going forward, but
* does a best effort itself to find a good CPU.
*/
if (*best_cpu == -1)
cpumask_andnot(lowest_mask, lowest_mask, cpu_halt_mask);
out:
trace_sched_rt_find_lowest_rq(task, fastpath, *best_cpu, lowest_mask);
}
void walt_rt_init(void)
{
unsigned int i;
for_each_possible_cpu(i) {
if (!(zalloc_cpumask_var_node(&per_cpu(walt_local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i)))) {
pr_err("walt_local_cpu_mask alloc failed for cpu%d\n", i);
return;
}
}
register_trace_android_rvh_select_task_rq_rt(walt_select_task_rq_rt, NULL);
register_trace_android_rvh_find_lowest_rq(walt_rt_find_lowest_rq, NULL);
}

161
kernel/sched/walt/walt_tp.c Normal file
View File

@@ -0,0 +1,161 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <linux/cpu.h>
#include <linux/tracepoint.h>
#include <trace/hooks/sched.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
#include "perf_trace_counters.h"
unsigned int sysctl_sched_dynamic_tp_enable;
#define USE_CPUHP_STATE CPUHP_AP_ONLINE_DYN
DEFINE_PER_CPU(u32, cntenset_val);
DEFINE_PER_CPU(unsigned long, previous_ccnt);
DEFINE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DEFINE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
DEFINE_PER_CPU(u32, old_pid);
DEFINE_PER_CPU(u32, hotplug_flag);
DEFINE_PER_CPU(u64, prev_time);
static int tracectr_cpu_hotplug_coming_up(unsigned int cpu)
{
per_cpu(hotplug_flag, cpu) = 1;
return 0;
}
static void setup_prev_cnts(u32 cpu, u32 cnten_val)
{
int i;
if (cnten_val & CC)
per_cpu(previous_ccnt, cpu) =
read_sysreg(pmccntr_el0);
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read value */
per_cpu(previous_l1_cnts[i], cpu) =
read_sysreg(pmxevcntr_el0);
}
}
}
void tracectr_notifier(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next,
unsigned int prev_state)
{
u32 cnten_val;
int current_pid;
u32 cpu = task_cpu(next);
u64 now;
if (!trace_sched_switch_with_ctrs_enabled())
return;
current_pid = next->pid;
if (per_cpu(old_pid, cpu) != -1) {
cnten_val = read_sysreg(pmcntenset_el0);
per_cpu(cntenset_val, cpu) = cnten_val;
/* Disable all the counters that were enabled */
write_sysreg(cnten_val, pmcntenclr_el0);
if (per_cpu(hotplug_flag, cpu) == 1) {
per_cpu(hotplug_flag, cpu) = 0;
setup_prev_cnts(cpu, cnten_val);
} else {
trace_sched_switch_with_ctrs(preempt, prev, next);
now = sched_clock();
if ((now - per_cpu(prev_time, cpu)) > NSEC_PER_SEC) {
trace_sched_switch_ctrs_cfg(cpu);
per_cpu(prev_time, cpu) = now;
}
}
/* Enable all the counters that were disabled */
write_sysreg(cnten_val, pmcntenset_el0);
}
per_cpu(old_pid, cpu) = current_pid;
}
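/*
 * Counter handling in the notifier above: the currently enabled PMU
 * counters are disabled across the read so the per-switch deltas come
 * from a stable snapshot and are re-enabled afterwards; a CPU that has
 * just come online (hotplug_flag set) only re-baselines its saved counts
 * instead of emitting a trace event for that first switch, and the
 * per-counter configuration is re-traced at most once per second.
 */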
static void register_sched_switch_ctrs(void)
{
int cpu, rc;
for_each_possible_cpu(cpu)
per_cpu(old_pid, cpu) = -1;
rc = cpuhp_setup_state_nocalls(USE_CPUHP_STATE, "tracectr_cpu_hotplug",
tracectr_cpu_hotplug_coming_up, NULL);
if (rc >= 0)
register_trace_sched_switch(tracectr_notifier, NULL);
}
static void unregister_sched_switch_ctrs(void)
{
unregister_trace_sched_switch(tracectr_notifier, NULL);
cpuhp_remove_state_nocalls(USE_CPUHP_STATE);
}
const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
{
#ifdef CONFIG_SMP
return rd ? rd->span : NULL;
#else
return NULL;
#endif
}
static void sched_overutilized(void *data, struct root_domain *rd,
bool overutilized)
{
if (trace_sched_overutilized_enabled()) {
char span[SPAN_SIZE];
cpumap_print_to_pagebuf(false, span, sched_trace_rd_span(rd));
trace_sched_overutilized(overutilized, span);
}
}
static void walt_register_dynamic_tp_events(void)
{
register_trace_sched_overutilized_tp(sched_overutilized, NULL);
register_sched_switch_ctrs();
}
static void walt_unregister_dynamic_tp_events(void)
{
unregister_trace_sched_overutilized_tp(sched_overutilized, NULL);
unregister_sched_switch_ctrs();
}
int sched_dynamic_tp_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
static DEFINE_MUTEX(mutex);
int ret = 0;
unsigned int *val = (unsigned int *)table->data;
unsigned int old_val;
mutex_lock(&mutex);
old_val = sysctl_sched_dynamic_tp_enable;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write || (old_val == sysctl_sched_dynamic_tp_enable))
goto done;
if (*val)
walt_register_dynamic_tp_events();
else
walt_unregister_dynamic_tp_events();
done:
mutex_unlock(&mutex);
return ret;
}
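/*
 * Toggle semantics of the handler above: writing a value different from
 * the current one either registers (non-zero) or unregisters (zero) the
 * overutilized tracepoint and the per-CPU PMU sched_switch counters;
 * rewriting the same value, or a read, leaves the registration untouched.
 * The sysctl entry itself is assumed to be declared with the other WALT
 * knobs elsewhere in this module.
 */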