Add Samsung-specific changes

This commit is contained in:
2025-08-11 14:29:00 +02:00
parent c66122e619
commit 4d134a1294
2688 changed files with 1127995 additions and 11475 deletions

View File

@@ -103,6 +103,7 @@ obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MSM_SYSSTATS) += msm_sysstats.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/

View File

@@ -60,12 +60,6 @@
#include "audit.h"
// [ SEC_SELINUX_PORTING_COMMON
#ifdef CONFIG_PROC_AVC
#include <linux/proc_avc.h>
#endif
// ] SEC_SELINUX_PORTING_COMMON
/* No auditing will take place until audit_initialized == AUDIT_INITIALIZED.
* (Initialization happens after skb_init is called.) */
#define AUDIT_DISABLED -1
@@ -548,16 +542,8 @@ static void kauditd_printk_skb(struct sk_buff *skb)
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
// [ SEC_SELINUX_PORTING_COMMON
#ifdef CONFIG_PROC_AVC
if (nlh->nlmsg_type != AUDIT_EOE && nlh->nlmsg_type != AUDIT_NETFILTER_CFG)
sec_avc_log("%s\n", data);
#else
if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit())
pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
#endif
// ] SEC_SELINUX_PORTING_COMMON
}
/**
@@ -799,15 +785,6 @@ retry:
} else
goto retry;
} else {
// [ SEC_SELINUX_PORTING_COMMON
#ifdef CONFIG_PROC_AVC
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
if (nlh->nlmsg_type != AUDIT_EOE && nlh->nlmsg_type != AUDIT_NETFILTER_CFG)
sec_avc_log("%s\n", data);
#endif
// ] SEC_SELINUX_PORTING_COMMON
/* skb sent - drop the extra reference and continue */
consume_skb(skb);
failed = 0;

View File

@@ -456,40 +456,6 @@ static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
}
#ifdef CONFIG_SAMSUNG_FREECESS
/**
* Check if the task is allowed to be added to the freezer group
* only the admin can add the task to the freezer group.
*/
static int freezer_can_attach(struct cgroup_taskset *tset)
{
const struct cred *cred = current_cred(), *tcred;
struct task_struct *task;
struct cgroup_subsys_state *css;
cgroup_taskset_for_each(task, css, tset) {
tcred = __task_cred(task);
//Only system process and root have the permission.
if ((current != task) && !(cred->euid.val == 1000 || capable(CAP_SYS_ADMIN))) {
pr_err("Permission problem\n");
return -EACCES;
}
}
return 0;
}
/**
* Cancel the attach action when it failed. It's usually used to restore the attach action.
* But freezer attach just sends the signal and always succeeds,
* so it doesn't need to restore any action.
*/
static void freezer_cancel_attach(struct cgroup_taskset *tset)
{
}
#endif
static struct cftype files[] = {
{
.name = "state",
@@ -518,9 +484,5 @@ struct cgroup_subsys freezer_cgrp_subsys = {
.attach = freezer_attach,
.fork = freezer_fork,
.legacy_cftypes = files,
#ifdef CONFIG_SAMSUNG_FREECESS
.can_attach = freezer_can_attach,
.cancel_attach = freezer_cancel_attach,
#endif
};
EXPORT_SYMBOL_GPL(freezer_cgrp_subsys);

View File

@@ -120,6 +120,18 @@ config DMA_RESTRICTED_POOL
and <kernel/dma/swiotlb.c>.
If unsure, say "n".
config SWIOTLB_NONLINEAR
bool "Allow swiotlb to use non-linear memory as bounce buffers"
depends on SWIOTLB
help
This allows the swiotlb driver to work with memory regions where
physical-to-virtual address translations can't be done using APIs
such as phys_to_virt. These could be reserved memory regions that
are not mapped by default or could be seen as "device" memory
accessed via ioremap().
If unsure, say "n".
#
# Should be selected if we can mmap non-coherent mappings to userspace.
# The only thing that is really required is a way to set an uncached bit
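To illustrate the kind of setup the help text above describes, here is a hedged sketch of a platform driver handing swiotlb a carved-out region that is not in the kernel linear map. pool_pa and pool_size are assumed to come from a reserved-memory node, and the prototype of swiotlb_late_init_with_tblpaddr() (added in kernel/dma/swiotlb.c below) is assumed to be exposed through <linux/swiotlb.h> elsewhere in this series.

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/swiotlb.h>

static int register_nonlinear_bounce_pool(phys_addr_t pool_pa, size_t pool_size)
{
	/* The region is not covered by phys_to_virt(), so map it explicitly. */
	void __iomem *va = ioremap(pool_pa, pool_size);

	if (!va)
		return -ENOMEM;

	/* Hand swiotlb both views: the physical base for slot/DMA math and
	 * the CPU mapping for bounce copies; the last argument is the pool
	 * size in IO_TLB_SIZE-sized slots. */
	return swiotlb_late_init_with_tblpaddr((char __force *)va, pool_pa,
					       pool_size >> IO_TLB_SHIFT);
}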

View File

@@ -96,6 +96,16 @@ static struct io_tlb_mem io_tlb_default_mem;
#endif /* CONFIG_SWIOTLB_DYNAMIC */
#ifdef CONFIG_SWIOTLB_NONLINEAR
phys_addr_t io_tlb_start, io_tlb_end;
static unsigned long io_tlb_nslabs;
static char *io_tlb_vstart;
static inline unsigned char *swiotlb_phys_to_virt(phys_addr_t tlb_addr);
#else
#define swiotlb_phys_to_virt phys_to_virt
#endif
static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static unsigned long default_nareas;
@@ -266,7 +276,7 @@ void __init swiotlb_update_mem_attributes(void)
static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
unsigned long nslabs, bool late_alloc, unsigned int nareas)
{
void *vaddr = phys_to_virt(start);
void *vaddr = swiotlb_phys_to_virt(start);
unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
mem->nslabs = nslabs;
@@ -518,6 +528,63 @@ error_area:
return -ENOMEM;
}
#ifdef CONFIG_SWIOTLB_NONLINEAR
static int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
unsigned long bytes = nslabs << IO_TLB_SHIFT;
unsigned int area_order;
/* protect against double initialization */
if (WARN_ON_ONCE(mem->nslabs))
return -ENOMEM;
if (!default_nareas)
swiotlb_adjust_nareas(num_possible_cpus());
area_order = get_order(array_size(sizeof(*mem->areas),
default_nareas));
mem->areas = (struct io_tlb_area *)
__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
if (!mem->areas)
return -ENOMEM;
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(array_size(sizeof(*mem->slots), nslabs)));
if (!mem->slots)
goto error_slots;
set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
swiotlb_init_io_tlb_pool(mem, io_tlb_start, nslabs, true, default_nareas);
add_mem_pool(&io_tlb_default_mem, mem);
swiotlb_print_info();
return 0;
error_slots:
free_pages((unsigned long)mem->areas, area_order);
return -ENOMEM;
}
int swiotlb_late_init_with_tblpaddr(char *tlb,
phys_addr_t tlb_paddr, unsigned long nslabs)
{
unsigned long bytes;
if (io_tlb_start)
return -EBUSY;
bytes = nslabs << IO_TLB_SHIFT;
io_tlb_nslabs = nslabs;
io_tlb_start = tlb_paddr;
io_tlb_vstart = tlb;
io_tlb_end = io_tlb_start + bytes;
return swiotlb_late_init_with_tbl(tlb, nslabs);
}
EXPORT_SYMBOL_GPL(swiotlb_late_init_with_tblpaddr);
#endif /* CONFIG_SWIOTLB_NONLINEAR */
void __init swiotlb_exit(void)
{
struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
@@ -829,6 +896,13 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
}
#ifdef CONFIG_SWIOTLB_NONLINEAR
static inline unsigned char *swiotlb_phys_to_virt(phys_addr_t tlb_addr)
{
return (unsigned char *)(io_tlb_vstart + (tlb_addr - io_tlb_start));
}
#endif
/*
* Bounce: copy the swiotlb buffer from or back to the original dma location
*/
@@ -1431,6 +1505,10 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
swiotlb_release_slots(dev, tlb_addr);
}
#ifdef CONFIG_SWIOTLB_NONLINEAR
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
#endif
void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir)
{
@@ -1481,6 +1559,15 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
arch_sync_dma_for_device(swiotlb_addr, size, dir);
return dma_addr;
}
#ifdef CONFIG_SWIOTLB_NONLINEAR
EXPORT_SYMBOL_GPL(swiotlb_map);
size_t swiotlb_max_mapping_size(struct device *dev)
{
return 4096;
}
#else
size_t swiotlb_max_mapping_size(struct device *dev)
{
@@ -1497,6 +1584,7 @@ size_t swiotlb_max_mapping_size(struct device *dev)
return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
}
#endif
/**
* is_swiotlb_allocated() - check if the default software IO TLB is initialized
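As a worked note on the translation above (hypothetical numbers, not from this commit): swiotlb_phys_to_virt() simply rebases a bounce-slot physical address onto the driver-supplied virtual base, because phys_to_virt() would compute a bogus address for a pool that sits outside the linear map.

/* Standalone mirror of the helper, for illustration only. */
static inline void *nonlinear_tlb_vaddr(phys_addr_t tlb_addr,
					phys_addr_t tlb_start, char *vstart)
{
	return vstart + (tlb_addr - tlb_start);
}

/*
 * Example: with the pool reserved at 0x880000000 and ioremap()'d to vstart,
 * a slot at tlb_addr = 0x880000800 resolves to vstart + 0x800.
 */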

View File

@@ -76,10 +76,6 @@
#include <trace/hooks/mm.h>
#include <trace/hooks/dtask.h>
#ifdef CONFIG_SECURITY_DEFEX
#include <linux/defex.h>
#endif
/*
* The default value should be high enough to not crash a system that randomly
* crashes its kernel from time to time, but low enough to at least not permit
@@ -822,9 +818,6 @@ void __noreturn do_exit(long code)
synchronize_group_exit(tsk, code);
#ifdef CONFIG_SECURITY_DEFEX
task_defex_zero_creds(current);
#endif
WARN_ON(tsk->plug);
profile_task_exit(tsk);

View File

@@ -100,7 +100,6 @@
#include <linux/user_events.h>
#include <linux/iommu.h>
#include <linux/cpufreq_times.h>
#include <linux/task_integrity.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -120,10 +119,6 @@
#include <linux/kdp.h>
#endif
#ifdef CONFIG_SECURITY_DEFEX
#include <linux/defex.h>
#endif
/*
* Minimum number of threads to boot the kernel
*/
@@ -2008,57 +2003,6 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
task->signal->pids[type] = pid;
}
#ifdef CONFIG_FIVE
static int dup_task_integrity(unsigned long clone_flags,
struct task_struct *tsk)
{
int ret = 0;
if (clone_flags & CLONE_VM) {
task_integrity_get(TASK_INTEGRITY(current));
task_integrity_assign(tsk, TASK_INTEGRITY(current));
} else {
task_integrity_assign(tsk, task_integrity_alloc());
if (!TASK_INTEGRITY(tsk))
ret = -ENOMEM;
}
return ret;
}
static inline void task_integrity_cleanup(struct task_struct *tsk)
{
task_integrity_put(TASK_INTEGRITY(tsk));
}
static inline int task_integrity_apply(unsigned long clone_flags,
struct task_struct *tsk)
{
int ret = 0;
if (!(clone_flags & CLONE_VM))
ret = five_fork(current, tsk);
return ret;
}
#else
static inline int dup_task_integrity(unsigned long clone_flags,
struct task_struct *tsk)
{
return 0;
}
static inline void task_integrity_cleanup(struct task_struct *tsk)
{
}
static inline int task_integrity_apply(unsigned long clone_flags,
struct task_struct *tsk)
{
return 0;
}
#endif
static inline void rcu_copy_process(struct task_struct *p)
{
#ifdef CONFIG_PREEMPT_RCU
@@ -2577,14 +2521,9 @@ __latent_entropy struct task_struct *copy_process(
goto bad_fork_cleanup_perf;
/* copy all the process information */
shm_init_task(p);
retval = dup_task_integrity(clone_flags, p);
retval = security_task_alloc(p, clone_flags);
if (retval)
goto bad_fork_cleanup_audit;
retval = security_task_alloc(p, clone_flags);
if (retval) {
task_integrity_cleanup(p);
goto bad_fork_cleanup_audit;
}
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_security;
@@ -2763,10 +2702,6 @@ __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup;
}
retval = task_integrity_apply(clone_flags, p);
if (retval)
goto bad_fork_cancel_cgroup;
/* No more failure paths after this point. */
/*
@@ -3034,9 +2969,6 @@ pid_t kernel_clone(struct kernel_clone_args *args)
pid = get_task_pid(p, PIDTYPE_PID);
nr = pid_vnr(pid);
#ifdef CONFIG_SECURITY_DEFEX
task_defex_zero_creds(p);
#endif
if (clone_flags & CLONE_PARENT_SETTID)
put_user(nr, args->parent_tid);

695
kernel/msm_sysstats.c Normal file
View File

@@ -0,0 +1,695 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/msm_sysstats.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <net/genetlink.h>
#include <linux/atomic.h>
#include <linux/sched/cputime.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/fdtable.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/qcom_dma_heap.h>
struct tgid_iter {
unsigned int tgid;
struct task_struct *task;
};
static struct genl_family family;
static u64 (*sysstats_kgsl_get_stats)(pid_t pid);
static DEFINE_PER_CPU(__u32, sysstats_seqnum);
#define SYSSTATS_CMD_ATTR_MAX 3
static const struct nla_policy sysstats_cmd_get_policy[SYSSTATS_CMD_ATTR_MAX + 1] = {
[SYSSTATS_TASK_CMD_ATTR_PID] = { .type = NLA_U32 },
[SYSSTATS_TASK_CMD_ATTR_FOREACH] = { .type = NLA_U32 },
[SYSSTATS_TASK_CMD_ATTR_PIDS_OF_NAME] = { .type = NLA_NUL_STRING}};
/*
* The dummy function below lets the registered callback be swapped out
* without requiring any external synchronization with callers.
*/
static u64 sysstats_kgsl_stats(pid_t pid)
{
return 0;
}
void sysstats_register_kgsl_stats_cb(u64 (*cb)(pid_t pid))
{
sysstats_kgsl_get_stats = cb;
}
EXPORT_SYMBOL_GPL(sysstats_register_kgsl_stats_cb);
void sysstats_unregister_kgsl_stats_cb(void)
{
sysstats_kgsl_get_stats = sysstats_kgsl_stats;
}
EXPORT_SYMBOL_GPL(sysstats_unregister_kgsl_stats_cb);
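For context, a minimal hypothetical client of the two hooks above (the real consumer is the KGSL GPU driver, which is not part of this commit); it follows the dummy-callback pattern described in the comment, so unregistering never leaves a NULL pointer behind.

#include <linux/module.h>
#include <linux/msm_sysstats.h>

/* Return GPU memory charged to @pid in bytes; sysstats passes pid == -1
 * when it wants a system-wide total (see get_system_unreclaimable_info()). */
static u64 my_gpu_mem_usage(pid_t pid)
{
	return 0;	/* hypothetical driver accounting goes here */
}

static int __init my_gpu_client_init(void)
{
	sysstats_register_kgsl_stats_cb(my_gpu_mem_usage);
	return 0;
}
module_init(my_gpu_client_init);

static void __exit my_gpu_client_exit(void)
{
	/* swaps the dummy callback back in instead of leaving NULL behind */
	sysstats_unregister_kgsl_stats_cb();
}
module_exit(my_gpu_client_exit);
MODULE_LICENSE("GPL");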
static int sysstats_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
const struct nla_policy *policy = NULL;
switch (ops->cmd) {
case SYSSTATS_TASK_CMD_GET:
case SYSSTATS_PIDS_CMD_GET:
policy = sysstats_cmd_get_policy;
break;
case SYSSTATS_MEMINFO_CMD_GET:
break;
default:
return -EINVAL;
}
return nlmsg_validate_deprecated(info->nlhdr, GENL_HDRLEN,
SYSSTATS_CMD_ATTR_MAX, policy,
info->extack);
}
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
void *reply = genlmsg_data(genlhdr);
genlmsg_end(skb, reply);
return genlmsg_reply(skb, info);
}
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
size_t size)
{
struct sk_buff *skb;
void *reply;
skb = genlmsg_new(size, GFP_KERNEL);
if (!skb)
return -ENOMEM;
if (!info) {
int seq = this_cpu_inc_return(sysstats_seqnum) - 1;
reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
} else
reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
if (reply == NULL) {
nlmsg_free(skb);
return -EINVAL;
}
*skbp = skb;
return 0;
}
static struct task_struct *find_lock_task_mm(struct task_struct *p)
{
struct task_struct *t;
rcu_read_lock();
for_each_thread(p, t) {
task_lock(t);
if (likely(t->mm))
goto found;
task_unlock(t);
}
t = NULL;
found:
rcu_read_unlock();
return t;
}
static struct sighand_struct *sysstats_lock_task_sighand(struct task_struct *tsk,
unsigned long *flags)
{
struct sighand_struct *sighand;
rcu_read_lock();
for (;;) {
sighand = rcu_dereference(tsk->sighand);
if (unlikely(sighand == NULL))
break;
spin_lock_irqsave(&sighand->siglock, *flags);
if (likely(sighand == tsk->sighand))
break;
spin_unlock_irqrestore(&sighand->siglock, *flags);
}
rcu_read_unlock();
return sighand;
}
static bool is_system_dmabufheap(struct dma_buf *dmabuf)
{
if (!strcmp(dmabuf->exp_name, "qcom,system") ||
!strcmp(dmabuf->exp_name, "qcom,system-uncached") ||
!strcmp(dmabuf->exp_name, "system-secure") ||
!strcmp(dmabuf->exp_name, "qcom,secure-pixel") ||
!strcmp(dmabuf->exp_name, "qcom,secure-non-pixel"))
return true;
return false;
}
static int get_dma_info(const void *data, struct file *file, unsigned int n)
{
struct dma_buf *dmabuf;
unsigned long *size = (unsigned long *)data;
if (!qcom_is_dma_buf_file(file))
return 0;
dmabuf = (struct dma_buf *)file->private_data;
if (is_system_dmabufheap(dmabuf))
*size += dmabuf->size;
return 0;
}
static unsigned long get_task_unreclaimable_info(struct task_struct *task)
{
struct task_struct *thread;
struct files_struct *files;
struct files_struct *group_leader_files = NULL;
unsigned long size = 0;
int ret = 0;
for_each_thread(task, thread) {
/* task is already locked don't lock/unlock again. */
if (task != thread)
task_lock(thread);
if (unlikely(!group_leader_files))
group_leader_files = task->group_leader->files;
files = thread->files;
if (files && (group_leader_files != files ||
thread == task->group_leader))
ret = iterate_fd(files, 0, get_dma_info, &size);
if (task != thread)
task_unlock(thread);
if (ret)
break;
}
return size >> PAGE_SHIFT;
}
static unsigned long get_system_unreclaimable_info(void)
{
struct task_struct *task;
unsigned long size = 0;
rcu_read_lock();
for_each_process(task) {
task_lock(task);
size += get_task_unreclaimable_info(task);
task_unlock(task);
}
rcu_read_unlock();
/* Account the kgsl information. */
size += sysstats_kgsl_get_stats(-1) >> PAGE_SHIFT;
return size;
}
static char *nla_strdup_cust(const struct nlattr *nla, gfp_t flags)
{
size_t srclen = nla_len(nla);
char *src = nla_data(nla), *dst;
if (srclen > 0 && src[srclen - 1] == '\0')
srclen--;
dst = kmalloc(srclen + 1, flags);
if (dst != NULL) {
memcpy(dst, src, srclen);
dst[srclen] = '\0';
}
return dst;
}
static int sysstats_task_cmd_attr_pid(struct genl_info *info)
{
struct sysstats_task *stats;
struct sk_buff *rep_skb;
struct nlattr *ret;
struct task_struct *tsk;
struct task_struct *p;
size_t size;
u32 pid;
int rc;
u64 utime, stime;
const struct cred *tcred;
#ifdef CONFIG_CPUSETS
struct cgroup_subsys_state *css;
#endif
unsigned long flags;
struct signal_struct *sig;
size = nla_total_size_64bit(sizeof(struct sysstats_task));
rc = prepare_reply(info, SYSSTATS_TASK_CMD_NEW, &rep_skb, size);
if (rc < 0)
return rc;
rc = -EINVAL;
pid = nla_get_u32(info->attrs[SYSSTATS_TASK_CMD_ATTR_PID]);
ret = nla_reserve_64bit(rep_skb, SYSSTATS_TASK_TYPE_STATS,
sizeof(struct sysstats_task), SYSSTATS_TYPE_NULL);
if (!ret)
goto err;
stats = nla_data(ret);
rcu_read_lock();
tsk = find_task_by_vpid(pid);
if (tsk)
get_task_struct(tsk);
rcu_read_unlock();
if (!tsk) {
rc = -ESRCH;
goto err;
}
memset(stats, 0, sizeof(*stats));
stats->pid = task_pid_nr_ns(tsk, task_active_pid_ns(current));
p = find_lock_task_mm(tsk);
if (p) {
__acquire(p->alloc_lock);
#define K(x) ((x) << (PAGE_SHIFT - 10))
stats->anon_rss = K(get_mm_counter(p->mm, MM_ANONPAGES));
stats->file_rss = K(get_mm_counter(p->mm, MM_FILEPAGES));
stats->shmem_rss = K(get_mm_counter(p->mm, MM_SHMEMPAGES));
stats->swap_rss = K(get_mm_counter(p->mm, MM_SWAPENTS));
stats->unreclaimable = K(get_task_unreclaimable_info(p));
#undef K
task_unlock(p);
}
stats->unreclaimable += sysstats_kgsl_get_stats(stats->pid) >> 10;
task_cputime(tsk, &utime, &stime);
stats->utime = div_u64(utime, NSEC_PER_USEC);
stats->stime = div_u64(stime, NSEC_PER_USEC);
if (sysstats_lock_task_sighand(tsk, &flags)) {
sig = tsk->signal;
stats->cutime = sig->cutime;
stats->cstime = sig->cstime;
unlock_task_sighand(tsk, &flags);
}
rcu_read_lock();
tcred = __task_cred(tsk);
stats->uid = from_kuid_munged(current_user_ns(), tcred->uid);
stats->ppid = pid_alive(tsk) ?
task_tgid_nr_ns(rcu_dereference(tsk->real_parent),
task_active_pid_ns(current)) : 0;
rcu_read_unlock();
strscpy(stats->name, tsk->comm, sizeof(stats->name));
#ifdef CONFIG_CPUSETS
css = task_get_css(tsk, cpuset_cgrp_id);
cgroup_path_ns(css->cgroup, stats->state, sizeof(stats->state),
current->nsproxy->cgroup_ns);
css_put(css);
#endif
put_task_struct(tsk);
return send_reply(rep_skb, info);
err:
nlmsg_free(rep_skb);
return rc;
}
static int sysstats_task_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
if (info->attrs[SYSSTATS_TASK_CMD_ATTR_PID])
return sysstats_task_cmd_attr_pid(info);
else
return -EINVAL;
}
static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
{
struct pid *pid;
if (iter.task)
put_task_struct(iter.task);
rcu_read_lock();
retry:
iter.task = NULL;
pid = idr_get_next(&ns->idr, &iter.tgid);
if (pid) {
iter.tgid = pid_nr_ns(pid, ns);
iter.task = pid_task(pid, PIDTYPE_TGID);
if (!iter.task) {
iter.tgid += 1;
goto retry;
}
get_task_struct(iter.task);
}
rcu_read_unlock();
return iter;
}
static int sysstats_all_pids_of_name(struct sk_buff *skb, struct netlink_callback *cb)
{
struct pid_namespace *ns = task_active_pid_ns(current);
struct tgid_iter iter;
void *reply;
struct nlattr *attr;
struct nlattr *nla;
struct sysstats_pid *stats;
char *comm;
nla = nla_find(nlmsg_attrdata(cb->nlh, GENL_HDRLEN),
nlmsg_attrlen(cb->nlh, GENL_HDRLEN),
SYSSTATS_TASK_CMD_ATTR_PIDS_OF_NAME);
if (!nla)
goto out;
comm = nla_strdup_cust(nla, GFP_KERNEL);
if (!comm)
goto out;
iter.tgid = cb->args[0];
iter.task = NULL;
for (iter = next_tgid(ns, iter); iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
if (strcmp(iter.task->comm, comm))
continue;
reply = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, &family, 0, SYSSTATS_PIDS_CMD_GET);
if (reply == NULL) {
put_task_struct(iter.task);
break;
}
attr = nla_reserve(skb, SYSSTATS_PID_TYPE_STATS,
sizeof(struct sysstats_pid));
if (!attr) {
put_task_struct(iter.task);
genlmsg_cancel(skb, reply);
break;
}
stats = nla_data(attr);
memset(stats, 0, sizeof(struct sysstats_pid));
rcu_read_lock();
stats->pid = task_pid_nr_ns(iter.task,
task_active_pid_ns(current));
rcu_read_unlock();
genlmsg_end(skb, reply);
}
cb->args[0] = iter.tgid;
kfree(comm);
out:
return skb->len;
}
static int sysstats_task_foreach(struct sk_buff *skb, struct netlink_callback *cb)
{
struct pid_namespace *ns = task_active_pid_ns(current);
struct tgid_iter iter;
void *reply;
struct nlattr *attr;
struct nlattr *nla;
struct sysstats_task *stats;
struct task_struct *p;
short oom_score;
short oom_score_min;
short oom_score_max;
u32 buf;
nla = nla_find(nlmsg_attrdata(cb->nlh, GENL_HDRLEN),
nlmsg_attrlen(cb->nlh, GENL_HDRLEN),
SYSSTATS_TASK_CMD_ATTR_FOREACH);
if (!nla)
goto out;
buf = nla_get_u32(nla);
oom_score_min = (short) (buf & 0xFFFF);
oom_score_max = (short) ((buf >> 16) & 0xFFFF);
iter.tgid = cb->args[0];
iter.task = NULL;
for (iter = next_tgid(ns, iter); iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
if (iter.task->flags & PF_KTHREAD)
continue;
oom_score = iter.task->signal->oom_score_adj;
if ((oom_score < oom_score_min)
|| (oom_score > oom_score_max))
continue;
reply = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, &family, 0, SYSSTATS_TASK_CMD_GET);
if (reply == NULL) {
put_task_struct(iter.task);
break;
}
attr = nla_reserve(skb, SYSSTATS_TASK_TYPE_FOREACH,
sizeof(struct sysstats_task));
if (!attr) {
put_task_struct(iter.task);
genlmsg_cancel(skb, reply);
break;
}
stats = nla_data(attr);
memset(stats, 0, sizeof(struct sysstats_task));
rcu_read_lock();
stats->pid = task_pid_nr_ns(iter.task,
task_active_pid_ns(current));
stats->oom_score = iter.task->signal->oom_score_adj;
rcu_read_unlock();
p = find_lock_task_mm(iter.task);
if (p) {
#define K(x) ((x) << (PAGE_SHIFT - 10))
__acquire(p->alloc_lock);
stats->anon_rss =
K(get_mm_counter(p->mm, MM_ANONPAGES));
stats->file_rss =
K(get_mm_counter(p->mm, MM_FILEPAGES));
stats->shmem_rss =
K(get_mm_counter(p->mm, MM_SHMEMPAGES));
stats->swap_rss =
K(get_mm_counter(p->mm, MM_SWAPENTS));
stats->unreclaimable = K(get_task_unreclaimable_info(p));
task_unlock(p);
#undef K
}
genlmsg_end(skb, reply);
}
cb->args[0] = iter.tgid;
out:
return skb->len;
}
#define K(x) ((x) << (PAGE_SHIFT - 10))
#ifndef CONFIG_NUMA
static void sysstats_fill_zoneinfo(struct sysstats_mem *stats)
{
pg_data_t *pgdat;
struct zone *zone;
struct zone *node_zones;
unsigned long zspages = 0;
pgdat = NODE_DATA(0);
node_zones = pgdat->node_zones;
for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
if (!populated_zone(zone))
continue;
zspages += zone_page_state(zone, NR_ZSPAGES);
if (!strcmp(zone->name, "DMA")) {
stats->dma_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->dma_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->dma_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->dma_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->dma_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
} else if (!strcmp(zone->name, "Normal")) {
stats->normal_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->normal_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->normal_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->normal_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->normal_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
} else if (!strcmp(zone->name, "HighMem")) {
stats->highmem_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->highmem_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->highmem_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->highmem_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->highmem_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
} else if (!strcmp(zone->name, "Movable")) {
stats->movable_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->movable_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->movable_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->movable_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->movable_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
}
}
stats->zram_compressed = K(zspages);
}
#else
static void sysstats_fill_zoneinfo(struct sysstats_mem *stats)
{
}
#endif
static void sysstats_build(struct sysstats_mem *stats)
{
struct sysinfo i;
si_meminfo(&i);
#ifndef CONFIG_MSM_SYSSTATS_STUB_NONEXPORTED_SYMBOLS
si_swapinfo(&i);
stats->swap_used = K(i.totalswap - i.freeswap);
stats->swap_total = K(i.totalswap);
stats->vmalloc_total = K(vmalloc_nr_pages());
#else
stats->swap_used = 0;
stats->swap_total = 0;
stats->vmalloc_total = 0;
#endif
stats->memtotal = K(i.totalram);
stats->misc_reclaimable =
K(global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
stats->unreclaimable = K(get_system_unreclaimable_info());
stats->buffer = K(i.bufferram);
stats->swapcache = K(total_swapcache_pages());
stats->slab_reclaimable =
K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B));
stats->slab_unreclaimable =
K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B));
stats->free_cma = K(global_zone_page_state(NR_FREE_CMA_PAGES));
stats->file_mapped = K(global_node_page_state(NR_FILE_MAPPED));
stats->kernelstack = global_node_page_state(NR_KERNEL_STACK_KB);
stats->pagetable = K(global_node_page_state(NR_PAGETABLE));
stats->shmem = K(i.sharedram);
sysstats_fill_zoneinfo(stats);
}
#undef K
static int sysstats_meminfo_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
struct sk_buff *rep_skb;
struct sysstats_mem *stats;
struct nlattr *na;
size_t size;
size = nla_total_size(sizeof(struct sysstats_mem));
rc = prepare_reply(info, SYSSTATS_MEMINFO_CMD_NEW, &rep_skb,
size);
if (rc < 0)
goto err;
na = nla_reserve(rep_skb, SYSSTATS_MEMINFO_TYPE_STATS,
sizeof(struct sysstats_mem));
if (na == NULL) {
nlmsg_free(rep_skb);
rc = -EMSGSIZE;
goto err;
}
stats = nla_data(na);
memset(stats, 0, sizeof(*stats));
sysstats_build(stats);
rc = send_reply(rep_skb, info);
err:
return rc;
}
static const struct genl_ops sysstats_ops[] = {
{
.cmd = SYSSTATS_TASK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = sysstats_task_user_cmd,
.dumpit = sysstats_task_foreach,
},
{
.cmd = SYSSTATS_MEMINFO_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = sysstats_meminfo_user_cmd,
},
{
.cmd = SYSSTATS_PIDS_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.dumpit = sysstats_all_pids_of_name,
}
};
static struct genl_family family __ro_after_init = {
.name = SYSSTATS_GENL_NAME,
.version = SYSSTATS_GENL_VERSION,
.maxattr = SYSSTATS_CMD_ATTR_MAX,
.module = THIS_MODULE,
.ops = sysstats_ops,
.n_ops = ARRAY_SIZE(sysstats_ops),
.pre_doit = sysstats_pre_doit,
.resv_start_op = SYSSTATS_PIDS_CMD_GET + 1,
};
static int __init sysstats_init(void)
{
int rc;
rc = genl_register_family(&family);
if (rc)
return rc;
sysstats_register_kgsl_stats_cb(sysstats_kgsl_stats);
pr_info("registered sysstats version %d\n", SYSSTATS_GENL_VERSION);
return 0;
}
static void __exit sysstats_exit(void)
{
genl_unregister_family(&family);
}
module_init(sysstats_init);
module_exit(sysstats_exit);
MODULE_IMPORT_NS(MINIDUMP);
MODULE_LICENSE("GPL");
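For orientation, a hedged userspace sketch of querying this family with libnl-3. It assumes the uapi header <linux/msm_sysstats.h> (not part of this hunk) exports SYSSTATS_GENL_NAME, SYSSTATS_GENL_VERSION, the SYSSTATS_* command/attribute enums and struct sysstats_task used above, and it omits error handling for brevity.

#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/msm_sysstats.h>

static int stats_cb(struct nl_msg *msg, void *arg)
{
	struct nlattr *tb[SYSSTATS_TASK_TYPE_STATS + 1] = {};

	genlmsg_parse(nlmsg_hdr(msg), 0, tb, SYSSTATS_TASK_TYPE_STATS, NULL);
	if (tb[SYSSTATS_TASK_TYPE_STATS]) {
		struct sysstats_task *st = nla_data(tb[SYSSTATS_TASK_TYPE_STATS]);

		printf("pid=%u anon_rss=%llu kB swap=%llu kB\n",
		       (unsigned int)st->pid,
		       (unsigned long long)st->anon_rss,
		       (unsigned long long)st->swap_rss);
	}
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, SYSSTATS_GENL_NAME);

	/* SYSSTATS_TASK_CMD_GET with a PID attribute is handled as a doit request. */
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    SYSSTATS_TASK_CMD_GET, SYSSTATS_GENL_VERSION);
	nla_put_u32(msg, SYSSTATS_TASK_CMD_ATTR_PID, 1);	/* e.g. init */

	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, stats_cb, NULL);
	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}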

View File

@@ -61,7 +61,6 @@ bool crash_kexec_post_notifiers;
int panic_on_warn __read_mostly;
unsigned long panic_on_taint;
bool panic_on_taint_nousertaint = false;
bool stop_on_panic = false;
static unsigned int warn_limit __read_mostly;
int panic_timeout = CONFIG_PANIC_TIMEOUT;
@@ -398,11 +397,6 @@ void panic(const char *fmt, ...)
panic_print_sys_info(true);
if (stop_on_panic) {
pr_emerg("stop_on_panic is called, freezing...\n");
while(1);
}
if (!panic_blink)
panic_blink = no_blink;
@@ -783,7 +777,6 @@ core_param(panic_print, panic_print, ulong, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
core_param(panic_on_warn, panic_on_warn, int, 0644);
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
core_param(stop_on_panic, stop_on_panic, bool, 0644);
static int __init oops_setup(char *s)
{
@@ -821,10 +814,3 @@ static int __init panic_on_taint_setup(char *s)
return 0;
}
early_param("panic_on_taint", panic_on_taint_setup);
static int __init stop_on_panic_setup(char *s)
{
stop_on_panic = true;
return 0;
}
early_param("stop_on_panic", stop_on_panic_setup);

View File

@@ -27,13 +27,11 @@
#include <linux/syscore_ops.h>
#include <linux/swait.h>
#include <linux/ftrace.h>
#include <linux/rtc.h>
#include <trace/events/power.h>
#include <linux/compiler.h>
#include <linux/moduleparam.h>
#include <linux/wakeup_reason.h>
#include <trace/hooks/suspend.h>
#include <linux/regulator/machine.h>
#include "power.h"
@@ -425,9 +423,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
suspend_stats.failed_devs[last_dev]);
goto Platform_finish;
}
regulator_show_enabled();
error = platform_suspend_prepare_late(state);
if (error)
goto Devices_early_resume;
@@ -632,18 +627,6 @@ static int enter_state(suspend_state_t state)
return error;
}
static void pm_suspend_marker(char *annotation)
{
struct timespec64 ts;
struct rtc_time tm;
ktime_get_real_ts64(&ts);
rtc_time64_to_tm(ts.tv_sec, &tm);
pr_info("suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n",
annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
}
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
@@ -658,7 +641,7 @@ int pm_suspend(suspend_state_t state)
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;
pm_suspend_marker("entry");
pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);
error = enter_state(state);
if (error) {
suspend_stats.fail++;
@@ -666,7 +649,7 @@ int pm_suspend(suspend_state_t state)
} else {
suspend_stats.success++;
}
pm_suspend_marker("exit");
pr_info("suspend exit\n");
return error;
}
EXPORT_SYMBOL(pm_suspend);

View File

@@ -7,4 +7,4 @@ obj-$(CONFIG_PRINTK_INDEX) += index.o
obj-$(CONFIG_PRINTK) += printk_support.o
printk_support-y := printk_ringbuffer.o
printk_support-$(CONFIG_SYSCTL) += sysctl.o
printk_support-y += cx_gdsc_debug.o
printk_support-y += cx_gdsc_debug.o

View File

@@ -33,7 +33,6 @@
#include <linux/sched/signal.h>
#include <linux/minmax.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/task_integrity.h>
#include <asm/syscall.h> /* for syscall_get_* */
@@ -1283,7 +1282,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
long ret;
if (request == PTRACE_TRACEME) {
five_ptrace(current, request);
ret = ptrace_traceme();
goto out;
}
@@ -1294,8 +1292,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
goto out;
}
five_ptrace(child, request);
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
ret = ptrace_attach(child, request, addr, data);
goto out_put_task_struct;
@@ -1425,7 +1421,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
long ret;
if (request == PTRACE_TRACEME) {
five_ptrace(current, request);
ret = ptrace_traceme();
goto out;
}
@@ -1436,8 +1431,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
goto out;
}
five_ptrace(child, request);
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
ret = ptrace_attach(child, request, addr, data);
goto out_put_task_struct;

View File

@@ -33,3 +33,4 @@ obj-y += fair.o
obj-y += build_policy.o
obj-y += build_utility.o
obj-$(CONFIG_ANDROID_VENDOR_HOOKS) += vendor_hooks.o
obj-$(CONFIG_SCHED_WALT) += walt/

41
kernel/sched/walt/Kconfig Normal file
View File

@@ -0,0 +1,41 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# QTI WALT based scheduler
#
menu "QTI WALT based scheduler features"
config SCHED_WALT
tristate "Support window based load tracking"
depends on SMP
help
This feature will allow the scheduler to maintain a tunable window
based set of metrics for tasks and runqueues. These metrics can be
used to guide task placement as well as task frequency requirements
for cpufreq governors.
config SCHED_WALT_DEBUG
tristate "WALT debug module"
depends on SCHED_WALT
select TRACE_PREEMPT_TOGGLE
select TRACE_IRQFLAGS
help
This module provides the means of debugging long preempt and
irq disable code. This helps in identifying the scheduling
latencies. The module relies on preemptirq trace hooks and
prints the stacktrace to ftrace upon long preempt and irq
disable events. Sysctl knobs are available for the user to configure
the thresholds.
This module can also be used to crash the system to catch issues
in scenarios like RT throttling and sleeping while in atomic
context, etc.
config SCHED_CONSERVATIVE_BOOST_LPM_BIAS
bool "Enable LPM bias if conservative boost is enabled"
default n
help
This feature will allow the scheduler to disable low power
modes on a cpu if conservative boost is active. The cpu
will not enter low power mode for a hysteresis time period,
which can be configured from userspace.
endmenu

View File

@@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
KCOV_INSTRUMENT := n
KCSAN_SANITIZE := n
obj-$(CONFIG_SCHED_WALT) += sched-walt.o
sched-walt-$(CONFIG_SCHED_WALT) := walt.o boost.o sched_avg.o walt_halt.o core_ctl.o trace.o input-boost.o sysctl.o cpufreq_walt.o fixup.o walt_lb.o walt_rt.o walt_cfs.o walt_tp.o walt_config.o walt_cpufreq_cycle_cntr_driver.o walt_gclk_cycle_counter_driver.o walt_cycles.o debugfs.o pipeline.o smart_freq.o mvp_locking.o
obj-$(CONFIG_SCHED_WALT_DEBUG) += sched-walt-debug.o
sched-walt-debug-$(CONFIG_SCHED_WALT_DEBUG) := walt_debug.o preemptirq_long.o

359
kernel/sched/walt/boost.c Normal file
View File

@@ -0,0 +1,359 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/of.h>
#include "walt.h"
#include "trace.h"
/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with higher capacity than those where a task would normally have
* ended up based on its load characteristics. Any entity enabling
* boost is responsible for disabling it as well.
*/
unsigned int sched_boost_type;
enum sched_boost_policy boost_policy;
static DEFINE_MUTEX(boost_mutex);
void walt_init_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] = false;
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = false;
}
void walt_init_topapp_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = true;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
void walt_init_foreground_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
void walt_init_foregroundboost_tg(struct task_group *tg)
{
struct walt_task_group *wtg;
wtg = (struct walt_task_group *) tg->android_vendor_data1;
wtg->colocate = false;
wtg->sched_boost_enable[NO_BOOST] = false;
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
wtg->sched_boost_enable[STORAGE_BOOST] = true;
wtg->sched_boost_enable[BALANCE_BOOST] = true;
}
/*
* Scheduler boost type and boost policy might at first seem unrelated,
* however, there exists a connection between them that will allow us
* to use them interchangeably during placement decisions. We'll explain
* the connection here in one possible way so that the implications are
* clear when looking at placement policies.
*
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
* neither be none nor RESTRAINED.
*/
static void set_boost_policy(int type)
{
if (type == NO_BOOST || type == RESTRAINED_BOOST) {
boost_policy = SCHED_BOOST_NONE;
return;
}
if (hmp_capable()) {
boost_policy = SCHED_BOOST_ON_BIG;
return;
}
boost_policy = SCHED_BOOST_ON_ALL;
}
static bool verify_boost_params(int type)
{
return type >= BALANCE_BOOST_DISABLE && type <= BALANCE_BOOST;
}
static void sched_no_boost_nop(void)
{
}
static void sched_full_throttle_boost_enter(void)
{
core_ctl_set_boost(true);
walt_enable_frequency_aggregation(true);
}
static void sched_full_throttle_boost_exit(void)
{
core_ctl_set_boost(false);
walt_enable_frequency_aggregation(false);
}
static void sched_conservative_boost_enter(void)
{
}
static void sched_conservative_boost_exit(void)
{
}
static void sched_restrained_boost_enter(void)
{
walt_enable_frequency_aggregation(true);
}
static void sched_restrained_boost_exit(void)
{
walt_enable_frequency_aggregation(false);
}
static void sched_storage_boost_enter(void)
{
core_ctl_set_boost(true);
}
static void sched_storage_boost_exit(void)
{
core_ctl_set_boost(false);
}
static void sched_balance_boost_enter(void)
{
core_ctl_set_boost(true);
}
static void sched_balance_boost_exit(void)
{
core_ctl_set_boost(false);
}
struct sched_boost_data {
int refcount;
void (*enter)(void);
void (*exit)(void);
};
static struct sched_boost_data sched_boosts[] = {
[NO_BOOST] = {
.refcount = 0,
.enter = sched_no_boost_nop,
.exit = sched_no_boost_nop,
},
[FULL_THROTTLE_BOOST] = {
.refcount = 0,
.enter = sched_full_throttle_boost_enter,
.exit = sched_full_throttle_boost_exit,
},
[CONSERVATIVE_BOOST] = {
.refcount = 0,
.enter = sched_conservative_boost_enter,
.exit = sched_conservative_boost_exit,
},
[RESTRAINED_BOOST] = {
.refcount = 0,
.enter = sched_restrained_boost_enter,
.exit = sched_restrained_boost_exit,
},
[STORAGE_BOOST] = {
.refcount = 0,
.enter = sched_storage_boost_enter,
.exit = sched_storage_boost_exit,
},
[BALANCE_BOOST] = {
.refcount = 0,
.enter = sched_balance_boost_enter,
.exit = sched_balance_boost_exit,
},
};
#define SCHED_BOOST_START FULL_THROTTLE_BOOST
#define SCHED_BOOST_END (BALANCE_BOOST + 1)
static int sched_effective_boost(void)
{
int i;
/*
* The boosts are sorted in descending order by
* priority.
*/
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++) {
if (sched_boosts[i].refcount >= 1)
return i;
}
return NO_BOOST;
}
static void sched_boost_disable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
if (sb->refcount <= 0)
return;
sb->refcount--;
if (sb->refcount)
return;
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
/*
* This boost's refcount becomes zero, so it must
* be disabled. Disable it first and then apply
* the next boost.
*/
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_enable(int type)
{
struct sched_boost_data *sb = &sched_boosts[type];
int next_boost, prev_boost = sched_boost_type;
sb->refcount++;
if (sb->refcount != 1)
return;
/*
* This is the first enable request for this boost type.
* Take this new request and find the next boost
* by aggregating all the enabled boosts. If there
* is a change, disable the previous boost and enable
* the next boost.
*/
next_boost = sched_effective_boost();
if (next_boost == prev_boost)
return;
sched_boosts[prev_boost].exit();
sched_boosts[next_boost].enter();
}
static void sched_boost_disable_all(void)
{
int i;
int prev_boost = sched_boost_type;
if (prev_boost != NO_BOOST) {
sched_boosts[prev_boost].exit();
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++)
sched_boosts[i].refcount = 0;
}
}
static void _sched_set_boost(int type)
{
if (type == 0)
sched_boost_disable_all();
else if (type > 0)
sched_boost_enable(type);
else
sched_boost_disable(-type);
/*
* sysctl_sched_boost holds the boost request from
* user space which could be different from the
* effectively enabled boost. Update the effective
* boost here.
*/
sched_boost_type = sched_effective_boost();
sysctl_sched_boost = sched_boost_type;
set_boost_policy(sysctl_sched_boost);
trace_sched_set_boost(sysctl_sched_boost);
}
int sched_set_boost(int type)
{
int ret = 0;
if (unlikely(walt_disabled))
return -EAGAIN;
mutex_lock(&boost_mutex);
if (verify_boost_params(type))
_sched_set_boost(type);
else
ret = -EINVAL;
mutex_unlock(&boost_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(sched_set_boost);
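As a usage note for the interface exported above (a hypothetical in-kernel caller, assuming walt.h is in scope for the boost enum and prototype): boost types are refcounted, so a holder enables with a positive type and releases it by passing the same type negated, which _sched_set_boost() routes to sched_boost_disable().

static int with_storage_boost(void (*do_io)(void))
{
	int ret = sched_set_boost(STORAGE_BOOST);	/* take a reference */

	if (ret)
		return ret;	/* -EAGAIN when WALT is disabled, -EINVAL on a bad type */

	do_io();		/* boosted window */

	return sched_set_boost(-STORAGE_BOOST);		/* drop the reference */
}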
int sched_boost_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
unsigned int *data = (unsigned int *)table->data;
mutex_lock(&boost_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto done;
if (verify_boost_params(*data))
_sched_set_boost(*data);
else
ret = -EINVAL;
done:
mutex_unlock(&boost_mutex);
return ret;
}
void walt_boost_init(void)
{
/* force call the callbacks for default boost */
sched_set_boost(FULL_THROTTLE_BOOST);
}

1921
kernel/sched/walt/core_ctl.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/debugfs.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "trace.h"
unsigned int debugfs_walt_features;
static struct dentry *debugfs_walt;
void walt_register_debugfs(void)
{
debugfs_walt = debugfs_create_dir("walt", NULL);
debugfs_create_u32("walt_features", 0644, debugfs_walt, &debugfs_walt_features);
}

185
kernel/sched/walt/fixup.c Normal file
View File

@@ -0,0 +1,185 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2021-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/cpufreq.h>
#include <trace/hooks/topology.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "walt.h"
unsigned int cpuinfo_max_freq_cached;
char sched_lib_name[LIB_PATH_LENGTH];
char sched_lib_task[LIB_PATH_LENGTH];
unsigned int sched_lib_mask_force;
static bool is_sched_lib_based_app(pid_t pid)
{
const char *name = NULL;
char *libname, *lib_list;
struct vm_area_struct *vma;
char path_buf[LIB_PATH_LENGTH];
char *tmp_lib_name;
bool found = false;
struct task_struct *p;
struct mm_struct *mm;
if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0)
return false;
tmp_lib_name = kmalloc(LIB_PATH_LENGTH, GFP_KERNEL);
if (!tmp_lib_name)
return false;
rcu_read_lock();
p = pid ? get_pid_task(find_vpid(pid), PIDTYPE_PID) : get_task_struct(current);
rcu_read_unlock();
if (!p) {
kfree(tmp_lib_name);
return false;
}
mm = get_task_mm(p);
if (mm) {
MA_STATE(mas, &mm->mm_mt, 0, 0);
down_read(&mm->mmap_lock);
mas_for_each(&mas, vma, ULONG_MAX) {
if (vma->vm_file && vma->vm_flags & VM_EXEC) {
name = d_path(&vma->vm_file->f_path,
path_buf, LIB_PATH_LENGTH);
if (IS_ERR(name))
goto release_sem;
strscpy(tmp_lib_name, sched_lib_name, LIB_PATH_LENGTH);
lib_list = tmp_lib_name;
while ((libname = strsep(&lib_list, ","))) {
libname = skip_spaces(libname);
if (strnstr(name, libname,
strnlen(name, LIB_PATH_LENGTH))) {
found = true;
goto release_sem;
}
}
}
}
release_sem:
up_read(&mm->mmap_lock);
mmput(mm);
}
put_task_struct(p);
kfree(tmp_lib_name);
return found;
}
bool is_sched_lib_task(void)
{
if (strnlen(sched_lib_task, LIB_PATH_LENGTH) == 0)
return false;
if (strnstr(current->comm, sched_lib_task, strnlen(current->comm, LIB_PATH_LENGTH)))
return true;
return false;
}
static char cpu_cap_fixup_target[TASK_COMM_LEN];
static int proc_cpu_capacity_fixup_target_show(struct seq_file *m, void *data)
{
seq_printf(m, "%s\n", cpu_cap_fixup_target);
return 0;
}
static int proc_cpu_capacity_fixup_target_open(struct inode *inode,
struct file *file)
{
return single_open(file, proc_cpu_capacity_fixup_target_show, NULL);
}
static ssize_t proc_cpu_capacity_fixup_target_write(struct file *file,
const char __user *buf, size_t count, loff_t *offs)
{
char temp[TASK_COMM_LEN] = {0, };
int len = 0;
len = (count >= TASK_COMM_LEN) ? TASK_COMM_LEN - 1 : count;
if (copy_from_user(temp, buf, len))
return -EFAULT;
if (len > 0 && temp[len - 1] == '\n')
temp[len - 1] = '\0';
strlcpy(cpu_cap_fixup_target, temp, TASK_COMM_LEN);
return count;
}
static const struct proc_ops proc_cpu_capacity_fixup_target_op = {
.proc_open = proc_cpu_capacity_fixup_target_open,
.proc_write = proc_cpu_capacity_fixup_target_write,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
static void android_rvh_show_max_freq(void *unused, struct cpufreq_policy *policy,
unsigned int *max_freq)
{
int curr_len = 0;
if (!cpuinfo_max_freq_cached)
return;
curr_len = strnlen(current->comm, TASK_COMM_LEN);
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
*max_freq = cpuinfo_max_freq_cached;
return;
}
}
if (!(BIT(policy->cpu) & sched_lib_mask_force))
return;
if (is_sched_lib_based_app(current->pid) || is_sched_lib_task())
*max_freq = cpuinfo_max_freq_cached << 1;
}
static void android_rvh_cpu_capacity_show(void *unused,
unsigned long *capacity, int cpu)
{
int curr_len = 0;
curr_len = strnlen(current->comm, TASK_COMM_LEN);
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
*capacity = SCHED_CAPACITY_SCALE;
return;
}
}
if (!soc_sched_lib_name_capacity)
return;
if ((is_sched_lib_based_app(current->pid) || is_sched_lib_task()) &&
cpu < soc_sched_lib_name_capacity)
*capacity = 100;
}
void walt_fixup_init(void)
{
if (!proc_create("cpu_capacity_fixup_target",
0660, NULL, &proc_cpu_capacity_fixup_target_op))
pr_err("Failed to register 'cpu_capacity_fixup_target'\n");
register_trace_android_rvh_show_max_freq(android_rvh_show_max_freq, NULL);
register_trace_android_rvh_cpu_capacity_show(android_rvh_cpu_capacity_show, NULL);
}
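A small hypothetical userspace snippet exercising the proc entry registered above; "GameThread" is only an example comm. Tasks whose comm matches the stored string are shown the cached cpuinfo max frequency and full capacity by the two vendor hooks.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/cpu_capacity_fixup_target", "w");

	if (!f)
		return 1;
	fputs("GameThread\n", f);
	return fclose(f) ? 1 : 0;
}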

View File

@@ -0,0 +1,300 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013-2015,2017,2019-2021, The Linux Foundation. All rights reserved.
*/
#define pr_fmt(fmt) "input-boost: " fmt
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/input.h>
#include <linux/time.h>
#include <linux/sysfs.h>
#include <linux/pm_qos.h>
#include "walt.h"
#define input_boost_attr_rw(_name) \
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, show_##_name, store_##_name)
#define show_one(file_name) \
static ssize_t show_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
{ \
return scnprintf(buf, PAGE_SIZE, "%u\n", file_name); \
}
#define store_one(file_name) \
static ssize_t store_##file_name \
(struct kobject *kobj, struct kobj_attribute *attr, \
const char *buf, size_t count) \
{ \
\
sscanf(buf, "%u", &file_name); \
return count; \
}
struct cpu_sync {
int cpu;
unsigned int input_boost_min;
unsigned int input_boost_freq;
};
static DEFINE_PER_CPU(struct cpu_sync, sync_info);
static struct workqueue_struct *input_boost_wq;
static struct work_struct input_boost_work;
static bool sched_boost_active;
static struct delayed_work input_boost_rem;
static u64 last_input_time;
#define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC)
static DEFINE_PER_CPU(struct freq_qos_request, qos_req);
static void boost_adjust_notify(struct cpufreq_policy *policy)
{
unsigned int cpu = policy->cpu;
struct cpu_sync *s = &per_cpu(sync_info, cpu);
unsigned int ib_min = s->input_boost_min;
struct freq_qos_request *req = &per_cpu(qos_req, cpu);
int ret;
pr_debug("CPU%u policy min before boost: %u kHz\n",
cpu, policy->min);
pr_debug("CPU%u boost min: %u kHz\n", cpu, ib_min);
ret = freq_qos_update_request(req, ib_min);
if (ret < 0)
pr_err("Failed to update freq constraint in boost_adjust: %d\n",
ib_min);
pr_debug("CPU%u policy min after boost: %u kHz\n", cpu, policy->min);
}
static void update_policy_online(void)
{
unsigned int i;
struct cpufreq_policy *policy;
struct cpumask online_cpus;
/* Re-evaluate policy to trigger adjust notifier for online CPUs */
cpus_read_lock();
online_cpus = *cpu_online_mask;
for_each_cpu(i, &online_cpus) {
policy = cpufreq_cpu_get(i);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, i);
cpus_read_unlock();
return;
}
cpumask_andnot(&online_cpus, &online_cpus,
policy->related_cpus);
boost_adjust_notify(policy);
}
cpus_read_unlock();
}
static void do_input_boost_rem(struct work_struct *work)
{
unsigned int i, ret;
struct cpu_sync *i_sync_info;
/* Reset the input_boost_min for all CPUs in the system */
pr_debug("Resetting input boost min for all CPUs\n");
for_each_possible_cpu(i) {
i_sync_info = &per_cpu(sync_info, i);
i_sync_info->input_boost_min = 0;
}
/* Update policies for all online CPUs */
update_policy_online();
if (sched_boost_active) {
ret = sched_set_boost(0);
if (ret)
pr_err("input-boost: sched boost disable failed\n");
sched_boost_active = false;
}
}
static void do_input_boost(struct work_struct *work)
{
unsigned int cpu, ret;
struct cpu_sync *i_sync_info;
cancel_delayed_work_sync(&input_boost_rem);
if (sched_boost_active) {
sched_set_boost(0);
sched_boost_active = false;
}
/* Set the input_boost_min for all CPUs in the system */
pr_debug("Setting input boost min for all CPUs\n");
for_each_possible_cpu(cpu) {
i_sync_info = &per_cpu(sync_info, cpu);
i_sync_info->input_boost_min = sysctl_input_boost_freq[cpu];
}
/* Update policies for all online CPUs */
update_policy_online();
/* Enable scheduler boost to migrate tasks to big cluster */
if (sysctl_sched_boost_on_input > 0) {
ret = sched_set_boost(sysctl_sched_boost_on_input);
if (ret)
pr_err("input-boost: sched boost enable failed\n");
else
sched_boost_active = true;
}
queue_delayed_work(input_boost_wq, &input_boost_rem,
msecs_to_jiffies(sysctl_input_boost_ms));
}
static void inputboost_input_event(struct input_handle *handle,
unsigned int type, unsigned int code, int value)
{
u64 now;
int cpu;
int enabled = 0;
for_each_possible_cpu(cpu) {
if (sysctl_input_boost_freq[cpu] > 0) {
enabled = 1;
break;
}
}
if (!enabled)
return;
now = ktime_to_us(ktime_get());
if (now - last_input_time < MIN_INPUT_INTERVAL)
return;
if (work_pending(&input_boost_work))
return;
queue_work(input_boost_wq, &input_boost_work);
last_input_time = ktime_to_us(ktime_get());
}
static int inputboost_input_connect(struct input_handler *handler,
struct input_dev *dev, const struct input_device_id *id)
{
struct input_handle *handle;
int error;
handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
handle->dev = dev;
handle->handler = handler;
handle->name = "cpufreq";
error = input_register_handle(handle);
if (error)
goto err2;
error = input_open_device(handle);
if (error)
goto err1;
return 0;
err1:
input_unregister_handle(handle);
err2:
kfree(handle);
return error;
}
static void inputboost_input_disconnect(struct input_handle *handle)
{
input_close_device(handle);
input_unregister_handle(handle);
kfree(handle);
}
static const struct input_device_id inputboost_ids[] = {
/* multi-touch touchscreen */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.evbit = { BIT_MASK(EV_ABS) },
.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
BIT_MASK(ABS_MT_POSITION_X) |
BIT_MASK(ABS_MT_POSITION_Y)
},
},
/* touchpad */
{
.flags = INPUT_DEVICE_ID_MATCH_KEYBIT |
INPUT_DEVICE_ID_MATCH_ABSBIT,
.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
.absbit = { [BIT_WORD(ABS_X)] =
BIT_MASK(ABS_X) | BIT_MASK(ABS_Y)
},
},
/* Keypad */
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
.evbit = { BIT_MASK(EV_KEY) },
},
{ },
};
static struct input_handler inputboost_input_handler = {
.event = inputboost_input_event,
.connect = inputboost_input_connect,
.disconnect = inputboost_input_disconnect,
.name = "input-boost",
.id_table = inputboost_ids,
};
struct kobject *input_boost_kobj;
int input_boost_init(void)
{
int cpu, ret;
struct cpu_sync *s;
struct cpufreq_policy *policy;
struct freq_qos_request *req;
input_boost_wq = alloc_workqueue("inputboost_wq", WQ_HIGHPRI, 0);
if (!input_boost_wq)
return -EFAULT;
INIT_WORK(&input_boost_work, do_input_boost);
INIT_DELAYED_WORK(&input_boost_rem, do_input_boost_rem);
for_each_possible_cpu(cpu) {
s = &per_cpu(sync_info, cpu);
s->cpu = cpu;
req = &per_cpu(qos_req, cpu);
policy = cpufreq_cpu_get(cpu);
if (!policy) {
pr_err("%s: cpufreq policy not found for cpu%d\n",
__func__, cpu);
return -ESRCH;
}
ret = freq_qos_add_request(&policy->constraints, req,
FREQ_QOS_MIN, policy->min);
if (ret < 0) {
pr_err("%s: Failed to add freq constraint (%d)\n",
__func__, ret);
return ret;
}
}
ret = input_register_handler(&inputboost_input_handler);
return ret;
}

View File

@@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/dtask.h>
#include "../../locking/mutex.h"
#include "walt.h"
static void android_vh_alter_mutex_list_add(void *unused, struct mutex *lock,
struct mutex_waiter *waiter, struct list_head *list,
bool *already_on_list)
{
struct walt_task_struct *wts_waiter =
(struct walt_task_struct *)current->android_vendor_data1;
struct mutex_waiter *pos = NULL;
struct mutex_waiter *n = NULL;
struct list_head *head = list;
struct walt_task_struct *wts;
if (unlikely(walt_disabled))
return;
if (!lock || !waiter || !list)
return;
if (!is_mvp(wts_waiter))
return;
list_for_each_entry_safe(pos, n, head, list) {
wts = (struct walt_task_struct *)
((struct task_struct *)(pos->task)->android_vendor_data1);
if (!is_mvp(wts)) {
list_add(&waiter->list, pos->list.prev);
*already_on_list = true;
break;
}
}
}
void walt_mvp_lock_ordering_init(void)
{
register_trace_android_vh_alter_mutex_list_add(android_vh_alter_mutex_list_add, NULL);
}

View File

@@ -0,0 +1,239 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2013-2014, 2017, 2021, The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM perf_trace_counters
#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _PERF_TRACE_COUNTERS_H_
/* Ctr index for PMCNTENSET/CLR */
#define CC 0x80000000
#define C0 0x1
#define C1 0x2
#define C2 0x4
#define C3 0x8
#define C4 0x10
#define C5 0x20
#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5)
#define TYPE_MASK 0xFFFF
#define NUM_L1_CTRS 6
#define NUM_AMU_CTRS 3
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/tracepoint.h>
DECLARE_PER_CPU(u32, cntenset_val);
DECLARE_PER_CPU(unsigned long, previous_ccnt);
DECLARE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DECLARE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
unsigned int state;
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
/*
* Preemption ignores task state, therefore preempted tasks are always
* RUNNING (we will not have dequeued if state != RUNNING).
*/
if (preempt)
return TASK_REPORT_MAX;
/*
* task_state_index() uses fls() and returns a value from 0-8 range.
* Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
* it for left shift operation to get the correct task->state
* mapping.
*/
state = task_state_index(p);
return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */
TRACE_EVENT(sched_switch_with_ctrs,
TP_PROTO(bool preempt,
struct task_struct *prev,
struct task_struct *next),
TP_ARGS(preempt, prev, next),
TP_STRUCT__entry(
__field(pid_t, prev_pid)
__field(pid_t, next_pid)
__array(char, prev_comm, TASK_COMM_LEN)
__array(char, next_comm, TASK_COMM_LEN)
__field(long, prev_state)
__field(unsigned long, cctr)
__field(unsigned long, ctr0)
__field(unsigned long, ctr1)
__field(unsigned long, ctr2)
__field(unsigned long, ctr3)
__field(unsigned long, ctr4)
__field(unsigned long, ctr5)
__field(unsigned long, amu0)
__field(unsigned long, amu1)
__field(unsigned long, amu2)
),
TP_fast_assign(
u32 cpu = smp_processor_id();
u32 i;
u32 cnten_val;
unsigned long total_ccnt = 0;
unsigned long total_cnt = 0;
unsigned long amu_cnt = 0;
unsigned long delta_l1_cnts[NUM_L1_CTRS] = {0};
unsigned long delta_amu_cnts[NUM_AMU_CTRS] = {0};
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
__entry->prev_pid = prev->pid;
__entry->next_pid = next->pid;
cnten_val = per_cpu(cntenset_val, cpu);
if (cnten_val & CC) {
/* Read value */
total_ccnt = read_sysreg(pmccntr_el0);
__entry->cctr = total_ccnt -
per_cpu(previous_ccnt, cpu);
per_cpu(previous_ccnt, cpu) = total_ccnt;
}
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read value */
total_cnt = read_sysreg(pmxevcntr_el0);
delta_l1_cnts[i] = total_cnt -
per_cpu(previous_l1_cnts[i], cpu);
per_cpu(previous_l1_cnts[i], cpu) =
total_cnt;
} else
delta_l1_cnts[i] = 0;
}
if (IS_ENABLED(CONFIG_ARM64_AMU_EXTN)) {
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
delta_amu_cnts[0] = amu_cnt -
per_cpu(previous_amu_cnts[0], cpu);
per_cpu(previous_amu_cnts[0], cpu) = amu_cnt;
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
delta_amu_cnts[1] = amu_cnt -
per_cpu(previous_amu_cnts[1], cpu);
per_cpu(previous_amu_cnts[1], cpu) = amu_cnt;
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
delta_amu_cnts[2] = amu_cnt -
per_cpu(previous_amu_cnts[2], cpu);
per_cpu(previous_amu_cnts[2], cpu) = amu_cnt;
}
__entry->ctr0 = delta_l1_cnts[0];
__entry->ctr1 = delta_l1_cnts[1];
__entry->ctr2 = delta_l1_cnts[2];
__entry->ctr3 = delta_l1_cnts[3];
__entry->ctr4 = delta_l1_cnts[4];
__entry->ctr5 = delta_l1_cnts[5];
__entry->amu0 = delta_amu_cnts[0];
__entry->amu1 = delta_amu_cnts[1];
__entry->amu2 = delta_amu_cnts[2];
),
TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s next_pid=%d CCNTR=%lu CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu, CYC: %lu, INST: %lu, STALL: %lu",
__entry->prev_comm, __entry->prev_pid,
(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
{ TASK_INTERRUPTIBLE, "S" },
{ TASK_UNINTERRUPTIBLE, "D" },
{ __TASK_STOPPED, "T" },
{ __TASK_TRACED, "t" },
{ EXIT_DEAD, "X" },
{ EXIT_ZOMBIE, "Z" },
{ TASK_PARKED, "P" },
{ TASK_DEAD, "I" }) :
"R",
__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
__entry->next_comm,
__entry->next_pid,
__entry->cctr,
__entry->ctr0, __entry->ctr1,
__entry->ctr2, __entry->ctr3,
__entry->ctr4, __entry->ctr5,
__entry->amu0, __entry->amu1,
__entry->amu2)
);
TRACE_EVENT(sched_switch_ctrs_cfg,
TP_PROTO(int cpu),
TP_ARGS(cpu),
TP_STRUCT__entry(
__field(int, cpu)
__field(unsigned long, ctr0)
__field(unsigned long, ctr1)
__field(unsigned long, ctr2)
__field(unsigned long, ctr3)
__field(unsigned long, ctr4)
__field(unsigned long, ctr5)
),
TP_fast_assign(
u32 i;
u32 cnten_val;
u32 ctr_type[NUM_L1_CTRS] = {0};
cnten_val = per_cpu(cntenset_val, cpu);
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read type */
ctr_type[i] = read_sysreg(pmxevtyper_el0)
& TYPE_MASK;
} else
ctr_type[i] = 0;
}
__entry->cpu = cpu;
__entry->ctr0 = ctr_type[0];
__entry->ctr1 = ctr_type[1];
__entry->ctr2 = ctr_type[2];
__entry->ctr3 = ctr_type[3];
__entry->ctr4 = ctr_type[4];
__entry->ctr5 = ctr_type[5];
),
TP_printk("cpu=%d CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu",
__entry->cpu,
__entry->ctr0, __entry->ctr1,
__entry->ctr2, __entry->ctr3,
__entry->ctr4, __entry->ctr5)
);
#endif
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../kernel/sched/walt
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE perf_trace_counters
#include <trace/define_trace.h>
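
The TP_fast_assign blocks above report per-counter deltas since the previous context switch on the same CPU, using the per-CPU previous_* variables as snapshots. A standalone sketch of that snapshot-and-delta pattern, with a fake free-running counter standing in for the PMU registers:

#include <stdio.h>

#define NUM_CTRS 3

/* snapshot of the last counter readings (per-CPU in the real code) */
static unsigned long previous_cnts[NUM_CTRS];

/* stand-in for read_sysreg(pmxevcntr_el0): a free-running counter */
static unsigned long fake_read_counter(int idx, unsigned long tick)
{
	return tick * (idx + 1) * 100;
}

/* Called at each "context switch": report work done since the last switch. */
static void report_deltas(unsigned long tick)
{
	int i;

	for (i = 0; i < NUM_CTRS; i++) {
		unsigned long total = fake_read_counter(i, tick);
		unsigned long delta = total - previous_cnts[i];

		previous_cnts[i] = total;	/* remember for the next switch */
		printf("ctr%d delta=%lu ", i, delta);
	}
	printf("\n");
}

int main(void)
{
	report_deltas(1);	/* first sample: deltas equal totals */
	report_deltas(4);	/* ctr0 += 300, ctr1 += 600, ctr2 += 900 */
	report_deltas(5);	/* ctr0 += 100, ctr1 += 200, ctr2 += 300 */
	return 0;
}
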

View File

@@ -0,0 +1,762 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "walt.h"
#include "trace.h"
static DEFINE_RAW_SPINLOCK(pipeline_lock);
static struct walt_task_struct *pipeline_wts[WALT_NR_CPUS];
int pipeline_nr;
static DEFINE_RAW_SPINLOCK(heavy_lock);
static struct walt_task_struct *heavy_wts[MAX_NR_PIPELINE];
bool pipeline_pinning;
static inline int pipeline_demand(struct walt_task_struct *wts)
{
return scale_time_to_util(wts->coloc_demand);
}
int add_pipeline(struct walt_task_struct *wts)
{
int i, pos = -1, ret = -ENOSPC;
unsigned long flags;
int max_nr_pipeline = cpumask_weight(&cpus_for_pipeline);
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&pipeline_lock, flags);
for (i = 0; i < max_nr_pipeline; i++) {
if (wts == pipeline_wts[i]) {
ret = 0;
goto out;
}
if (pipeline_wts[i] == NULL)
pos = i;
}
if (pos != -1) {
pipeline_wts[pos] = wts;
pipeline_nr++;
ret = 0;
}
out:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
return ret;
}
int remove_pipeline(struct walt_task_struct *wts)
{
int i, j, ret = 0;
unsigned long flags;
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&pipeline_lock, flags);
for (i = 0; i < WALT_NR_CPUS; i++) {
if (wts == pipeline_wts[i]) {
wts->low_latency &= ~WALT_LOW_LATENCY_PIPELINE_BIT;
pipeline_wts[i] = NULL;
pipeline_nr--;
for (j = i; j < WALT_NR_CPUS - 1; j++) {
pipeline_wts[j] = pipeline_wts[j + 1];
pipeline_wts[j + 1] = NULL;
}
goto out;
}
}
out:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
return ret;
}
int remove_heavy(struct walt_task_struct *wts)
{
int i, j, ret = 0;
unsigned long flags;
if (unlikely(walt_disabled))
return -EAGAIN;
raw_spin_lock_irqsave(&heavy_lock, flags);
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (wts == heavy_wts[i]) {
wts->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i] = NULL;
have_heavy_list--;
for (j = i; j < MAX_NR_PIPELINE - 1; j++) {
heavy_wts[j] = heavy_wts[j + 1];
heavy_wts[j + 1] = NULL;
}
goto out;
}
}
out:
raw_spin_unlock_irqrestore(&heavy_lock, flags);
return ret;
}
void remove_special_task(void)
{
unsigned long flags;
raw_spin_lock_irqsave(&heavy_lock, flags);
/*
* Although the pipeline special task designation is removed,
* if the task is not dead (i.e. this function was called from sysctl context)
* the task will continue to enjoy pipeline privileges until the next update in
* find_heaviest_topapp().
*/
pipeline_special_task = NULL;
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
void set_special_task(struct task_struct *pipeline_special_local)
{
unsigned long flags;
raw_spin_lock_irqsave(&heavy_lock, flags);
pipeline_special_task = pipeline_special_local;
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
/* always set unisolation for max cluster, for pipeline tasks */
static inline void pipeline_set_unisolation(bool set, int flag)
{
static bool unisolation_state;
struct walt_sched_cluster *cluster;
static unsigned int enable_pipeline_unisolation;
if (!set)
enable_pipeline_unisolation &= ~(1 << flag);
else
enable_pipeline_unisolation |= (1 << flag);
if (unisolation_state && !enable_pipeline_unisolation) {
unisolation_state = false;
for_each_sched_cluster(cluster) {
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
core_ctl_set_cluster_boost(cluster->id, false);
}
} else if (!unisolation_state && enable_pipeline_unisolation) {
unisolation_state = true;
for_each_sched_cluster(cluster) {
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
core_ctl_set_cluster_boost(cluster->id, true);
}
}
}
/*
* sysctl_sched_heavy_nr or sysctl_sched_pipeline_util_thres can change at any moment in time.
* As a result, the ability to set/clear the unisolation state for a particular type of
* pipeline is hindered. Detect a transition and reset the unisolation state of the pipeline
* method no longer in use.
*/
static inline void pipeline_reset_unisolation_state(void)
{
static bool last_auto_pipeline;
if ((sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres) && !last_auto_pipeline) {
pipeline_set_unisolation(false, MANUAL_PIPELINE);
last_auto_pipeline = true;
} else if (!sysctl_sched_heavy_nr &&
!sysctl_sched_pipeline_util_thres && last_auto_pipeline) {
pipeline_set_unisolation(false, AUTO_PIPELINE);
last_auto_pipeline = false;
}
}
static inline bool should_pipeline_pin_special(void)
{
if (!pipeline_special_task)
return false;
if (!heavy_wts[MAX_NR_PIPELINE - 1])
return false;
if (pipeline_demand(heavy_wts[0]) <= sysctl_pipeline_special_task_util_thres)
return true;
if (pipeline_demand(heavy_wts[1]) <= sysctl_pipeline_non_special_task_util_thres)
return true;
if (pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_low_pct, 100)))
return false;
if (!pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_high_pct, 100)))
return false;
return true;
}
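
should_pipeline_pin_special() applies two percentage thresholds: a higher bar (sysctl_pipeline_pin_thres_high_pct) to enter the pinned state and a lower bar (sysctl_pipeline_pin_thres_low_pct) to stay in it, so the decision does not flap when the two demands are close. A standalone sketch of that two-threshold hysteresis; the threshold values below are arbitrary demo numbers, not the sysctl defaults:

#include <stdio.h>
#include <stdbool.h>

/* Decide whether the special task stays pinned: a higher bar to enter the
 * pinned state, a lower bar to leave it. */
static bool update_pinning(bool pinned, unsigned int special_demand,
			   unsigned int other_demand,
			   unsigned int low_pct, unsigned int high_pct)
{
	unsigned int thres_pct = pinned ? low_pct : high_pct;

	return special_demand > (other_demand * thres_pct) / 100;
}

int main(void)
{
	bool pinned = false;

	/* high_pct=150, low_pct=110: the special task must clearly dominate
	 * to get pinned, but the pin is only dropped once its lead shrinks
	 * well below that bar. */
	pinned = update_pinning(pinned, 160, 100, 110, 150);	/* 160 > 150: pin */
	printf("pinned=%d\n", pinned);
	pinned = update_pinning(pinned, 130, 100, 110, 150);	/* 130 > 110: stay pinned */
	printf("pinned=%d\n", pinned);
	pinned = update_pinning(pinned, 105, 100, 110, 150);	/* 105 <= 110: unpin */
	printf("pinned=%d\n", pinned);
	return 0;
}
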
cpumask_t last_available_big_cpus = CPU_MASK_NONE;
int have_heavy_list;
u32 total_util;
bool find_heaviest_topapp(u64 window_start)
{
struct walt_related_thread_group *grp;
struct walt_task_struct *wts;
unsigned long flags;
static u64 last_rearrange_ns;
int i, j, start;
struct walt_task_struct *heavy_wts_to_drop[MAX_NR_PIPELINE];
if (num_sched_clusters < 2)
return false;
/* react lazily (within 100 ms) to colocation or heavy_nr changes when enabling/disabling */
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
if (!grp || (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres) ||
sched_boost_type) {
if (have_heavy_list) {
raw_spin_lock_irqsave(&heavy_lock, flags);
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i]) {
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i]->pipeline_cpu = -1;
heavy_wts[i] = NULL;
}
}
raw_spin_unlock_irqrestore(&heavy_lock, flags);
have_heavy_list = 0;
pipeline_set_unisolation(false, AUTO_PIPELINE);
}
return false;
}
if (last_rearrange_ns && (window_start < (last_rearrange_ns + 100 * MSEC_TO_NSEC)))
return false;
last_rearrange_ns = window_start;
raw_spin_lock_irqsave(&grp->lock, flags);
raw_spin_lock(&heavy_lock);
/* remember the old ones in _to_drop[] */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
heavy_wts_to_drop[i] = heavy_wts[i];
heavy_wts[i] = NULL;
}
/* Assign user specified one (if exists) to slot 0*/
if (pipeline_special_task) {
heavy_wts[0] = (struct walt_task_struct *)
pipeline_special_task->android_vendor_data1;
start = 1;
} else {
start = 0;
}
/*
* Ensure that heavy_wts either contains the top 3 top-app tasks,
* or the user defined heavy task followed by the top 2 top-app tasks
*/
list_for_each_entry(wts, &grp->tasks, grp_list) {
struct walt_task_struct *to_be_placed_wts = wts;
/* if the task hasn't seen action recently, skip it */
if (wts->mark_start < window_start - (sched_ravg_window * 2))
continue;
/* skip the user-defined task as it's already part of the list */
if (pipeline_special_task && (wts == heavy_wts[0]))
continue;
for (i = start; i < MAX_NR_PIPELINE; i++) {
if (!heavy_wts[i]) {
heavy_wts[i] = to_be_placed_wts;
break;
} else if (pipeline_demand(to_be_placed_wts) >=
pipeline_demand(heavy_wts[i])) {
struct walt_task_struct *tmp;
tmp = heavy_wts[i];
heavy_wts[i] = to_be_placed_wts;
to_be_placed_wts = tmp;
}
}
}
/*
* Determine how many of the top three pipeline tasks to keep as heavy.
* If the "sched_heavy_nr" node is set, the util threshold is ignored.
*/
total_util = 0;
if (sysctl_sched_heavy_nr) {
for (i = sysctl_sched_heavy_nr; i < MAX_NR_PIPELINE; i++)
heavy_wts[i] = NULL;
} else {
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i])
total_util += pipeline_demand(heavy_wts[i]);
}
if (total_util < sysctl_sched_pipeline_util_thres)
heavy_wts[MAX_NR_PIPELINE - 1] = NULL;
}
/* reset heavy for tasks that are no longer heavy */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
bool reset = true;
if (!heavy_wts_to_drop[i])
continue;
for (j = 0; j < MAX_NR_PIPELINE; j++) {
if (!heavy_wts[j])
continue;
if (heavy_wts_to_drop[i] == heavy_wts[j]) {
reset = false;
break;
}
}
if (reset) {
heavy_wts_to_drop[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts_to_drop[i]->pipeline_cpu = -1;
}
if (heavy_wts[i]) {
heavy_wts[i]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
}
}
if (heavy_wts[MAX_NR_PIPELINE - 1])
pipeline_set_unisolation(true, AUTO_PIPELINE);
else
pipeline_set_unisolation(false, AUTO_PIPELINE);
raw_spin_unlock(&heavy_lock);
raw_spin_unlock_irqrestore(&grp->lock, flags);
return true;
}
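
The placement loop above keeps the MAX_NR_PIPELINE heaviest tasks by demand with a single insertion pass per candidate instead of sorting the whole group. A standalone sketch of that pass, assuming three slots and plain integer demands:

#include <stdio.h>

#define NR_SLOTS 3	/* stands in for MAX_NR_PIPELINE */

/* Offer one candidate demand to the top-NR_SLOTS array; larger demands push
 * smaller entries down, and the displaced entry keeps competing for the
 * remaining slots, mirroring the heavy_wts[] insertion pass. */
static void offer(int top[NR_SLOTS], int used[NR_SLOTS], int demand)
{
	int i;

	for (i = 0; i < NR_SLOTS; i++) {
		if (!used[i]) {
			top[i] = demand;
			used[i] = 1;
			break;
		} else if (demand >= top[i]) {
			int tmp = top[i];

			top[i] = demand;
			demand = tmp;	/* displaced entry keeps competing */
		}
	}
}

int main(void)
{
	int top[NR_SLOTS] = {0}, used[NR_SLOTS] = {0};
	int demands[] = { 120, 40, 300, 90, 250 };
	int i;

	for (i = 0; i < (int)(sizeof(demands) / sizeof(demands[0])); i++)
		offer(top, used, demands[i]);

	for (i = 0; i < NR_SLOTS; i++)
		printf("slot%d=%d\n", i, top[i]);	/* 300, 250, 120 */
	return 0;
}
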
void assign_heaviest_topapp(bool found_topapp)
{
int i;
struct walt_task_struct *wts;
if (!found_topapp)
return;
raw_spin_lock(&heavy_lock);
/* start with non-prime cpus chosen for this chipset (e.g. golds) */
cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
cpumask_andnot(&last_available_big_cpus, &last_available_big_cpus, cpu_halt_mask);
/*
* Only pin the special task if there are 3 auto pipeline tasks and certain demand
* conditions hold between the special pipeline task and the largest non-special
* pipeline task.
*/
if (should_pipeline_pin_special()) {
pipeline_pinning = true;
heavy_wts[0]->pipeline_cpu =
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
heavy_wts[0]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
if (cpumask_test_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus))
cpumask_clear_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus);
} else {
pipeline_pinning = false;
}
for (i = 0; i < MAX_NR_PIPELINE; i++) {
wts = heavy_wts[i];
if (!wts)
continue;
if (i == 0 && pipeline_pinning)
continue;
if (wts->pipeline_cpu != -1) {
if (cpumask_test_cpu(wts->pipeline_cpu, &last_available_big_cpus))
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
else
/* avoid assigning two pipelines to same cpu */
wts->pipeline_cpu = -1;
}
}
have_heavy_list = 0;
/* assign cpus and heavy status to the new heavy */
for (i = 0; i < MAX_NR_PIPELINE; i++) {
wts = heavy_wts[i];
if (!wts)
continue;
if (wts->pipeline_cpu == -1) {
wts->pipeline_cpu = cpumask_last(&last_available_big_cpus);
if (wts->pipeline_cpu >= nr_cpu_ids) {
/* drop from heavy if it can't be assigned */
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
heavy_wts[i]->pipeline_cpu = -1;
heavy_wts[i] = NULL;
} else {
/*
* clear the cpu from the available list of pipeline cpus,
* as pipeline_cpu is now assigned for the task.
*/
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
}
}
if (wts->pipeline_cpu >= 0)
have_heavy_list++;
}
if (trace_sched_pipeline_tasks_enabled()) {
for (i = 0; i < MAX_NR_PIPELINE; i++) {
if (heavy_wts[i] != NULL)
trace_sched_pipeline_tasks(AUTO_PIPELINE, i, heavy_wts[i],
have_heavy_list, total_util, pipeline_pinning);
}
}
raw_spin_unlock(&heavy_lock);
}
static inline void swap_pipeline_with_prime_locked(struct walt_task_struct *prime_wts,
struct walt_task_struct *other_wts)
{
if (prime_wts && other_wts) {
if (pipeline_demand(prime_wts) < pipeline_demand(other_wts)) {
int cpu;
cpu = other_wts->pipeline_cpu;
other_wts->pipeline_cpu = prime_wts->pipeline_cpu;
prime_wts->pipeline_cpu = cpu;
trace_sched_pipeline_swapped(other_wts, prime_wts);
}
} else if (!prime_wts && other_wts) {
/* if the preferred prime task died, promote the gold task to prime (assumes one prime) */
other_wts->pipeline_cpu =
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
trace_sched_pipeline_swapped(other_wts, prime_wts);
}
}
#define WINDOW_HYSTERESIS 4
static inline bool delay_rearrange(u64 window_start, int pipeline_type, bool force)
{
static u64 last_rearrange_ns[MAX_PIPELINE_TYPES];
if (!force && last_rearrange_ns[pipeline_type] &&
(window_start < (last_rearrange_ns[pipeline_type] +
(sched_ravg_window*WINDOW_HYSTERESIS))))
return true;
last_rearrange_ns[pipeline_type] = window_start;
return false;
}
static inline void find_prime_and_max_tasks(struct walt_task_struct **wts_list,
struct walt_task_struct **prime_wts,
struct walt_task_struct **other_wts)
{
int i;
int max_demand = 0;
for (i = 0; i < MAX_NR_PIPELINE; i++) {
struct walt_task_struct *wts = wts_list[i];
if (wts == NULL)
continue;
if (wts->pipeline_cpu < 0)
continue;
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
if (prime_wts)
*prime_wts = wts;
} else if (other_wts && pipeline_demand(wts) > max_demand) {
max_demand = pipeline_demand(wts);
*other_wts = wts;
}
}
}
static inline bool is_prime_worthy(struct walt_task_struct *wts)
{
struct task_struct *p;
if (wts == NULL)
return false;
if (num_sched_clusters < 2)
return true;
p = wts_to_ts(wts);
/*
* Assume the first row of the cpu array lists the clusters in increasing
* order of capacity, where the last column represents prime and the
* second to last column represents the golds.
*/
return !task_fits_max(p, cpumask_last(&cpu_array[0][num_sched_clusters - 2]));
}
void rearrange_heavy(u64 window_start, bool force)
{
struct walt_task_struct *prime_wts = NULL;
struct walt_task_struct *other_wts = NULL;
unsigned long flags;
if (num_sched_clusters < 2)
return;
raw_spin_lock_irqsave(&heavy_lock, flags);
/*
* TODO: As primes are isolated under have_heavy_list < 3, and pipeline misfits are also
* disabled, setting the prime worthy task's pipeline_cpu as CPU7 could lead to the
* pipeline_cpu selection being ignored until the next run of find_heaviest_topapp(),
* and furthermore remove the task's current gold pipeline_cpu, which could cause the
* task to start bouncing around on the golds, and ultimately lead to suboptimal behavior.
*/
if (have_heavy_list <= 2) {
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
if (prime_wts && !is_prime_worthy(prime_wts)) {
int assign_cpu;
/* demote prime_wts, it is not worthy */
assign_cpu = cpumask_first(&last_available_big_cpus);
if (assign_cpu < nr_cpu_ids) {
prime_wts->pipeline_cpu = assign_cpu;
cpumask_clear_cpu(assign_cpu, &last_available_big_cpus);
prime_wts = NULL;
}
/* if no pipeline cpu available to assign, leave task on prime */
}
if (!prime_wts && is_prime_worthy(other_wts)) {
/* promote other_wts to prime, it is worthy */
swap_pipeline_with_prime_locked(NULL, other_wts);
}
goto out;
}
if (pipeline_pinning)
goto out;
if (delay_rearrange(window_start, AUTO_PIPELINE, force))
goto out;
if (!soc_feat(SOC_ENABLE_PIPELINE_SWAPPING_BIT) && !force)
goto out;
/* swap prime for have_heavy_list >= 3 */
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
swap_pipeline_with_prime_locked(prime_wts, other_wts);
out:
raw_spin_unlock_irqrestore(&heavy_lock, flags);
}
void rearrange_pipeline_preferred_cpus(u64 window_start)
{
unsigned long flags;
struct walt_task_struct *wts;
bool set_unisolation = false;
u32 max_demand = 0;
struct walt_task_struct *prime_wts = NULL;
struct walt_task_struct *other_wts = NULL;
static int assign_cpu = -1;
static bool last_set_unisolation;
int i;
if (sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres)
return;
if (num_sched_clusters < 2)
return;
if (!pipeline_nr || sched_boost_type)
goto out;
if (delay_rearrange(window_start, MANUAL_PIPELINE, false))
goto out;
raw_spin_lock_irqsave(&pipeline_lock, flags);
set_unisolation = true;
for (i = 0; i < WALT_NR_CPUS; i++) {
wts = pipeline_wts[i];
if (!wts)
continue;
if (!wts->grp)
wts->pipeline_cpu = -1;
/*
* assumes that if one pipeline task doesn't have a preferred cpu set,
* none of the others do either
*/
if (wts->pipeline_cpu == -1) {
assign_cpu = cpumask_next_and(assign_cpu,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
/* reset and rotate the cpus */
assign_cpu = cpumask_next_and(-1,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
wts->pipeline_cpu = -1;
else
wts->pipeline_cpu = assign_cpu;
}
if (wts->pipeline_cpu != -1) {
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
/* assumes just one prime */
prime_wts = wts;
} else if (pipeline_demand(wts) > max_demand) {
max_demand = pipeline_demand(wts);
other_wts = wts;
}
}
}
if (pipeline_nr <= 2) {
set_unisolation = false;
if (prime_wts && !is_prime_worthy(prime_wts)) {
/* demote prime_wts, it is not worthy */
assign_cpu = cpumask_next_and(assign_cpu,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
/* reset and rotate the cpus */
assign_cpu = cpumask_next_and(-1,
&cpus_for_pipeline, cpu_online_mask);
if (assign_cpu >= nr_cpu_ids)
prime_wts->pipeline_cpu = -1;
else
prime_wts->pipeline_cpu = assign_cpu;
prime_wts = NULL;
}
if (!prime_wts && is_prime_worthy(other_wts)) {
/* promote other_wts to prime, it is worthy */
swap_pipeline_with_prime_locked(NULL, other_wts);
set_unisolation = true;
}
if (prime_wts)
set_unisolation = true;
goto release_lock;
}
/* swap prime for pipeline_nr >= 3 */
swap_pipeline_with_prime_locked(prime_wts, other_wts);
if (trace_sched_pipeline_tasks_enabled()) {
for (i = 0; i < WALT_NR_CPUS; i++) {
if (pipeline_wts[i] != NULL)
trace_sched_pipeline_tasks(MANUAL_PIPELINE, i, pipeline_wts[i],
pipeline_nr, 0, 0);
}
}
release_lock:
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
out:
if (set_unisolation ^ last_set_unisolation) {
pipeline_set_unisolation(set_unisolation, MANUAL_PIPELINE);
last_set_unisolation = set_unisolation;
}
}
bool pipeline_check(struct walt_rq *wrq)
{
/* found_topapp should force rearrangement */
bool found_topapp = find_heaviest_topapp(wrq->window_start);
rearrange_pipeline_preferred_cpus(wrq->window_start);
pipeline_reset_unisolation_state();
return found_topapp;
}
void pipeline_rearrange(struct walt_rq *wrq, bool found_topapp)
{
assign_heaviest_topapp(found_topapp);
rearrange_heavy(wrq->window_start, found_topapp);
}
bool enable_load_sync(int cpu)
{
if (!cpumask_test_cpu(cpu, &pipeline_sync_cpus))
return false;
if (!pipeline_in_progress())
return false;
/*
* Under manual pipeline, only load-sync between the pipeline_sync_cpus if at least one
* of the CPUs that userspace has allocated for pipeline tasks belongs to the
* pipeline_sync_cpus.
*/
if (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres &&
!cpumask_intersects(&pipeline_sync_cpus, &cpus_for_pipeline))
return false;
/* Only load-sync if there are 3 auto pipeline tasks */
if (have_heavy_list)
return have_heavy_list == MAX_NR_PIPELINE;
/*
* If auto pipeline is disabled, manual must be on. Only load-sync under manual
* pipeline if there are 3 or more pipeline tasks.
*/
return pipeline_nr >= MAX_NR_PIPELINE;
}
/*
* pipeline_fits_smaller_cpus evaluates if a pipeline task should be treated as a misfit.
* There are three possible outcomes:
* - ret -1: Continue evaluation with task_fits_max().
* - ret 0: Task should be treated as a misfit (does not fit on smaller CPUs).
* - ret 1: Task cannot be treated as a misfit (fits on smaller CPUs).
*
* If the task is assigned a pipeline CPU which is a prime CPU, ret should be 0, indicating
* the task is a misfit.
* If the number of pipeline tasks is 2 or fewer, continue evaluation of task_fits_max().
* If the number of pipeline tasks is 3 or more, ret should be 1, indicating the task fits on the
* smaller CPUs and is not a misfit.
*/
int pipeline_fits_smaller_cpus(struct task_struct *p)
{
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
unsigned int pipeline_cpu = wts->pipeline_cpu;
if (pipeline_cpu == -1)
return -1;
if (cpumask_test_cpu(pipeline_cpu, &cpu_array[0][num_sched_clusters-1]))
return 0;
if (have_heavy_list) {
if (have_heavy_list == MAX_NR_PIPELINE)
return 1;
else
return -1;
}
if (pipeline_nr >= MAX_NR_PIPELINE)
return 1;
else
return -1;
}
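
A hedged sketch of how a caller might fold the tri-state result into a boolean misfit decision, falling back to the normal capacity check when the return value is -1. Both stub functions below are invented stand-ins; only the -1/0/1 contract comes from the comment above:

#include <stdio.h>
#include <stdbool.h>

/* Stub mirroring the tri-state contract documented above:
 * -1 = no opinion, 0 = treat as misfit, 1 = fits smaller CPUs. */
static int stub_pipeline_fits_smaller_cpus(int scenario)
{
	return scenario;	/* the scenario encodes the return value directly */
}

/* Stand-in for the scheduler's own capacity check (task_fits_max()). */
static bool stub_task_fits_max(void)
{
	return false;	/* pretend the task does not fit, for the demo */
}

/* Fold the tri-state result into a misfit decision. */
static bool is_misfit(int scenario)
{
	int ret = stub_pipeline_fits_smaller_cpus(scenario);

	if (ret == 0)
		return true;	/* pinned to a prime pipeline CPU */
	if (ret == 1)
		return false;	/* enough pipeline tasks to share smaller CPUs */
	return !stub_task_fits_max();	/* -1: fall back to the normal check */
}

int main(void)
{
	printf("ret=-1 -> misfit=%d\n", is_misfit(-1));	/* 1 (fallback path) */
	printf("ret=0  -> misfit=%d\n", is_misfit(0));	/* 1 */
	printf("ret=1  -> misfit=%d\n", is_misfit(1));	/* 0 */
	return 0;
}
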

View File

@@ -0,0 +1,176 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021 The Linux Foundation. All rights reserved.
*/
#include <linux/ftrace.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <trace/hooks/preemptirq.h>
#define CREATE_TRACE_POINTS
#include "preemptirq_long.h"
#define IRQSOFF_SENTINEL 0x0fffDEAD
static unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000;
static unsigned int sysctl_irqsoff_tracing_threshold_ns = 5000000;
static unsigned int sysctl_irqsoff_dmesg_output_enabled;
static unsigned int sysctl_irqsoff_crash_sentinel_value;
static unsigned int sysctl_irqsoff_crash_threshold_ns = 10000000;
static unsigned int half_million = 500000;
static unsigned int one_hundred_million = 100000000;
static unsigned int one_million = 1000000;
static DEFINE_PER_CPU(u64, irq_disabled_ts);
/*
* Preemption-disable tracking requires additional context
* to rule out false positives. See the comment in
* test_preempt_disable_long() for more details.
*/
struct preempt_store {
u64 ts;
int pid;
unsigned long ncsw;
};
static DEFINE_PER_CPU(struct preempt_store, the_ps);
static void note_irq_disable(void *u1, unsigned long u2, unsigned long u3)
{
if (is_idle_task(current))
return;
/*
* We just have to note down the timestamp here. We
* use the stacktrace trigger feature to print the stacktrace.
*/
this_cpu_write(irq_disabled_ts, sched_clock());
}
static void test_irq_disable_long(void *u1, unsigned long ip, unsigned long parent_ip)
{
u64 ts = this_cpu_read(irq_disabled_ts);
if (!ts)
return;
this_cpu_write(irq_disabled_ts, 0);
ts = sched_clock() - ts;
if (ts > sysctl_irqsoff_tracing_threshold_ns) {
trace_irq_disable_long(ts, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
if (sysctl_irqsoff_dmesg_output_enabled == IRQSOFF_SENTINEL)
printk_deferred("irqs off exceeds thresh delta=%llu C:(%ps<-%ps<-%ps<-%ps)\n",
ts, (void *)CALLER_ADDR2,
(void *)CALLER_ADDR3,
(void *)CALLER_ADDR4,
(void *)CALLER_ADDR5);
}
if (sysctl_irqsoff_crash_sentinel_value == IRQSOFF_SENTINEL &&
ts > sysctl_irqsoff_crash_threshold_ns) {
printk_deferred("delta=%llu(ns) > crash_threshold=%u(ns) Task=%s\n",
ts, sysctl_irqsoff_crash_threshold_ns,
current->comm);
BUG_ON(1);
}
}
static void note_preempt_disable(void *u1, unsigned long u2, unsigned long u3)
{
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
ps->ts = sched_clock();
ps->pid = current->pid;
ps->ncsw = current->nvcsw + current->nivcsw;
}
static void test_preempt_disable_long(void *u1, unsigned long ip,
unsigned long parent_ip)
{
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
u64 delta = 0;
if (!ps->ts)
return;
/*
* schedule() calls __schedule() with preemption disabled.
* If we entered idle and are exiting idle now, we would think
* preemption was disabled the whole time. Detect this by
* checking whether preemption was disabled across the same
* task. There is a possibility that the same task is scheduled
* again after idle; to rule that out, also compare the
* context-switch count.
*/
if (ps->pid == current->pid && (ps->ncsw == current->nvcsw +
current->nivcsw))
delta = sched_clock() - ps->ts;
ps->ts = 0;
if (delta > sysctl_preemptoff_tracing_threshold_ns)
trace_preempt_disable_long(delta, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
}
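
The comment above explains why a raw timestamp delta is not enough: the CPU may have scheduled through idle with preemption notionally disabled across different tasks. A standalone sketch of the (pid, context-switch count) filter that discards such samples; the struct mirrors preempt_store but all values are synthetic:

#include <stdio.h>
#include <stdint.h>

struct demo_store {
	uint64_t ts;
	int pid;
	unsigned long ncsw;	/* voluntary + involuntary switches */
};

/* Return the preempt-off duration, or 0 if the sample is a false positive
 * (different task, or the same task rescheduled after idle). */
static uint64_t preempt_off_delta(const struct demo_store *at_disable,
				  uint64_t now, int cur_pid,
				  unsigned long cur_ncsw)
{
	if (at_disable->pid == cur_pid && at_disable->ncsw == cur_ncsw)
		return now - at_disable->ts;
	return 0;
}

int main(void)
{
	struct demo_store at_disable = { .ts = 1000, .pid = 42, .ncsw = 7 };

	/* same task, no context switches in between: genuine long section */
	printf("%llu\n", (unsigned long long)
	       preempt_off_delta(&at_disable, 9000, 42, 7));	/* 8000 */
	/* same pid, but it was scheduled out and back in: discard */
	printf("%llu\n", (unsigned long long)
	       preempt_off_delta(&at_disable, 9000, 42, 8));	/* 0 */
	return 0;
}
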
static struct ctl_table preemptirq_long_table[] = {
{
.procname = "preemptoff_tracing_threshold_ns",
.data = &sysctl_preemptoff_tracing_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_tracing_threshold_ns",
.data = &sysctl_irqsoff_tracing_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = &half_million,
.extra2 = &one_hundred_million,
},
{
.procname = "irqsoff_dmesg_output_enabled",
.data = &sysctl_irqsoff_dmesg_output_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_crash_sentinel_value",
.data = &sysctl_irqsoff_crash_sentinel_value,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "irqsoff_crash_threshold_ns",
.data = &sysctl_irqsoff_crash_threshold_ns,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = &one_million,
.extra2 = &one_hundred_million,
},
};
int preemptirq_long_init(void)
{
if (!register_sysctl("preemptirq", preemptirq_long_table)) {
pr_err("Fail to register sysctl table\n");
return -EPERM;
}
register_trace_android_rvh_irqs_disable(note_irq_disable, NULL);
register_trace_android_rvh_irqs_enable(test_irq_disable_long, NULL);
register_trace_android_rvh_preempt_disable(note_preempt_disable, NULL);
register_trace_android_rvh_preempt_enable(test_preempt_disable_long,
NULL);
return 0;
}
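
Since the table is registered under "preemptirq", these knobs should appear as /proc/sys/preemptirq/<procname>. A hedged userspace sketch that raises the irqs-off tracing threshold to 10 ms; the path is assumed from the registration above:

#include <stdio.h>

int main(void)
{
	/* Path assumed from register_sysctl("preemptirq", ...) above. */
	const char *path = "/proc/sys/preemptirq/irqsoff_tracing_threshold_ns";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* 10 ms expressed in nanoseconds; must stay within the
	 * half_million..one_hundred_million bounds enforced by the table. */
	fprintf(f, "%u\n", 10000000u);
	fclose(f);
	return 0;
}
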

View File

@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021 The Linux Foundation. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM preemptirq_long
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#if !defined(_TRACE_PREEMPTIRQ_LONG_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PREEMPTIRQ_LONG_H
#include <linux/tracepoint.h>
/* reference preemptirq_template */
DECLARE_EVENT_CLASS(preemptirq_long_template,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip),
TP_STRUCT__entry(
__field(u64, delta)
__field(unsigned long, caller_offs)
__field(unsigned long, parent_offs)
__field(unsigned long, pparent_offs)
__field(unsigned long, ppparent_offs)
),
TP_fast_assign(
__entry->delta = delta;
__entry->caller_offs = ip;
__entry->parent_offs = parent_ip;
__entry->pparent_offs = pparent_ip;
__entry->ppparent_offs = ppparent_ip;
),
TP_printk("delta=%llu(ns) caller=%ps <- %ps <- %ps <- %ps",
__entry->delta, (void *)__entry->caller_offs,
(void *)__entry->parent_offs, (void *)__entry->pparent_offs,
(void *)__entry->ppparent_offs)
);
DEFINE_EVENT(preemptirq_long_template, irq_disable_long,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
DEFINE_EVENT(preemptirq_long_template, preempt_disable_long,
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
unsigned long pparent_ip, unsigned long ppparent_ip),
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
#endif /* _TRACE_PREEMPTIRQ_LONG_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
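
With TRACE_SYSTEM set to preemptirq_long, the two events should show up under the preemptirq_long directory of tracefs once this code is built in. A hedged sketch that enables both events, assuming the standard /sys/kernel/tracing mount point:

#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Paths assumed from TRACE_SYSTEM preemptirq_long and a standard
	 * tracefs mount at /sys/kernel/tracing. */
	write_str("/sys/kernel/tracing/events/preemptirq_long/irq_disable_long/enable", "1");
	write_str("/sys/kernel/tracing/events/preemptirq_long/preempt_disable_long/enable", "1");
	return 0;
}
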

View File

@@ -0,0 +1,397 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
/*
* Scheduler hook for average runqueue determination
*/
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "walt.h"
#include "trace.h"
static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(int, last_time_cpu);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr_trailblazer_prod_sum);
static DEFINE_PER_CPU(u64, nr);
static DEFINE_PER_CPU(u64, nr_max);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;
static DEFINE_PER_CPU(atomic64_t, busy_hyst_end_time) = ATOMIC64_INIT(0);
static DEFINE_PER_CPU(u64, hyst_time);
static DEFINE_PER_CPU(u64, coloc_hyst_busy);
static DEFINE_PER_CPU(u64, coloc_hyst_time);
static DEFINE_PER_CPU(u64, util_hyst_time);
static DEFINE_PER_CPU(u64, smart_freq_legacy_reason_hyst_ns);
#define NR_THRESHOLD_PCT 40
#define NR_THRESHOLD_TRAIL_PCT 80
#define MAX_RTGB_TIME (sysctl_sched_coloc_busy_hyst_max_ms * NSEC_PER_MSEC)
struct sched_avg_stats stats[WALT_NR_CPUS];
unsigned int cstats_util_pct[MAX_CLUSTERS];
u8 smart_freq_legacy_reason_hyst_ms[LEGACY_SMART_FREQ][WALT_NR_CPUS];
/**
* sched_get_cluster_util_pct
* @return: the percentage of this cluster's capacity that was used in the
* previous window.
*
* This routine may be called any number of times as needed during
* a window, but will always return the same result until window
* rollover.
*/
unsigned int sched_get_cluster_util_pct(struct walt_sched_cluster *cluster)
{
unsigned int cluster_util_pct = 0;
if (cluster->id < MAX_CLUSTERS)
cluster_util_pct = cstats_util_pct[cluster->id];
return cluster_util_pct;
}
bool trailblazer_state;
/**
* sched_get_nr_running_avg
* @return: Average nr_running, iowait and nr_big_tasks value since last poll.
* Returns the avg * 100 to return up to two decimal points
* of accuracy.
*
* Obtains the average nr_running value since the last poll.
* This function may not be called concurrently with itself.
*
* It is assumed that this function is called at most once per window
* rollover.
*/
struct sched_avg_stats *sched_get_nr_running_avg(void)
{
int cpu;
u64 curr_time = sched_clock();
u64 period = curr_time - last_get_time;
u64 tmp_nr, tmp_misfit, tmp_trailblazer;
bool any_hyst_time = false;
struct walt_sched_cluster *cluster;
bool trailblazer_cpu = false;
if (unlikely(walt_disabled))
return NULL;
if (!period)
goto done;
/* read and reset nr_running counts */
for_each_possible_cpu(cpu) {
unsigned long flags;
u64 diff;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
if ((s64)diff < 0) {
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
WALT_PANIC(1);
}
tmp_nr = per_cpu(nr_prod_sum, cpu);
tmp_nr += per_cpu(nr, cpu) * diff;
tmp_nr = div64_u64((tmp_nr * 100), period);
tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
tmp_misfit += walt_big_tasks(cpu) * diff;
tmp_misfit = div64_u64((tmp_misfit * 100), period);
tmp_trailblazer = per_cpu(nr_trailblazer_prod_sum, cpu);
tmp_trailblazer += walt_trailblazer_tasks(cpu) * diff;
tmp_trailblazer = div64_u64((tmp_trailblazer * 100), period);
/*
* Adding NR_THRESHOLD_PCT makes sure a task only counts if it ran at
* least (100 - NR_THRESHOLD_PCT)% of the last window, to compensate
* for any over-estimation being done.
*/
stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
100);
stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
NR_THRESHOLD_PCT), 100);
trailblazer_cpu |= (int)div64_u64((tmp_trailblazer +
NR_THRESHOLD_TRAIL_PCT), 100);
stats[cpu].nr_max = per_cpu(nr_max, cpu);
stats[cpu].nr_scaled = tmp_nr;
trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
stats[cpu].nr_misfit, stats[cpu].nr_max,
stats[cpu].nr_scaled, trailblazer_cpu);
per_cpu(last_time, cpu) = curr_time;
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
per_cpu(nr_prod_sum, cpu) = 0;
per_cpu(nr_big_prod_sum, cpu) = 0;
per_cpu(nr_trailblazer_prod_sum, cpu) = 0;
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
trailblazer_state = trailblazer_cpu;
/* collect cluster load stats */
for_each_sched_cluster(cluster) {
unsigned int num_cpus = cpumask_weight(&cluster->cpus);
unsigned int sum_util_pct = 0;
/* load is already scaled, see freq_policy_load/prev_runnable_sum */
for_each_cpu(cpu, &cluster->cpus) {
struct rq *rq = cpu_rq(cpu);
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
/* compute the percentage of this cpu's capacity that was utilized,
* and sum it across all cpus
*/
sum_util_pct +=
(wrq->util * 100) / arch_scale_cpu_capacity(cpu);
}
/* calculate the average per-cpu utilization */
cstats_util_pct[cluster->id] = sum_util_pct / num_cpus;
}
for_each_possible_cpu(cpu) {
if (per_cpu(coloc_hyst_time, cpu)) {
any_hyst_time = true;
break;
}
}
if (any_hyst_time && get_rtgb_active_time() >= MAX_RTGB_TIME)
sched_update_hyst_times();
last_get_time = curr_time;
done:
return &stats[0];
}
EXPORT_SYMBOL_GPL(sched_get_nr_running_avg);
void sched_update_hyst_times(void)
{
bool rtgb_active;
int cpu;
unsigned long cpu_cap, coloc_busy_pct;
rtgb_active = is_rtgb_active() && (sched_boost_type != CONSERVATIVE_BOOST)
&& (get_rtgb_active_time() < MAX_RTGB_TIME);
for_each_possible_cpu(cpu) {
cpu_cap = arch_scale_cpu_capacity(cpu);
coloc_busy_pct = sysctl_sched_coloc_busy_hyst_cpu_busy_pct[cpu];
per_cpu(hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_busy_hyst_enable_cpus) ?
sysctl_sched_busy_hyst : 0;
per_cpu(coloc_hyst_time, cpu) = ((BIT(cpu)
& sysctl_sched_coloc_busy_hyst_enable_cpus)
&& rtgb_active) ?
sysctl_sched_coloc_busy_hyst_cpu[cpu] : 0;
per_cpu(coloc_hyst_busy, cpu) = mult_frac(cpu_cap,
coloc_busy_pct, 100);
per_cpu(util_hyst_time, cpu) = (BIT(cpu)
& sysctl_sched_util_busy_hyst_enable_cpus) ?
sysctl_sched_util_busy_hyst_cpu[cpu] : 0;
}
}
#define BUSY_NR_RUN 3
#define BUSY_LOAD_FACTOR 10
static inline void update_busy_hyst_end_time(int cpu, int enq,
unsigned long prev_nr_run, u64 curr_time)
{
bool nr_run_trigger = false;
bool load_trigger = false, coloc_load_trigger = false;
u64 agg_hyst_time, total_util = 0;
bool util_load_trigger = false;
int i;
bool hyst_trigger, coloc_trigger;
bool dequeue = (enq < 0);
if (is_max_possible_cluster_cpu(cpu) && is_obet)
return;
if (!per_cpu(hyst_time, cpu) && !per_cpu(coloc_hyst_time, cpu) &&
!per_cpu(util_hyst_time, cpu) && !per_cpu(smart_freq_legacy_reason_hyst_ns, cpu))
return;
if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
nr_run_trigger = true;
if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
capacity_orig_of(cpu))
load_trigger = true;
if (dequeue && cpu_util(cpu) > per_cpu(coloc_hyst_busy, cpu))
coloc_load_trigger = true;
if (dequeue) {
for_each_possible_cpu(i) {
total_util += cpu_util(i);
if (total_util >= sysctl_sched_util_busy_hyst_cpu_util[cpu]) {
util_load_trigger = true;
break;
}
}
}
coloc_trigger = nr_run_trigger || coloc_load_trigger;
#if IS_ENABLED(CONFIG_SCHED_CONSERVATIVE_BOOST_LPM_BIAS)
hyst_trigger = nr_run_trigger || load_trigger || (sched_boost_type == CONSERVATIVE_BOOST);
#else
hyst_trigger = nr_run_trigger || load_trigger;
#endif
agg_hyst_time = max(max(hyst_trigger ? per_cpu(hyst_time, cpu) : 0,
coloc_trigger ? per_cpu(coloc_hyst_time, cpu) : 0),
util_load_trigger ? per_cpu(util_hyst_time, cpu) : 0);
agg_hyst_time = max(agg_hyst_time, per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
if (agg_hyst_time) {
atomic64_set(&per_cpu(busy_hyst_end_time, cpu),
curr_time + agg_hyst_time);
trace_sched_busy_hyst_time(cpu, agg_hyst_time, prev_nr_run,
cpu_util(cpu), per_cpu(hyst_time, cpu),
per_cpu(coloc_hyst_time, cpu),
per_cpu(util_hyst_time, cpu),
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
}
}
int sched_busy_hyst_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
if (table->maxlen > (sizeof(unsigned int) * num_possible_cpus()))
table->maxlen = sizeof(unsigned int) * num_possible_cpus();
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
sched_update_hyst_times();
return ret;
}
/**
* sched_update_nr_prod
* @cpu: The core id of the nr running driver.
* @enq: enqueue/dequeue/misfit happening on this CPU.
* @return: N/A
*
* Update average with latest nr_running value for CPU
*/
void sched_update_nr_prod(int cpu, int enq)
{
u64 diff;
u64 curr_time;
unsigned long flags, nr_running;
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
nr_running = per_cpu(nr, cpu);
curr_time = sched_clock();
diff = curr_time - per_cpu(last_time, cpu);
if ((s64)diff < 0) {
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
WALT_PANIC(1);
}
per_cpu(last_time, cpu) = curr_time;
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
per_cpu(nr, cpu) = cpu_rq(cpu)->nr_running + enq;
if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
/* Don't update hyst time for misfit tasks */
if (enq)
update_busy_hyst_end_time(cpu, enq, nr_running, curr_time);
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
per_cpu(nr_trailblazer_prod_sum, cpu) += (u64) walt_trailblazer_tasks(cpu) * diff;
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
/*
* Returns the CPU utilization % in the last window.
*/
unsigned int sched_get_cpu_util_pct(int cpu)
{
struct rq *rq = cpu_rq(cpu);
u64 util;
unsigned long capacity, flags;
unsigned int busy;
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
raw_spin_lock_irqsave(&rq->__lock, flags);
capacity = capacity_orig_of(cpu);
util = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;
util = scale_time_to_util(util);
raw_spin_unlock_irqrestore(&rq->__lock, flags);
util = (util >= capacity) ? capacity : util;
busy = div64_ul((util * 100), capacity);
return busy;
}
int sched_lpm_disallowed_time(int cpu, u64 *timeout)
{
u64 now = sched_clock();
u64 bias_end_time = atomic64_read(&per_cpu(busy_hyst_end_time, cpu));
if (unlikely(walt_disabled))
return -EAGAIN;
if (unlikely(is_reserved(cpu))) {
*timeout = 10 * NSEC_PER_MSEC;
return 0; /* shallowest c-state */
}
if (now < bias_end_time) {
*timeout = bias_end_time - now;
return 0; /* shallowest c-state */
}
return INT_MAX; /* don't care */
}
EXPORT_SYMBOL_GPL(sched_lpm_disallowed_time);
void update_smart_freq_legacy_reason_hyst_time(struct walt_sched_cluster *cluster)
{
int cpu, i;
u8 max_hyst_ms;
for_each_cpu(cpu, &cluster->cpus) {
max_hyst_ms = 0;
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
if (cluster->smart_freq_info->cluster_active_reason & BIT(i))
max_hyst_ms =
max(smart_freq_legacy_reason_hyst_ms[i][cpu],
max_hyst_ms);
}
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu) = max_hyst_ms * NSEC_PER_MSEC;
}
}
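
sched_update_nr_prod() accumulates nr_running weighted by how long it stayed at each level, and the poll side divides by the elapsed period and rounds via NR_THRESHOLD_PCT. A standalone sketch of that time-weighted average with plain integer timestamps in place of sched_clock():

#include <stdio.h>

#define NR_THRESHOLD_PCT 40

static unsigned long long nr_prod_sum;	/* sum of nr_running * time-at-level */
static unsigned long long last_time;
static unsigned long nr_running;

/* Mirror of sched_update_nr_prod(): fold in the time spent at the old level. */
static void update_nr(unsigned long long now, int enq)
{
	nr_prod_sum += nr_running * (now - last_time);
	last_time = now;
	nr_running += enq;
}

/* Mirror of the poll side: average * 100, rounded up via NR_THRESHOLD_PCT. */
static int poll_avg(unsigned long long now, unsigned long long period_start)
{
	unsigned long long period = now - period_start;
	unsigned long long scaled;

	update_nr(now, 0);			/* close the last interval */
	scaled = nr_prod_sum * 100 / period;	/* avg with two decimal digits */
	nr_prod_sum = 0;
	return (int)((scaled + NR_THRESHOLD_PCT) / 100);
}

int main(void)
{
	update_nr(0, 2);	/* 2 tasks runnable from t=0 */
	update_nr(6, 1);	/* a third task arrives at t=6 */
	update_nr(8, -2);	/* two tasks finish at t=8 */
	/* window [0,10): 2*6 + 3*2 + 1*2 = 20 -> scaled 200 -> avg 2 */
	printf("avg nr_running = %d\n", poll_avg(10, 0));
	return 0;
}
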

View File

@@ -0,0 +1,589 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/tick.h>
#include "walt.h"
#include "trace.h"
#include <trace/events/power.h>
bool smart_freq_init_done;
char reason_dump[1024];
static DEFINE_MUTEX(freq_reason_mutex);
int sched_smart_freq_legacy_dump_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = -EINVAL, pos = 0, i, j;
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
for (j = 0; j < num_sched_clusters; j++) {
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%llu:%d\n", j, i,
default_freq_config[j].legacy_reason_config[i].freq_allowed,
default_freq_config[j].legacy_reason_config[i].hyst_ns,
!!(default_freq_config[j].smart_freq_participation_mask &
BIT(i)));
}
}
ret = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&freq_reason_mutex);
return ret;
}
int sched_smart_freq_ipc_dump_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = -EINVAL, pos = 0, i, j;
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
for (j = 0; j < num_sched_clusters; j++) {
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%lu:%llu:%d\n", j, i,
default_freq_config[j].ipc_reason_config[i].ipc,
default_freq_config[j].ipc_reason_config[i].freq_allowed,
default_freq_config[j].ipc_reason_config[i].hyst_ns,
!!(default_freq_config[j].smart_freq_ipc_participation_mask &
BIT(i)));
}
}
ret = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&freq_reason_mutex);
return ret;
}
int sched_smart_freq_ipc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
int cluster_id = -1;
unsigned long no_reason_freq;
int i;
unsigned int *data = (unsigned int *)table->data;
int val[SMART_FMAX_IPC_MAX];
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(int) * SMART_FMAX_IPC_MAX,
.mode = table->mode,
};
if (!smart_freq_init_done)
return -EINVAL;
mutex_lock(&freq_reason_mutex);
if (!write) {
tmp.data = table->data;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
goto unlock;
}
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (ret)
goto unlock;
ret = -EINVAL;
if (data == &sysctl_ipc_freq_levels_cluster0[0])
cluster_id = 0;
if (data == &sysctl_ipc_freq_levels_cluster1[0])
cluster_id = 1;
if (data == &sysctl_ipc_freq_levels_cluster2[0])
cluster_id = 2;
if (data == &sysctl_ipc_freq_levels_cluster3[0])
cluster_id = 3;
if (cluster_id == -1)
goto unlock;
if (val[0] < 0)
goto unlock;
no_reason_freq = val[0];
/* Make sure all reasons freq are larger than NO_REASON */
/* IPC/freq should be in increasing order */
for (i = 1; i < SMART_FMAX_IPC_MAX; i++) {
if (val[i] < val[i-1])
goto unlock;
}
default_freq_config[cluster_id].legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed =
no_reason_freq;
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
default_freq_config[cluster_id].ipc_reason_config[i].freq_allowed = val[i];
data[i] = val[i];
}
ret = 0;
unlock:
mutex_unlock(&freq_reason_mutex);
return ret;
}
/* return highest ipc of the cluster */
unsigned int get_cluster_ipc_level_freq(int curr_cpu, u64 time)
{
int cpu, winning_cpu, cpu_ipc_level = 0, index = 0;
struct walt_sched_cluster *cluster = cpu_cluster(curr_cpu);
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
if (!smart_freq_init_done)
return 0;
for_each_cpu(cpu, &cluster->cpus) {
cpu_ipc_level = per_cpu(ipc_level, cpu);
if ((time - per_cpu(last_ipc_update, cpu)) > 7999999ULL) {
cpu_ipc_level = 0;
per_cpu(tickless_mode, cpu) = true;
} else {
per_cpu(tickless_mode, cpu) = false;
}
if (cpu_ipc_level >= index) {
winning_cpu = cpu;
index = cpu_ipc_level;
}
}
smart_freq_info->cluster_ipc_level = index;
trace_ipc_freq(cluster->id, winning_cpu, index,
smart_freq_info->ipc_reason_config[index].freq_allowed,
time, per_cpu(ipc_deactivate_ns, winning_cpu), curr_cpu,
per_cpu(ipc_cnt, curr_cpu));
return smart_freq_info->ipc_reason_config[index].freq_allowed;
}
static inline bool has_internal_freq_limit_changed(struct walt_sched_cluster *cluster)
{
unsigned int internal_freq, ipc_freq;
int i;
struct smart_freq_cluster_info *smci = cluster->smart_freq_info;
internal_freq = cluster->walt_internal_freq_limit;
cluster->walt_internal_freq_limit = cluster->max_freq;
for (i = 0; i < MAX_FREQ_CAP; i++)
cluster->walt_internal_freq_limit = min(freq_cap[i][cluster->id],
cluster->walt_internal_freq_limit);
ipc_freq = smci->ipc_reason_config[smci->cluster_ipc_level].freq_allowed;
cluster->walt_internal_freq_limit = max(ipc_freq,
cluster->walt_internal_freq_limit);
return cluster->walt_internal_freq_limit != internal_freq;
}
void update_smart_freq_capacities_one_cluster(struct walt_sched_cluster *cluster)
{
int cpu;
if (!smart_freq_init_done)
return;
if (has_internal_freq_limit_changed(cluster)) {
for_each_cpu(cpu, &cluster->cpus)
update_cpu_capacity_helper(cpu);
}
}
void update_smart_freq_capacities(void)
{
struct walt_sched_cluster *cluster;
if (!smart_freq_init_done)
return;
for_each_sched_cluster(cluster)
update_smart_freq_capacities_one_cluster(cluster);
}
/*
* Update the active smart freq reason for the cluster.
*/
static void smart_freq_update_one_cluster(struct walt_sched_cluster *cluster,
uint32_t current_reasons, u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
{
uint32_t current_reason, cluster_active_reason;
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
unsigned long max_cap =
smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
int max_reason = NO_REASON_SMART_FREQ, i;
unsigned long old_freq_cap = freq_cap[SMART_FREQ][cluster->id];
struct rq *rq;
char smart_freq[25] = {0};
char smart_freq_reason[25] = {0};
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
current_reason = current_reasons & BIT(i);
cluster_active_reason = smart_freq_info->cluster_active_reason & BIT(i);
if (current_reason) {
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
smart_freq_info->cluster_active_reason |= BIT(i);
if (i == TRAILBLAZER_SMART_FREQ)
trail_active = true;
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
sustain_active = true;
} else if (cluster_active_reason) {
if (!smart_freq_info->legacy_reason_status[i].deactivate_ns)
smart_freq_info->legacy_reason_status[i].deactivate_ns = wallclock;
}
if (cluster_active_reason) {
/*
* For reasons with deactivation hysteresis, check here if we have
* crossed the hysteresis time and then deactivate the reason.
* We are relying on scheduler tick path to call this function
* thus deactivation of reason is only at tick
* boundary.
*/
if (smart_freq_info->legacy_reason_status[i].deactivate_ns) {
u64 delta = wallclock -
smart_freq_info->legacy_reason_status[i].deactivate_ns;
if (delta >= smart_freq_info->legacy_reason_config[i].hyst_ns) {
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
smart_freq_info->cluster_active_reason &= ~BIT(i);
if (i == TRAILBLAZER_SMART_FREQ)
trail_active = false;
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
sustain_active = false;
continue;
}
}
if (max_cap < smart_freq_info->legacy_reason_config[i].freq_allowed) {
max_cap = smart_freq_info->legacy_reason_config[i].freq_allowed;
max_reason = i;
}
}
}
if (enable_logging) {
snprintf(smart_freq, sizeof(smart_freq), "smart_fmax_%d", cluster->id);
trace_clock_set_rate(smart_freq, max_cap, raw_smp_processor_id());
snprintf(smart_freq_reason, sizeof(smart_freq_reason), "legacy_reason_%d", cluster->id);
trace_clock_set_rate(smart_freq_reason, max_reason, raw_smp_processor_id());
}
trace_sched_freq_uncap(cluster->id, nr_big, wakeup_ctr_sum, current_reasons,
smart_freq_info->cluster_active_reason, max_cap, max_reason);
if (old_freq_cap == max_cap)
return;
freq_cap[SMART_FREQ][cluster->id] = max_cap;
rq = cpu_rq(cpumask_first(&cluster->cpus));
/*
* The cpufreq smart freq path doesn't call get_util for the cpu, hence
* invoking the callback without the rq lock is safe.
*/
waltgov_run_callback(rq, WALT_CPUFREQ_SMART_FREQ_BIT);
}
#define UNCAP_THRES 300000000
#define UTIL_THRESHOLD 90
static bool thres_based_uncap(u64 window_start, struct walt_sched_cluster *cluster)
{
int cpu;
bool cluster_high_load = false, sustained_load = false;
unsigned long freq_capacity, tgt_cap;
unsigned long tgt_freq =
cluster->smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
struct walt_rq *wrq;
freq_capacity = arch_scale_cpu_capacity(cpumask_first(&cluster->cpus));
tgt_cap = mult_frac(freq_capacity, tgt_freq, cluster->max_possible_freq);
for_each_cpu(cpu, &cluster->cpus) {
wrq = &per_cpu(walt_rq, cpu);
if (wrq->util >= mult_frac(tgt_cap, UTIL_THRESHOLD, 100)) {
cluster_high_load = true;
if (!cluster->found_ts)
cluster->found_ts = window_start;
else if ((window_start - cluster->found_ts) >= UNCAP_THRES)
sustained_load = true;
break;
}
}
if (!cluster_high_load)
cluster->found_ts = 0;
return sustained_load;
}
unsigned int big_task_cnt = 6;
#define WAKEUP_CNT 100
/*
* The reason value is a two-part bitmap:
* bits 15 - 0 : reason type
* bits 31 - 16: changed state of the reason
* This helps pass multiple reasons at once and avoids multiple calls.
*/
/*
* This will be called from irq work path only
*/
void smart_freq_update_reason_common(u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
{
struct walt_sched_cluster *cluster;
bool current_state;
uint32_t cluster_reasons;
int i;
int cluster_active_reason;
uint32_t cluster_participation_mask;
bool sustained_load = false;
if (!smart_freq_init_done)
return;
for_each_sched_cluster(cluster)
sustained_load |= thres_based_uncap(wallclock, cluster);
for_each_sched_cluster(cluster) {
cluster_reasons = 0;
i = cluster->id;
cluster_participation_mask =
cluster->smart_freq_info->smart_freq_participation_mask;
/*
* NO_REASON
*/
if (cluster_participation_mask & BIT(NO_REASON_SMART_FREQ))
cluster_reasons |= BIT(NO_REASON_SMART_FREQ);
/*
* BOOST
*/
if (cluster_participation_mask & BIT(BOOST_SMART_FREQ)) {
current_state = is_storage_boost() || is_full_throttle_boost();
if (current_state)
cluster_reasons |= BIT(BOOST_SMART_FREQ);
}
/*
* TRAILBLAZER
*/
if (cluster_participation_mask & BIT(TRAILBLAZER_SMART_FREQ)) {
current_state = trailblazer_state;
if (current_state)
cluster_reasons |= BIT(TRAILBLAZER_SMART_FREQ);
}
/*
* SBT
*/
if (cluster_participation_mask & BIT(SBT_SMART_FREQ)) {
current_state = prev_is_sbt;
if (current_state)
cluster_reasons |= BIT(SBT_SMART_FREQ);
}
/*
* BIG_TASKCNT
*/
if (cluster_participation_mask & BIT(BIG_TASKCNT_SMART_FREQ)) {
current_state = (nr_big >= big_task_cnt) &&
(wakeup_ctr_sum < WAKEUP_CNT);
if (current_state)
cluster_reasons |= BIT(BIG_TASKCNT_SMART_FREQ);
}
/*
* SUSTAINED_HIGH_UTIL
*/
if (cluster_participation_mask & BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ)) {
current_state = sustained_load;
if (current_state)
cluster_reasons |= BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ);
}
/*
* PIPELINE_60FPS_OR_LESSER
*/
if (cluster_participation_mask &
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window >= SCHED_RAVG_16MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ);
}
/*
* PIPELINE_90FPS
*/
if (cluster_participation_mask &
BIT(PIPELINE_90FPS_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window == SCHED_RAVG_12MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_90FPS_SMART_FREQ);
}
/*
* PIPELINE_120FPS_OR_GREATER
*/
if (cluster_participation_mask &
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ)) {
current_state = pipeline_in_progress() &&
sched_ravg_window == SCHED_RAVG_8MS_WINDOW;
if (current_state)
cluster_reasons |=
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ);
}
/*
* THERMAL_ROTATION
*/
if (cluster_participation_mask & BIT(THERMAL_ROTATION_SMART_FREQ)) {
current_state = (oscillate_cpu != -1);
if (current_state)
cluster_reasons |= BIT(THERMAL_ROTATION_SMART_FREQ);
}
cluster_active_reason = cluster->smart_freq_info->cluster_active_reason;
/* update the reasons for all the clusters */
if (cluster_reasons || cluster_active_reason)
smart_freq_update_one_cluster(cluster, cluster_reasons, wallclock,
nr_big, wakeup_ctr_sum);
}
}
/* Common config for 4 cluster system */
struct smart_freq_cluster_info default_freq_config[MAX_CLUSTERS];
void smart_freq_init(const char *name)
{
struct walt_sched_cluster *cluster;
int i = 0, j;
for_each_sched_cluster(cluster) {
cluster->smart_freq_info = &default_freq_config[i];
cluster->smart_freq_info->smart_freq_participation_mask = BIT(NO_REASON_SMART_FREQ);
cluster->smart_freq_info->cluster_active_reason = 0;
cluster->smart_freq_info->min_cycles = 100;
cluster->smart_freq_info->smart_freq_ipc_participation_mask = 0;
freq_cap[SMART_FREQ][cluster->id] = FREQ_QOS_MAX_DEFAULT_VALUE;
memset(cluster->smart_freq_info->legacy_reason_status, 0,
sizeof(struct smart_freq_legacy_reason_status) *
LEGACY_SMART_FREQ);
memset(cluster->smart_freq_info->legacy_reason_config, 0,
sizeof(struct smart_freq_legacy_reason_config) *
LEGACY_SMART_FREQ);
memset(cluster->smart_freq_info->ipc_reason_config, 0,
sizeof(struct smart_freq_ipc_reason_config) *
SMART_FMAX_IPC_MAX);
for (j = 0; j < LEGACY_SMART_FREQ; j++) {
cluster->smart_freq_info->legacy_reason_config[j].freq_allowed =
FREQ_QOS_MAX_DEFAULT_VALUE;
}
for (j = 0; j < SMART_FMAX_IPC_MAX; j++) {
cluster->smart_freq_info->ipc_reason_config[j].freq_allowed =
FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster0[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster1[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster2[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_ipc_freq_levels_cluster3[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
}
i++;
}
if (!strcmp(name, "SUN")) {
for_each_sched_cluster(cluster) {
if (cluster->id == 0) {
/* Legacy */
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
2400000;
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
300000000;
cluster->smart_freq_info->smart_freq_participation_mask |=
BIT(BOOST_SMART_FREQ) |
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
BIT(BIG_TASKCNT_SMART_FREQ) |
BIT(TRAILBLAZER_SMART_FREQ) |
BIT(SBT_SMART_FREQ) |
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
BIT(PIPELINE_90FPS_SMART_FREQ) |
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
BIT(THERMAL_ROTATION_SMART_FREQ);
/* IPC */
cluster->smart_freq_info->ipc_reason_config[0].ipc = 120;
cluster->smart_freq_info->ipc_reason_config[1].ipc = 180;
cluster->smart_freq_info->ipc_reason_config[2].ipc = 220;
cluster->smart_freq_info->ipc_reason_config[3].ipc = 260;
cluster->smart_freq_info->ipc_reason_config[4].ipc = 300;
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
BIT(IPC_E);
cluster->smart_freq_info->min_cycles = 5806080;
} else if (cluster->id == 1) {
/* Legacy */
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
3513600;
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
1000000000;
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
300000000;
cluster->smart_freq_info->smart_freq_participation_mask |=
BIT(BOOST_SMART_FREQ) |
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
BIT(BIG_TASKCNT_SMART_FREQ) |
BIT(TRAILBLAZER_SMART_FREQ) |
BIT(SBT_SMART_FREQ) |
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
BIT(PIPELINE_90FPS_SMART_FREQ) |
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
BIT(THERMAL_ROTATION_SMART_FREQ);
/* IPC */
cluster->smart_freq_info->ipc_reason_config[0].ipc = 220;
cluster->smart_freq_info->ipc_reason_config[1].ipc = 260;
cluster->smart_freq_info->ipc_reason_config[2].ipc = 280;
cluster->smart_freq_info->ipc_reason_config[3].ipc = 320;
cluster->smart_freq_info->ipc_reason_config[4].ipc = 400;
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
BIT(IPC_E);
cluster->smart_freq_info->min_cycles = 7004160;
}
}
}
smart_freq_init_done = true;
update_smart_freq_capacities();
}

kernel/sched/walt/sysctl.c Normal file (1964 lines): diff suppressed because it is too large

kernel/sched/walt/trace.c Normal file (84 lines)
View File

@@ -0,0 +1,84 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*/
#include "walt.h"
static inline void __window_data(u32 *dst, u32 *src)
{
if (src)
memcpy(dst, src, nr_cpu_ids * sizeof(u32));
else
memset(dst, 0, nr_cpu_ids * sizeof(u32));
}
struct trace_seq;
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
{
int i;
const char *ret = p->buffer + seq_buf_used(&p->seq);
for (i = 0; i < buf_len; i++)
trace_seq_printf(p, "%u ", buf[i]);
trace_seq_putc(p, 0);
return ret;
}
static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new)
{
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
if (curr)
if (new)
return wrq->nt_curr_runnable_sum;
else
return wrq->curr_runnable_sum;
else
if (new)
return wrq->nt_prev_runnable_sum;
else
return wrq->prev_runnable_sum;
}
static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new)
{
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
if (curr)
if (new)
return wrq->grp_time.nt_curr_runnable_sum;
else
return wrq->grp_time.curr_runnable_sum;
else
if (new)
return wrq->grp_time.nt_prev_runnable_sum;
else
return wrq->grp_time.prev_runnable_sum;
}
static inline s64
__get_update_sum(struct rq *rq, enum migrate_types migrate_type,
bool src, bool new, bool curr)
{
switch (migrate_type) {
case RQ_TO_GROUP:
if (src)
return __rq_update_sum(rq, curr, new);
else
return __grp_update_sum(rq, curr, new);
case GROUP_TO_RQ:
if (src)
return __grp_update_sum(rq, curr, new);
else
return __rq_update_sum(rq, curr, new);
default:
WARN_ON_ONCE(1);
return -EINVAL;
}
}
#define CREATE_TRACE_POINTS
#include "trace.h"

kernel/sched/walt/trace.h Normal file (2060 lines): diff suppressed because it is too large

kernel/sched/walt/walt.c Normal file (5683 lines): diff suppressed because it is too large

kernel/sched/walt/walt.h Normal file (1545 lines): diff suppressed because it is too large

kernel/sched/walt/walt_cfs.c Normal file (1532 lines): diff suppressed because it is too large

View File

@@ -0,0 +1,207 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "walt.h"
#include "trace.h"
#include <soc/qcom/socinfo.h>
unsigned long __read_mostly soc_flags;
unsigned int trailblazer_floor_freq[MAX_CLUSTERS];
cpumask_t asym_cap_sibling_cpus;
cpumask_t pipeline_sync_cpus;
int oscillate_period_ns;
int soc_sched_lib_name_capacity;
#define PIPELINE_BUSY_THRESH_8MS_WINDOW 7
#define PIPELINE_BUSY_THRESH_12MS_WINDOW 11
#define PIPELINE_BUSY_THRESH_16MS_WINDOW 15
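/* Pipeline busy-time thresholds in ms, one millisecond below the 8/12/16 ms windows they correspond to. */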
void walt_config(void)
{
int i, j, cpu;
const char *name = socinfo_get_id_string();
sysctl_sched_group_upmigrate_pct = 100;
sysctl_sched_group_downmigrate_pct = 95;
sysctl_sched_task_unfilter_period = 100000000;
sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG;
sysctl_sched_ravg_window_nr_ticks = (HZ / NR_WINDOWS_PER_SEC);
sched_load_granule = DEFAULT_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;
sysctl_sched_coloc_busy_hyst_enable_cpus = 112;
sysctl_sched_util_busy_hyst_enable_cpus = 255;
sysctl_sched_coloc_busy_hyst_max_ms = 5000;
sched_ravg_window = DEFAULT_SCHED_RAVG_WINDOW;
sysctl_input_boost_ms = 40;
sysctl_sched_min_task_util_for_boost = 51;
sysctl_sched_min_task_util_for_uclamp = 51;
sysctl_sched_min_task_util_for_colocation = 35;
sysctl_sched_many_wakeup_threshold = WALT_MANY_WAKEUP_DEFAULT;
sysctl_walt_rtg_cfs_boost_prio = 99; /* disabled by default */
sysctl_sched_sync_hint_enable = 1;
sysctl_sched_skip_sp_newly_idle_lb = 1;
sysctl_sched_hyst_min_coloc_ns = 80000000;
sysctl_sched_idle_enough = SCHED_IDLE_ENOUGH_DEFAULT;
sysctl_sched_cluster_util_thres_pct = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
sysctl_em_inflate_pct = 100;
sysctl_em_inflate_thres = 1024;
sysctl_max_freq_partial_halt = FREQ_QOS_MAX_DEFAULT_VALUE;
asym_cap_sibling_cpus = CPU_MASK_NONE;
pipeline_sync_cpus = CPU_MASK_NONE;
for_each_possible_cpu(cpu) {
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
if (i)
smart_freq_legacy_reason_hyst_ms[i][cpu] = 4;
else
smart_freq_legacy_reason_hyst_ms[i][cpu] = 0;
}
}
for (i = 0; i < MAX_MARGIN_LEVELS; i++) {
sysctl_sched_capacity_margin_up_pct[i] = 95; /* ~5% margin */
sysctl_sched_capacity_margin_dn_pct[i] = 85; /* ~15% margin */
sysctl_sched_early_up[i] = 1077;
sysctl_sched_early_down[i] = 1204;
}
for (i = 0; i < WALT_NR_CPUS; i++) {
sysctl_sched_coloc_busy_hyst_cpu[i] = 39000000;
sysctl_sched_coloc_busy_hyst_cpu_busy_pct[i] = 10;
sysctl_sched_util_busy_hyst_cpu[i] = 5000000;
sysctl_sched_util_busy_hyst_cpu_util[i] = 15;
sysctl_input_boost_freq[i] = 0;
}
for (i = 0; i < MAX_CLUSTERS; i++) {
sysctl_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
high_perf_cluster_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
sysctl_sched_idle_enough_clust[i] = SCHED_IDLE_ENOUGH_DEFAULT;
sysctl_sched_cluster_util_thres_pct_clust[i] = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
trailblazer_floor_freq[i] = 0;
for (j = 0; j < MAX_CLUSTERS; j++) {
load_sync_util_thres[i][j] = 0;
load_sync_low_pct[i][j] = 0;
load_sync_high_pct[i][j] = 0;
}
}
for (i = 0; i < MAX_FREQ_CAP; i++) {
for (j = 0; j < MAX_CLUSTERS; j++)
freq_cap[i][j] = FREQ_QOS_MAX_DEFAULT_VALUE;
}
sysctl_sched_lrpb_active_ms[0] = PIPELINE_BUSY_THRESH_8MS_WINDOW;
sysctl_sched_lrpb_active_ms[1] = PIPELINE_BUSY_THRESH_12MS_WINDOW;
sysctl_sched_lrpb_active_ms[2] = PIPELINE_BUSY_THRESH_16MS_WINDOW;
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
soc_feat_set(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
soc_feat_set(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
soc_feat_set(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
soc_feat_set(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
soc_feat_set(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
sysctl_pipeline_special_task_util_thres = 100;
sysctl_pipeline_non_special_task_util_thres = 200;
sysctl_pipeline_pin_thres_low_pct = 50;
sysctl_pipeline_pin_thres_high_pct = 60;
/* return if socinfo is not available */
if (!name)
return;
if (!strcmp(name, "SUN")) {
sysctl_sched_suppress_region2 = 1;
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
soc_feat_unset(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
soc_feat_unset(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
trailblazer_floor_freq[0] = 1000000;
debugfs_walt_features |= WALT_FEAT_TRAILBLAZER_BIT;
debugfs_walt_features |= WALT_FEAT_UCLAMP_FREQ_BIT;
soc_feat_unset(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
soc_feat_set(SOC_ENABLE_FT_BOOST_TO_ALL);
oscillate_period_ns = 8000000;
soc_feat_set(SOC_ENABLE_EXPERIMENT3);
/* G + P */
cpumask_copy(&pipeline_sync_cpus, cpu_possible_mask);
soc_sched_lib_name_capacity = 2;
soc_feat_unset(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
sysctl_cluster01_load_sync[0] = 350;
sysctl_cluster01_load_sync[1] = 100;
sysctl_cluster01_load_sync[2] = 100;
sysctl_cluster10_load_sync[0] = 512;
sysctl_cluster10_load_sync[1] = 90;
sysctl_cluster10_load_sync[2] = 90;
load_sync_util_thres[0][1] = sysctl_cluster01_load_sync[0];
load_sync_low_pct[0][1] = sysctl_cluster01_load_sync[1];
load_sync_high_pct[0][1] = sysctl_cluster01_load_sync[2];
load_sync_util_thres[1][0] = sysctl_cluster10_load_sync[0];
load_sync_low_pct[1][0] = sysctl_cluster10_load_sync[1];
load_sync_high_pct[1][0] = sysctl_cluster10_load_sync[2];
sysctl_cluster01_load_sync_60fps[0] = 400;
sysctl_cluster01_load_sync_60fps[1] = 60;
sysctl_cluster01_load_sync_60fps[2] = 100;
sysctl_cluster10_load_sync_60fps[0] = 500;
sysctl_cluster10_load_sync_60fps[1] = 70;
sysctl_cluster10_load_sync_60fps[2] = 90;
load_sync_util_thres_60fps[0][1] = sysctl_cluster01_load_sync_60fps[0];
load_sync_low_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[1];
load_sync_high_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[2];
load_sync_util_thres_60fps[1][0] = sysctl_cluster10_load_sync_60fps[0];
load_sync_low_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[1];
load_sync_high_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[2];
/* CPU0 needs a 9 ms bias for all legacy smart freq reasons */
for (i = 1; i < LEGACY_SMART_FREQ; i++)
smart_freq_legacy_reason_hyst_ms[i][0] = 9;
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
for (i = 1; i < LEGACY_SMART_FREQ; i++)
smart_freq_legacy_reason_hyst_ms[i][cpu] = 2;
}
for_each_possible_cpu(cpu) {
smart_freq_legacy_reason_hyst_ms[PIPELINE_60FPS_OR_LESSER_SMART_FREQ][cpu] =
1;
}
soc_feat_unset(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
} else if (!strcmp(name, "PINEAPPLE")) {
soc_feat_set(SOC_ENABLE_SILVER_RT_SPREAD_BIT);
soc_feat_set(SOC_ENABLE_BOOST_TO_NEXT_CLUSTER_BIT);
/* T + G */
cpumask_or(&asym_cap_sibling_cpus,
&asym_cap_sibling_cpus, &cpu_array[0][1]);
cpumask_or(&asym_cap_sibling_cpus,
&asym_cap_sibling_cpus, &cpu_array[0][2]);
/*
* Treat Golds and Primes as candidates for load sync under pipeline usecase.
* However, it is possible that a single CPU is not present. As prime is the
* only cluster with only one CPU, guard this setting by ensuring 4 clusters
* are present.
*/
if (num_sched_clusters == 4) {
cpumask_or(&pipeline_sync_cpus,
&pipeline_sync_cpus, &cpu_array[0][2]);
cpumask_or(&pipeline_sync_cpus,
&pipeline_sync_cpus, &cpu_array[0][3]);
}
sysctl_cluster23_load_sync[0] = 350;
sysctl_cluster23_load_sync[1] = 100;
sysctl_cluster23_load_sync[2] = 100;
sysctl_cluster32_load_sync[0] = 512;
sysctl_cluster32_load_sync[1] = 90;
sysctl_cluster32_load_sync[2] = 90;
load_sync_util_thres[2][3] = sysctl_cluster23_load_sync[0];
load_sync_low_pct[2][3] = sysctl_cluster23_load_sync[1];
load_sync_high_pct[2][3] = sysctl_cluster23_load_sync[2];
load_sync_util_thres[3][2] = sysctl_cluster32_load_sync[0];
load_sync_low_pct[3][2] = sysctl_cluster32_load_sync[1];
load_sync_high_pct[3][2] = sysctl_cluster32_load_sync[2];
}
smart_freq_init(name);
}

View File

@@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/topology.h>
#include "walt.h"
#define CYCLE_CNTR_OFFSET(core_id, acc_count) \
(acc_count ? ((core_id + 1) * 4) : 0)
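/* With accumulative counters each core's cycle-count register sits 4 bytes apart (core 0 at offset 4); otherwise offset 0 is used for every core. */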
struct cpufreq_counter {
u64 total_cycle_counter;
u32 prev_cycle_counter;
spinlock_t lock;
};
static struct cpufreq_counter walt_cpufreq_counter[NR_CPUS];
struct walt_cpufreq_soc_data {
u32 reg_enable;
u32 reg_cycle_cntr;
bool accumulative_counter;
};
struct walt_cpufreq_data {
void __iomem *base;
const struct walt_cpufreq_soc_data *soc_data;
};
static struct walt_cpufreq_data cpufreq_data[MAX_CLUSTERS];
u64 walt_cpufreq_get_cpu_cycle_counter(int cpu, u64 wc)
{
const struct walt_cpufreq_soc_data *soc_data;
struct cpufreq_counter *cpu_counter;
struct walt_cpufreq_data *data;
u64 cycle_counter_ret;
unsigned long flags;
u16 offset;
u32 val;
data = &cpufreq_data[cpu_cluster(cpu)->id];
soc_data = data->soc_data;
cpu_counter = &walt_cpufreq_counter[cpu];
spin_lock_irqsave(&cpu_counter->lock, flags);
offset = CYCLE_CNTR_OFFSET(topology_core_id(cpu),
soc_data->accumulative_counter);
val = readl_relaxed(data->base +
soc_data->reg_cycle_cntr + offset);
if (val < cpu_counter->prev_cycle_counter) {
/* Handle counter overflow */
cpu_counter->total_cycle_counter += UINT_MAX -
cpu_counter->prev_cycle_counter + val;
cpu_counter->prev_cycle_counter = val;
} else {
cpu_counter->total_cycle_counter += val -
cpu_counter->prev_cycle_counter;
cpu_counter->prev_cycle_counter = val;
}
cycle_counter_ret = cpu_counter->total_cycle_counter;
spin_unlock_irqrestore(&cpu_counter->lock, flags);
pr_debug("CPU %u, core-id 0x%x, offset %u cycle_counts=%llu\n",
cpu, topology_core_id(cpu), offset, cycle_counter_ret);
return cycle_counter_ret;
}
static int walt_cpufreq_cycle_cntr_driver_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
struct of_phandle_args args;
struct device_node *cpu_np;
void __iomem *base;
int ret = -ENODEV, index, cpu;
struct walt_sched_cluster *cluster;
for_each_sched_cluster(cluster) {
cpu = cluster_first_cpu(cluster);
cpu_np = of_cpu_device_node_get(cpu);
if (!cpu_np)
return -EINVAL;
ret = of_parse_phandle_with_args(cpu_np, "qcom,freq-domain",
"#freq-domain-cells", 0, &args);
of_node_put(cpu_np);
if (ret)
return ret;
index = args.args[0];
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
base = devm_ioremap(dev, res->start, resource_size(res));
if (!base) {
dev_err(dev, "failed to map resource %pR\n", res);
return -ENOMEM;
}
cpufreq_data[cluster->id].soc_data = of_device_get_match_data(&pdev->dev);
cpufreq_data[cluster->id].base = base;
/* HW should be in enabled state to proceed */
if (!(readl_relaxed(base + cpufreq_data[cluster->id].soc_data->reg_enable) & 0x1)) {
dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index);
return -ENODEV;
}
}
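/* Install this driver as the WALT cycle-count provider only if no callback has been registered yet. */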
if (!walt_get_cycle_counts_cb) {
for_each_possible_cpu(cpu)
spin_lock_init(&walt_cpufreq_counter[cpu].lock);
walt_get_cycle_counts_cb = walt_cpufreq_get_cpu_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
return 0;
}
return ret;
}
static int walt_cpufreq_cycle_cntr_driver_remove(struct platform_device *pdev)
{
return 0;
}
static const struct walt_cpufreq_soc_data hw_soc_data = {
.reg_enable = 0x0,
.reg_cycle_cntr = 0x9c0,
.accumulative_counter = false,
};
static const struct walt_cpufreq_soc_data epss_soc_data = {
.reg_enable = 0x0,
.reg_cycle_cntr = 0x3c4,
.accumulative_counter = true,
};
static const struct of_device_id walt_cpufreq_cycle_cntr_match[] = {
{ .compatible = "qcom,cycle-cntr-hw", .data = &hw_soc_data },
{ .compatible = "qcom,epss", .data = &epss_soc_data },
{}
};
static struct platform_driver walt_cpufreq_cycle_cntr_driver = {
.driver = {
.name = "walt-cpufreq-cycle-cntr",
.of_match_table = walt_cpufreq_cycle_cntr_match
},
.probe = walt_cpufreq_cycle_cntr_driver_probe,
.remove = walt_cpufreq_cycle_cntr_driver_remove,
};
int walt_cpufreq_cycle_cntr_driver_register(void)
{
return platform_driver_register(&walt_cpufreq_cycle_cntr_driver);
}

View File

@@ -0,0 +1,115 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <trace/hooks/cpufreq.h>
#include "walt.h"
struct walt_cpu_cycle {
rwlock_t lock;
u64 cycles;
u64 last_time_ns;
unsigned int cur_freq_khz;
unsigned int mult_fact;
};
static DEFINE_PER_CPU(struct walt_cpu_cycle, walt_cc);
static u64 walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
{
unsigned long flags;
u64 delta;
u64 ret;
/*
* freq is in KHz. so multiply by 1000.
* time in nsec. so divide by NSEC_PER_SEC.
*
* cycles = (freq * 1000) * (t/10^9)
* = (freq * t)/10^6
*
*/
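/*
* mult_fact caches freq_khz * 2^20 / 10^6 (see walt_cpufreq_transition() and
* walt_cycle_counter_init()), so (delta_ns * mult_fact) >> 20 evaluates
* (freq_khz * 1000) * delta_ns / 10^9 without a division in the hot path.
*/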
read_lock_irqsave(&wcc->lock, flags);
delta = wc - wcc->last_time_ns;
ret = wcc->cycles + ((delta * wcc->mult_fact) >> 20);
read_unlock_irqrestore(&wcc->lock, flags);
return ret;
}
static void update_walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
{
unsigned long flags;
u64 delta;
/*
* freq is in KHz. so multiply by 1000.
* time in nsec. so divide by NSEC_PER_SEC.
*
* cycles = (freq * 1000) * (t/10^9)
* = (freq * t)/10^6
*
*/
write_lock_irqsave(&wcc->lock, flags);
delta = wc - wcc->last_time_ns;
wcc->cycles += (delta * wcc->mult_fact) >> 20;
wcc->last_time_ns = wc;
write_unlock_irqrestore(&wcc->lock, flags);
}
u64 walt_cpu_cycle_counter(int cpu, u64 wc)
{
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, cpu);
u64 cycles;
cycles = walt_compute_cpu_cycles(wcc, wc);
return cycles;
}
static void walt_cpufreq_transition(void *unused, struct cpufreq_policy *policy)
{
int i;
struct walt_cpu_cycle *wcc;
u64 wc;
unsigned int mult_fact;
wc = sched_clock();
for_each_cpu(i, policy->related_cpus) {
wcc = &per_cpu(walt_cc, i);
update_walt_compute_cpu_cycles(wcc, wc);
wcc->cur_freq_khz = policy->cur;
}
mult_fact = (policy->cur << SCHED_CAPACITY_SHIFT)/1000;
mult_fact = (mult_fact << SCHED_CAPACITY_SHIFT)/1000;
for_each_cpu(i, policy->related_cpus) {
wcc = &per_cpu(walt_cc, i);
wcc->mult_fact = mult_fact;
}
}
void walt_cycle_counter_init(void)
{
int i;
for_each_possible_cpu(i) {
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, i);
rwlock_init(&wcc->lock);
wcc->cur_freq_khz = cpufreq_quick_get(i);
wcc->last_time_ns = 0;
wcc->cycles = 0;
wcc->mult_fact = (wcc->cur_freq_khz << SCHED_CAPACITY_SHIFT)/1000;
wcc->mult_fact = (wcc->mult_fact << SCHED_CAPACITY_SHIFT)/1000;
}
walt_get_cycle_counts_cb = walt_cpu_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
register_trace_android_rvh_cpufreq_transition(walt_cpufreq_transition, NULL);
}

View File

@@ -0,0 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/hooks/sched.h>
#include "walt.h"
#include "walt_debug.h"
static void android_rvh_schedule_bug(void *unused, void *unused2)
{
BUG();
}
static int __init walt_debug_init(void)
{
int ret;
ret = preemptirq_long_init();
if (ret)
return ret;
register_trace_android_rvh_schedule_bug(android_rvh_schedule_bug, NULL);
return 0;
}
module_init(walt_debug_init);
MODULE_DESCRIPTION("QTI WALT Debug Module");
MODULE_LICENSE("GPL v2");

View File

@@ -0,0 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
int preemptirq_long_init(void);

View File

@@ -0,0 +1,130 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include "walt.h"
struct gclk_counter {
u64 total_cycle_counter;
u64 prev_cycle_counter;
spinlock_t lock;
};
static struct gclk_counter walt_gclk_counter[MAX_CLUSTERS];
struct walt_ncc_data {
void __iomem *base;
};
static struct walt_ncc_data ncc_data[MAX_CLUSTERS];
u64 walt_get_ncc_gclk_cycle_counter(int cpu, u64 wc)
{
struct gclk_counter *ncc_counter;
struct walt_ncc_data *data;
u64 cycle_counter_ret;
unsigned long flags;
int index;
u64 val;
index = topology_cluster_id(cpu);
data = &ncc_data[index];
ncc_counter = &walt_gclk_counter[index];
spin_lock_irqsave(&ncc_counter->lock, flags);
val = readq_relaxed(data->base);
if (val < ncc_counter->prev_cycle_counter) {
/* Handle counter overflow.
* Unlikely to occur for a
* 64-bit counter, but handled
* for completeness.
*/
ncc_counter->total_cycle_counter += U64_MAX -
ncc_counter->prev_cycle_counter + val;
ncc_counter->prev_cycle_counter = val;
} else {
ncc_counter->total_cycle_counter += val -
ncc_counter->prev_cycle_counter;
ncc_counter->prev_cycle_counter = val;
}
cycle_counter_ret = ncc_counter->total_cycle_counter;
spin_unlock_irqrestore(&ncc_counter->lock, flags);
pr_debug("CPU %u, cluster-id %d\n", cpu, index);
return cycle_counter_ret;
}
static int walt_gclk_cycle_counter_driver_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
void __iomem *base;
int ret = -ENODEV, index;
struct walt_sched_cluster *cluster;
for_each_sched_cluster(cluster) {
index = topology_cluster_id(cpumask_first(&cluster->cpus));
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
return -ENODEV;
}
if (!devm_request_mem_region(dev, res->start, resource_size(res), res->name)) {
dev_err(dev, "failed to request resource %pR\n", res);
return -EBUSY;
}
base = devm_ioremap(dev, res->start, resource_size(res));
if (!base) {
dev_err(dev, "failed to map resource %pR\n", res);
return -ENOMEM;
}
ncc_data[index].base = base;
}
if (!walt_get_cycle_counts_cb) {
for (int i = 0; i < MAX_CLUSTERS; i++)
spin_lock_init(&walt_gclk_counter[i].lock);
walt_get_cycle_counts_cb = walt_get_ncc_gclk_cycle_counter;
use_cycle_counter = true;
complete(&walt_get_cycle_counts_cb_completion);
return 0;
}
return ret;
}
static int walt_gclk_cycle_counter_driver_remove(struct platform_device *pdev)
{
return 0;
}
static const struct of_device_id walt_gclk_cycle_counter_match[] = {
{ .compatible = "qcom,gclk" },
{}
};
static struct platform_driver walt_gclk_cycle_counter_driver = {
.driver = {
.name = "walt-gclk-cycle-counter",
.of_match_table = walt_gclk_cycle_counter_match
},
.probe = walt_gclk_cycle_counter_driver_probe,
.remove = walt_gclk_cycle_counter_driver_remove,
};
int walt_gclk_cycle_counter_driver_register(void)
{
return platform_driver_register(&walt_gclk_cycle_counter_driver);
}

View File

@@ -0,0 +1,713 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/sched/isolation.h>
#include <trace/hooks/sched.h>
#include <walt.h>
#include "trace.h"
#ifdef CONFIG_HOTPLUG_CPU
enum pause_type {
HALT,
PARTIAL_HALT,
MAX_PAUSE_TYPE
};
/* if a cpu is halting */
struct cpumask __cpu_halt_mask;
struct cpumask __cpu_partial_halt_mask;
/* spin lock to allow calling from non-preemptible context */
static DEFINE_RAW_SPINLOCK(halt_lock);
struct halt_cpu_state {
u8 client_vote_mask[MAX_PAUSE_TYPE];
};
static DEFINE_PER_CPU(struct halt_cpu_state, halt_state);
static DEFINE_RAW_SPINLOCK(walt_drain_pending_lock);
/* the amount of time allowed for enqueue operations that happen
* just after a halt operation.
*/
#define WALT_HALT_CHECK_THRESHOLD_NS 400000
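/* 400000 ns == 400 us */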
/*
* Remove a task from the runqueue and pretend that it's migrating. This
* should prevent migrations for the detached task and disallow further
* changes to tsk_cpus_allowed.
*/
void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
walt_lockdep_assert_rq(rq, p);
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(rq, p, 0);
list_add(&p->se.group_node, tasks);
}
void attach_tasks_core(struct list_head *tasks, struct rq *rq)
{
struct task_struct *p;
walt_lockdep_assert_rq(rq, NULL);
while (!list_empty(tasks)) {
p = list_first_entry(tasks, struct task_struct, se.group_node);
list_del_init(&p->se.group_node);
BUG_ON(task_rq(p) != rq);
activate_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
}
}
/*
* Migrate all tasks from the rq, sleeping tasks will be migrated by
* try_to_wake_up()->select_task_rq().
*
* Called with rq->__lock held even though we're in stop_machine() and
* there's no concurrency possible, we hold the required locks anyway
* because of lock validation efforts.
*
* The function will skip CPU pinned kthreads.
*/
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
LIST_HEAD(percpu_kthreads);
unsigned int num_pinned_kthreads = 1;
struct rq_flags orf = *rf;
int dest_cpu;
/*
* Fudge the rq selection such that the below task selection loop
* doesn't get stuck on the currently eligible stop task.
*
* We're currently inside stop_machine() and the rq is either stuck
* in the stop_machine_cpu_stop() loop, or we're executing this code,
* either way we should never end up calling schedule() until we're
* done here.
*/
rq->stop = NULL;
/*
* put_prev_task() and pick_next_task() sched
* class method both need to have an up-to-date
* value of rq->clock[_task]
*/
update_rq_clock(rq);
#ifdef CONFIG_SCHED_DEBUG
/* note the clock update in orf */
orf.clock_update_flags |= RQCF_UPDATED;
#endif
for (;;) {
/*
* There's this thread running, bail when that's the only
* remaining thread:
*/
if (rq->nr_running == 1)
break;
next = pick_migrate_task(rq);
/*
* Argh ... no iterator for tasks, we need to remove the
* kthread from the run-queue to continue.
*/
if (is_per_cpu_kthread(next)) {
detach_one_task_core(next, rq, &percpu_kthreads);
num_pinned_kthreads += 1;
continue;
}
/*
* Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->__lock, such that holding either
* stabilizes the mask.
*
* Dropping rq->__lock is not quite as disastrous as it usually is
* because !cpu_active at this point, which means load-balance
* will not interfere. Also, stop-machine.
*/
rq_unlock(rq, rf);
raw_spin_lock(&next->pi_lock);
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
/*
* Since we're inside stop-machine, _nothing_ should have
* changed the task, WARN if weird stuff happened, because in
* that case the above rq->__lock drop is a fail too.
*/
if (task_rq(next) != rq || !task_on_rq_queued(next)) {
raw_spin_unlock(&next->pi_lock);
continue;
}
/* Find suitable destination for @next */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
if (cpu_of(rq) != dest_cpu && !is_migration_disabled(next)) {
/* only perform a required migration */
rq = __migrate_task(rq, rf, next, dest_cpu);
if (rq != dead_rq) {
rq_unlock(rq, rf);
rq = dead_rq;
*rf = orf;
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
}
} else {
detach_one_task_core(next, rq, &percpu_kthreads);
num_pinned_kthreads += 1;
}
raw_spin_unlock(&next->pi_lock);
}
if (num_pinned_kthreads > 1)
attach_tasks_core(&percpu_kthreads, rq);
rq->stop = stop;
}
void __balance_callbacks(struct rq *rq);
static int drain_rq_cpu_stop(void *data)
{
struct rq *rq = this_rq();
struct rq_flags rf;
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
rq_lock_irqsave(rq, &rf);
/* rq lock is pinned */
/* migrate tasks assumes that the lock is pinned, and will unlock/repin */
migrate_tasks(rq, &rf);
/* __balance_callbacks can unlock and relock the rq lock. unpin */
rq_unpin_lock(rq, &rf);
/*
* Service any callbacks that were accumulated prior to unlocking, so that
* any subsequent calls to rq_lock... will see rq->balance_callback set to
* the default (0 or balance_push_callback).
*/
wrq->enqueue_counter = 0;
__balance_callbacks(rq);
if (wrq->enqueue_counter)
WALT_BUG(WALT_BUG_WALT, NULL, "cpu: %d task was re-enqueued", cpu_of(rq));
/* lock is no longer pinned, raw unlock using same flags as locking */
raw_spin_rq_unlock_irqrestore(rq, rf.flags);
return 0;
}
static int cpu_drain_rq(unsigned int cpu)
{
if (!cpu_online(cpu))
return 0;
if (available_idle_cpu(cpu))
return 0;
/* this will schedule, must not be in atomic context */
return stop_one_cpu(cpu, drain_rq_cpu_stop, NULL);
}
struct drain_thread_data {
cpumask_t cpus_to_drain;
};
static struct drain_thread_data drain_data = {
.cpus_to_drain = { CPU_BITS_NONE }
};
static int __ref try_drain_rqs(void *data)
{
cpumask_t *cpus_ptr = &((struct drain_thread_data *)data)->cpus_to_drain;
int cpu;
unsigned long flags;
while (!kthread_should_stop()) {
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
if (cpumask_weight(cpus_ptr)) {
cpumask_t local_cpus;
cpumask_copy(&local_cpus, cpus_ptr);
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
for_each_cpu(cpu, &local_cpus)
cpu_drain_rq(cpu);
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
cpumask_andnot(cpus_ptr, cpus_ptr, &local_cpus);
}
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
set_current_state(TASK_RUNNING);
}
return 0;
}
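/*
* Apply or clear the partial-halt frequency cap for the clusters spanned by
* @cpus: when @cpus contains partially-halted (but no fully-halted) cpus and
* is_state1() holds, cap those clusters at sysctl_max_freq_partial_halt;
* otherwise restore FREQ_QOS_MAX_DEFAULT_VALUE.
*/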
void restrict_cpus_and_freq(struct cpumask *cpus)
{
struct cpumask restrict_cpus;
int cpu = 0;
cpumask_copy(&restrict_cpus, cpus);
if (cpumask_intersects(cpus, cpu_partial_halt_mask) &&
!cpumask_intersects(cpus, cpu_halt_mask) &&
is_state1()) {
for_each_cpu(cpu, cpus)
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
sysctl_max_freq_partial_halt;
} else {
for_each_cpu(cpu, cpus) {
cpumask_or(&restrict_cpus, &restrict_cpus, &(cpu_cluster(cpu)->cpus));
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
FREQ_QOS_MAX_DEFAULT_VALUE;
}
}
update_smart_freq_capacities();
}
struct task_struct *walt_drain_thread;
static int halt_cpus(struct cpumask *cpus, enum pause_type type)
{
int cpu;
int ret = 0;
u64 start_time = 0;
struct halt_cpu_state *halt_cpu_state;
unsigned long flags;
if (trace_halt_cpus_enabled())
start_time = sched_clock();
trace_halt_cpus_start(cpus, 1);
/* add the cpus to the halt mask */
for_each_cpu(cpu, cpus) {
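/* never allow halting the first CPU capable of running 32-bit EL0 tasks */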
if (cpu == cpumask_first(system_32bit_el0_cpumask())) {
ret = -EINVAL;
goto out;
}
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (type == HALT)
cpumask_set_cpu(cpu, cpu_halt_mask);
else
cpumask_set_cpu(cpu, cpu_partial_halt_mask);
/* guarantee mask written at this time */
wmb();
}
restrict_cpus_and_freq(cpus);
/* migrate tasks off the cpu */
if (type == HALT) {
/* signal and wakeup the drain kthread */
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
cpumask_or(&drain_data.cpus_to_drain, &drain_data.cpus_to_drain, cpus);
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
wake_up_process(walt_drain_thread);
}
out:
trace_halt_cpus(cpus, start_time, 1, ret);
return ret;
}
/* start the cpus again, and kick them to balance */
static int start_cpus(struct cpumask *cpus, enum pause_type type)
{
u64 start_time = sched_clock();
struct halt_cpu_state *halt_cpu_state;
int cpu;
trace_halt_cpus_start(cpus, 0);
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
/* guarantee the halt state is updated */
wmb();
if (type == HALT)
cpumask_clear_cpu(cpu, cpu_halt_mask);
else
cpumask_clear_cpu(cpu, cpu_partial_halt_mask);
/* kick the cpu so it can pull tasks
* after the mask has been cleared.
*/
walt_smp_call_newidle_balance(cpu);
}
restrict_cpus_and_freq(cpus);
trace_halt_cpus(cpus, start_time, 0, 0);
return 0;
}
/* update client for cpus in yield/halt mask */
static void update_clients(struct cpumask *cpus, bool halt, enum pause_client client,
enum pause_type type)
{
int cpu;
struct halt_cpu_state *halt_cpu_state;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (halt)
halt_cpu_state->client_vote_mask[type] |= client;
else
halt_cpu_state->client_vote_mask[type] &= ~client;
}
}
/* remove cpus that are already halted */
static void update_halt_cpus(struct cpumask *cpus, enum pause_type type)
{
int cpu;
struct halt_cpu_state *halt_cpu_state;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if (halt_cpu_state->client_vote_mask[type])
cpumask_clear_cpu(cpu, cpus);
}
}
/* cpus will be modified */
static int walt_halt_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
int ret = 0;
cpumask_t requested_cpus;
unsigned long flags;
raw_spin_lock_irqsave(&halt_lock, flags);
cpumask_copy(&requested_cpus, cpus);
/* remove cpus that are already halted */
update_halt_cpus(cpus, type);
if (cpumask_empty(cpus)) {
update_clients(&requested_cpus, true, client, type);
goto unlock;
}
ret = halt_cpus(cpus, type);
if (ret < 0)
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
cpumask_pr_args(&requested_cpus));
else
update_clients(&requested_cpus, true, client, type);
unlock:
raw_spin_unlock_irqrestore(&halt_lock, flags);
return ret;
}
int walt_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_halt_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_pause_cpus);
int walt_partial_pause_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_halt_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_pause_cpus);
/* cpus will be modified */
static int walt_start_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
{
int ret = 0;
cpumask_t requested_cpus;
unsigned long flags;
raw_spin_lock_irqsave(&halt_lock, flags);
cpumask_copy(&requested_cpus, cpus);
update_clients(&requested_cpus, false, client, type);
/* remove cpus that should still be halted */
update_halt_cpus(cpus, type);
ret = start_cpus(cpus, type);
if (ret < 0) {
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
cpumask_pr_args(&requested_cpus));
/* restore/increment ref counts in case of error */
update_clients(&requested_cpus, true, client, type);
}
raw_spin_unlock_irqrestore(&halt_lock, flags);
return ret;
}
int walt_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_start_cpus(cpus, client, HALT);
}
EXPORT_SYMBOL_GPL(walt_resume_cpus);
int walt_partial_resume_cpus(struct cpumask *cpus, enum pause_client client)
{
if (walt_disabled)
return -EAGAIN;
return walt_start_cpus(cpus, client, PARTIAL_HALT);
}
EXPORT_SYMBOL_GPL(walt_partial_resume_cpus);
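/*
* Usage sketch (illustrative only, not part of this commit): a core-control
* style client would halt a pair of CPUs and later resume them, e.g.
*
*	cpumask_t mask = CPU_MASK_NONE;
*
*	cpumask_set_cpu(6, &mask);
*	cpumask_set_cpu(7, &mask);
*	if (!walt_pause_cpus(&mask, PAUSE_CORE_CTL))
*		pr_info("cpus 6-7 halted\n");
*	walt_resume_cpus(&mask, PAUSE_CORE_CTL);
*
* PAUSE_CORE_CTL stands in for a value of enum pause_client, which is defined
* in walt.h (diff suppressed above); the exact client name is assumed here.
*/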
/* return true if the requested client has fully halted one of the cpus */
bool cpus_halted_by_client(struct cpumask *cpus, enum pause_client client)
{
struct halt_cpu_state *halt_cpu_state;
int cpu;
for_each_cpu(cpu, cpus) {
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
if ((bool)(halt_cpu_state->client_vote_mask[HALT] & client))
return true;
}
return false;
}
static void android_rvh_get_nohz_timer_target(void *unused, int *cpu, bool *done)
{
int i, default_cpu = -1;
struct sched_domain *sd;
cpumask_t active_unhalted;
*done = true;
cpumask_andnot(&active_unhalted, cpu_active_mask, cpu_halt_mask);
if (housekeeping_cpu(*cpu, HK_TYPE_TIMER) && !cpu_halted(*cpu)) {
if (!available_idle_cpu(*cpu))
return;
default_cpu = *cpu;
}
/*
* find first cpu halted by core control and try to avoid
* affecting externally halted cpus.
*/
if (!cpumask_weight(&active_unhalted)) {
cpumask_t tmp_pause, tmp_part_pause, tmp_halt, *tmp;
cpumask_and(&tmp_part_pause, cpu_active_mask, &cpus_part_paused_by_us);
cpumask_and(&tmp_pause, cpu_active_mask, &cpus_paused_by_us);
cpumask_and(&tmp_halt, cpu_active_mask, cpu_halt_mask);
tmp = cpumask_weight(&tmp_part_pause) ? &tmp_part_pause :
cpumask_weight(&tmp_pause) ? &tmp_pause : &tmp_halt;
for_each_cpu(i, tmp) {
if ((*cpu == i) && cpumask_weight(tmp) > 1)
continue;
*cpu = i;
return;
}
}
rcu_read_lock();
for_each_domain(*cpu, sd) {
for_each_cpu_and(i, sched_domain_span(sd),
housekeeping_cpumask(HK_TYPE_TIMER)) {
if (*cpu == i)
continue;
if (!available_idle_cpu(i) && !cpu_halted(i)) {
*cpu = i;
goto unlock;
}
}
}
if (default_cpu == -1) {
for_each_cpu_and(i, &active_unhalted,
housekeeping_cpumask(HK_TYPE_TIMER)) {
if (*cpu == i)
continue;
if (!available_idle_cpu(i)) {
*cpu = i;
goto unlock;
}
}
/* choose any active unhalted cpu */
default_cpu = cpumask_any(&active_unhalted);
if (unlikely(default_cpu >= nr_cpu_ids))
goto unlock;
}
*cpu = default_cpu;
unlock:
rcu_read_unlock();
}
/**
* android_rvh_set_cpus_allowed_by_task: disallow cpus that are halted
*
* NOTES: may be called if migration is disabled for the task
* if per-cpu-kthread, must not deliberately return an invalid cpu
* if !per-cpu-kthread, may return an invalid cpu (reject dest_cpu)
* must not change cpu in the in_execve 32-bit task case
*/
static void android_rvh_set_cpus_allowed_by_task(void *unused,
const struct cpumask *cpu_valid_mask,
const struct cpumask *new_mask,
struct task_struct *p,
unsigned int *dest_cpu)
{
if (unlikely(walt_disabled))
return;
/* allow kthreads to change affinity regardless of halt status of dest_cpu */
if (p->flags & PF_KTHREAD)
return;
if (cpu_halted(*dest_cpu) && !p->migration_disabled) {
cpumask_t allowed_cpus;
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve))
return;
/* remove halted cpus from the valid mask, and store locally */
cpumask_andnot(&allowed_cpus, cpu_valid_mask, cpu_halt_mask);
cpumask_and(&allowed_cpus, &allowed_cpus, new_mask);
/* do not modify dest_cpu if there are no cpus to choose from */
if (!cpumask_empty(&allowed_cpus))
*dest_cpu = cpumask_any_and_distribute(&allowed_cpus, new_mask);
}
}
/**
* android_rvh_rto_next_cpu: disallow halted cpus for IRQ work functions
*/
static void android_rvh_rto_next_cpu(void *unused, int rto_cpu, struct cpumask *rto_mask, int *cpu)
{
cpumask_t allowed_cpus;
if (unlikely(walt_disabled))
return;
if (cpu_halted(*cpu)) {
/* remove halted cpus from the valid mask, and store locally */
cpumask_andnot(&allowed_cpus, rto_mask, cpu_halt_mask);
*cpu = cpumask_next(rto_cpu, &allowed_cpus);
}
}
/**
* android_rvh_is_cpu_allowed: disallow cpus that are halted
*
* NOTE: this function will not be called if migration is disabled for the task.
*/
static void android_rvh_is_cpu_allowed(void *unused, struct task_struct *p, int cpu, bool *allowed)
{
if (unlikely(walt_disabled))
return;
if (cpumask_test_cpu(cpu, cpu_halt_mask)) {
cpumask_t cpus_allowed;
/* default reject for any halted cpu */
*allowed = false;
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve)) {
/* 32bit task in execve. allow this cpu. */
*allowed = true;
return;
}
/*
* for cfs threads, active cpus in the affinity are allowed
* but halted cpus are not allowed
*/
cpumask_and(&cpus_allowed, cpu_active_mask, p->cpus_ptr);
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_halt_mask);
if (!(p->flags & PF_KTHREAD)) {
if (cpumask_empty(&cpus_allowed)) {
/*
* All affined cpus are inactive or halted.
* Allow this cpu for user threads
*/
*allowed = true;
}
return;
}
/* for kthreads, dying cpus are not allowed */
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_dying_mask);
if (cpumask_empty(&cpus_allowed)) {
/*
* All affined cpus inactive or halted or dying.
* Allow this cpu for kthreads
*/
*allowed = true;
}
}
}
void walt_halt_init(void)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
walt_drain_thread = kthread_run(try_drain_rqs, &drain_data, "halt_drain_rqs");
if (IS_ERR(walt_drain_thread)) {
pr_err("Error creating walt drain thread\n");
return;
}
sched_setscheduler_nocheck(walt_drain_thread, SCHED_FIFO, &param);
register_trace_android_rvh_get_nohz_timer_target(android_rvh_get_nohz_timer_target, NULL);
register_trace_android_rvh_set_cpus_allowed_by_task(
android_rvh_set_cpus_allowed_by_task, NULL);
register_trace_android_rvh_rto_next_cpu(android_rvh_rto_next_cpu, NULL);
register_trace_android_rvh_is_cpu_allowed(android_rvh_is_cpu_allowed, NULL);
}
#endif /* CONFIG_HOTPLUG_CPU */

kernel/sched/walt/walt_lb.c Normal file (1193 lines): diff suppressed because it is too large

kernel/sched/walt/walt_rt.c Normal file (431 lines)
View File

@@ -0,0 +1,431 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <trace/hooks/sched.h>
#include "walt.h"
#include "trace.h"
static DEFINE_PER_CPU(cpumask_var_t, walt_local_cpu_mask);
DEFINE_PER_CPU(u64, rt_task_arrival_time) = 0;
static bool long_running_rt_task_trace_rgstrd;
static void rt_task_arrival_marker(void *unused, bool preempt,
struct task_struct *prev, struct task_struct *next,
unsigned int prev_state)
{
unsigned int cpu = raw_smp_processor_id();
if (next->policy == SCHED_FIFO && next != cpu_rq(cpu)->stop)
per_cpu(rt_task_arrival_time, cpu) = rq_clock_task(this_rq());
else
per_cpu(rt_task_arrival_time, cpu) = 0;
}
static void long_running_rt_task_notifier(void *unused, struct rq *rq)
{
struct task_struct *curr = rq->curr;
unsigned int cpu = raw_smp_processor_id();
if (!sysctl_sched_long_running_rt_task_ms)
return;
if (!per_cpu(rt_task_arrival_time, cpu))
return;
if (per_cpu(rt_task_arrival_time, cpu) && curr->policy != SCHED_FIFO) {
/*
* It is possible that the scheduling policy for the current
* task might get changed after task arrival time stamp is
* noted during sched_switch of RT task. To avoid such false
* positives, reset arrival time stamp.
*/
per_cpu(rt_task_arrival_time, cpu) = 0;
return;
}
/*
* Since we are called from the main tick, rq clock task must have
* been updated very recently. Use it directly, instead of
* update_rq_clock_task() to avoid warnings.
*/
if (rq->clock_task -
per_cpu(rt_task_arrival_time, cpu)
> sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC) {
printk_deferred("RT task %s (%d) runtime > %u now=%llu task arrival time=%llu runtime=%llu\n",
curr->comm, curr->pid,
sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC,
rq->clock_task,
per_cpu(rt_task_arrival_time, cpu),
rq->clock_task -
per_cpu(rt_task_arrival_time, cpu));
BUG();
}
}
int sched_long_running_rt_task_ms_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
static DEFINE_MUTEX(mutex);
mutex_lock(&mutex);
ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
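/* A value of 0 leaves the check disabled; any nonzero value below 800 ms is raised to 800 ms. */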
if (sysctl_sched_long_running_rt_task_ms > 0 &&
sysctl_sched_long_running_rt_task_ms < 800)
sysctl_sched_long_running_rt_task_ms = 800;
if (write && !long_running_rt_task_trace_rgstrd) {
register_trace_sched_switch(rt_task_arrival_marker, NULL);
register_trace_android_vh_scheduler_tick(long_running_rt_task_notifier, NULL);
long_running_rt_task_trace_rgstrd = true;
}
mutex_unlock(&mutex);
return ret;
}
static void walt_rt_energy_aware_wake_cpu(struct task_struct *task, struct cpumask *lowest_mask,
int ret, int *best_cpu)
{
int cpu;
unsigned long util, best_cpu_util = ULONG_MAX;
unsigned long best_cpu_util_cum = ULONG_MAX;
unsigned long util_cum;
unsigned long tutil = task_util(task);
unsigned int best_idle_exit_latency = UINT_MAX;
unsigned int cpu_idle_exit_latency = UINT_MAX;
bool boost_on_big = rt_boost_on_big();
int cluster;
int order_index = (boost_on_big && num_sched_clusters > 1) ? 1 : 0;
int end_index = 0;
bool best_cpu_lt = true;
if (unlikely(walt_disabled))
return;
if (!ret)
return; /* No targets found */
rcu_read_lock();
if (soc_feat(SOC_ENABLE_SILVER_RT_SPREAD_BIT) && order_index == 0)
end_index = 1;
for (cluster = 0; cluster < num_sched_clusters; cluster++) {
for_each_cpu_and(cpu, lowest_mask, &cpu_array[order_index][cluster]) {
bool lt;
trace_sched_cpu_util(cpu, lowest_mask);
if (!cpu_active(cpu))
continue;
if (cpu_halted(cpu))
continue;
if (sched_cpu_high_irqload(cpu))
continue;
if (__cpu_overutilized(cpu, tutil))
continue;
util = cpu_util(cpu);
lt = (walt_low_latency_task(cpu_rq(cpu)->curr) ||
walt_nr_rtg_high_prio(cpu));
/*
* When the best is suitable and the current is not,
* skip it
*/
if (lt && !best_cpu_lt)
continue;
/*
* Either both are suitable or both are unsuitable; load takes
* precedence.
*/
if (!(best_cpu_lt ^ lt) && (util > best_cpu_util))
continue;
/*
* If the previous CPU has same load, keep it as
* best_cpu.
*/
if (best_cpu_util == util && *best_cpu == task_cpu(task))
continue;
/*
* If candidate CPU is the previous CPU, select it.
* Otherwise, if its load is same with best_cpu and in
* a shallower C-state, select it. If all above
* conditions are same, select the least cumulative
* window demand CPU.
*/
cpu_idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(cpu));
util_cum = cpu_util_cum(cpu);
if (cpu != task_cpu(task) && best_cpu_util == util) {
if (best_idle_exit_latency < cpu_idle_exit_latency)
continue;
if (best_idle_exit_latency == cpu_idle_exit_latency &&
best_cpu_util_cum < util_cum)
continue;
}
best_idle_exit_latency = cpu_idle_exit_latency;
best_cpu_util_cum = util_cum;
best_cpu_util = util;
*best_cpu = cpu;
best_cpu_lt = lt;
}
if (cluster < end_index) {
if (*best_cpu == -1 || !available_idle_cpu(*best_cpu))
continue;
}
if (*best_cpu != -1)
break;
}
rcu_read_unlock();
}
#ifdef CONFIG_UCLAMP_TASK
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned int min_cap;
unsigned int max_cap;
unsigned int cpu_cap;
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
max_cap = uclamp_eff_value(p, UCLAMP_MAX);
cpu_cap = capacity_orig_of(cpu);
return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
return true;
}
#endif
/*
* walt specific should_honor_rt_sync (see rt.c). this will honor
* the sync flag regardless of whether the current waker is cfs or rt
*/
static inline bool walt_should_honor_rt_sync(struct rq *rq, struct task_struct *p,
bool sync)
{
return sync &&
p->prio <= rq->rt.highest_prio.next &&
rq->rt.rt_nr_running <= 2;
}
enum rt_fastpaths {
NONE = 0,
NON_WAKEUP,
SYNC_WAKEUP,
CLUSTER_PACKING_FASTPATH,
};
static void walt_select_task_rq_rt(void *unused, struct task_struct *task, int cpu,
int sd_flag, int wake_flags, int *new_cpu)
{
struct task_struct *curr;
struct rq *rq, *this_cpu_rq;
bool may_not_preempt;
bool sync = !!(wake_flags & WF_SYNC);
int ret, target = -1, this_cpu;
struct cpumask *lowest_mask = NULL;
int packing_cpu = -1;
int fastpath = NONE;
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
struct walt_task_struct *wts;
if (unlikely(walt_disabled))
return;
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) {
fastpath = NON_WAKEUP;
goto out;
}
this_cpu = raw_smp_processor_id();
this_cpu_rq = cpu_rq(this_cpu);
wts = (struct walt_task_struct *) task->android_vendor_data1;
/*
* Respect the sync flag as long as the task can run on this CPU.
*/
if (sysctl_sched_sync_hint_enable && cpu_active(this_cpu) && !cpu_halted(this_cpu) &&
cpumask_test_cpu(this_cpu, task->cpus_ptr) &&
cpumask_test_cpu(this_cpu, &wts->reduce_mask) &&
walt_should_honor_rt_sync(this_cpu_rq, task, sync)) {
fastpath = SYNC_WAKEUP;
*new_cpu = this_cpu;
goto out;
}
*new_cpu = cpu; /* previous CPU as back up */
rq = cpu_rq(cpu);
rcu_read_lock();
curr = READ_ONCE(rq->curr); /* unlocked access */
/*
* If the current task on @p's runqueue is a softirq task,
* it may run without preemption for a time that is
* ill-suited for a waiting RT task. Therefore, try to
* wake this RT task on another runqueue.
*
* Otherwise, just let it ride on the affined RQ and the
* post-schedule router will push the preempted task away
*
* This test is optimistic, if we get it wrong the load-balancer
* will have to sort it out.
*
* We take into account the capacity of the CPU to ensure it fits the
* requirement of the task - which is only important on heterogeneous
* systems like big.LITTLE.
*/
may_not_preempt = cpu_busy_with_softirqs(cpu);
lowest_mask = this_cpu_cpumask_var_ptr(walt_local_cpu_mask);
/*
* If we're on asym system ensure we consider the different capacities
* of the CPUs when searching for the lowest_mask.
*/
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri, task,
lowest_mask, walt_rt_task_fits_capacity);
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
if (packing_cpu >= 0) {
while (packing_cpu < WALT_NR_CPUS) {
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
cpu_active(packing_cpu) &&
!cpu_halted(packing_cpu) &&
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 1))
break;
packing_cpu++;
}
if (packing_cpu < WALT_NR_CPUS) {
fastpath = CLUSTER_PACKING_FASTPATH;
*new_cpu = packing_cpu;
goto unlock;
}
}
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
if (!cpumask_empty(&lowest_mask_reduced))
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, &target);
if (target == -1)
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, &target);
/*
* If cpu is non-preemptible, prefer remote cpu
* even if it's running a higher-prio task.
* Otherwise: Don't bother moving it if the destination CPU is
* not running a lower priority task.
*/
if (target != -1 &&
(may_not_preempt || task->prio < cpu_rq(target)->rt.highest_prio.curr))
*new_cpu = target;
/* if backup or chosen cpu is halted, pick something else */
if (cpu_halted(*new_cpu)) {
cpumask_t non_halted;
/* choose the lowest-order, unhalted, allowed CPU */
cpumask_andnot(&non_halted, task->cpus_ptr, cpu_halt_mask);
target = cpumask_first(&non_halted);
if (target < nr_cpu_ids)
*new_cpu = target;
}
unlock:
rcu_read_unlock();
out:
trace_sched_select_task_rt(task, fastpath, *new_cpu, lowest_mask);
}
static void walt_rt_find_lowest_rq(void *unused, struct task_struct *task,
struct cpumask *lowest_mask, int ret, int *best_cpu)
{
int packing_cpu = -1;
int fastpath = 0;
struct walt_task_struct *wts;
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
if (unlikely(walt_disabled))
return;
wts = (struct walt_task_struct *) task->android_vendor_data1;
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
if (packing_cpu >= 0) {
while (packing_cpu < WALT_NR_CPUS) {
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
cpu_active(packing_cpu) &&
!cpu_halted(packing_cpu) &&
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 2))
break;
packing_cpu++;
}
if (packing_cpu < WALT_NR_CPUS) {
fastpath = CLUSTER_PACKING_FASTPATH;
*best_cpu = packing_cpu;
goto out;
}
}
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
if (!cpumask_empty(&lowest_mask_reduced))
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, best_cpu);
if (*best_cpu == -1)
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, best_cpu);
/*
* Walt was not able to find a non-halted best cpu. Ensure that
* find_lowest_rq doesn't use a halted cpu going forward, but
* does a best effort itself to find a good CPU.
*/
if (*best_cpu == -1)
cpumask_andnot(lowest_mask, lowest_mask, cpu_halt_mask);
out:
trace_sched_rt_find_lowest_rq(task, fastpath, *best_cpu, lowest_mask);
}
void walt_rt_init(void)
{
unsigned int i;
for_each_possible_cpu(i) {
if (!(zalloc_cpumask_var_node(&per_cpu(walt_local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i)))) {
pr_err("walt_local_cpu_mask alloc failed for cpu%d\n", i);
return;
}
}
register_trace_android_rvh_select_task_rq_rt(walt_select_task_rq_rt, NULL);
register_trace_android_rvh_find_lowest_rq(walt_rt_find_lowest_rq, NULL);
}

kernel/sched/walt/walt_tp.c Normal file (161 lines)
View File

@@ -0,0 +1,161 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <linux/cpu.h>
#include <linux/tracepoint.h>
#include <trace/hooks/sched.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
#include "perf_trace_counters.h"
unsigned int sysctl_sched_dynamic_tp_enable;
#define USE_CPUHP_STATE CPUHP_AP_ONLINE_DYN
DEFINE_PER_CPU(u32, cntenset_val);
DEFINE_PER_CPU(unsigned long, previous_ccnt);
DEFINE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DEFINE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
DEFINE_PER_CPU(u32, old_pid);
DEFINE_PER_CPU(u32, hotplug_flag);
DEFINE_PER_CPU(u64, prev_time);
static int tracectr_cpu_hotplug_coming_up(unsigned int cpu)
{
per_cpu(hotplug_flag, cpu) = 1;
return 0;
}
static void setup_prev_cnts(u32 cpu, u32 cnten_val)
{
int i;
if (cnten_val & CC)
per_cpu(previous_ccnt, cpu) =
read_sysreg(pmccntr_el0);
for (i = 0; i < NUM_L1_CTRS; i++) {
if (cnten_val & (1 << i)) {
/* Select */
write_sysreg(i, pmselr_el0);
isb();
/* Read value */
per_cpu(previous_l1_cnts[i], cpu) =
read_sysreg(pmxevcntr_el0);
}
}
}
void tracectr_notifier(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next,
unsigned int prev_state)
{
u32 cnten_val;
int current_pid;
u32 cpu = task_cpu(next);
u64 now;
if (!trace_sched_switch_with_ctrs_enabled())
return;
current_pid = next->pid;
if (per_cpu(old_pid, cpu) != -1) {
cnten_val = read_sysreg(pmcntenset_el0);
per_cpu(cntenset_val, cpu) = cnten_val;
/* Disable all the counters that were enabled */
write_sysreg(cnten_val, pmcntenclr_el0);
if (per_cpu(hotplug_flag, cpu) == 1) {
per_cpu(hotplug_flag, cpu) = 0;
setup_prev_cnts(cpu, cnten_val);
} else {
trace_sched_switch_with_ctrs(preempt, prev, next);
now = sched_clock();
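/* emit the counter configuration at most once per second per CPU */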
if ((now - per_cpu(prev_time, cpu)) > NSEC_PER_SEC) {
trace_sched_switch_ctrs_cfg(cpu);
per_cpu(prev_time, cpu) = now;
}
}
/* Enable all the counters that were disabled */
write_sysreg(cnten_val, pmcntenset_el0);
}
per_cpu(old_pid, cpu) = current_pid;
}
static void register_sched_switch_ctrs(void)
{
int cpu, rc;
for_each_possible_cpu(cpu)
per_cpu(old_pid, cpu) = -1;
rc = cpuhp_setup_state_nocalls(USE_CPUHP_STATE, "tracectr_cpu_hotplug",
tracectr_cpu_hotplug_coming_up, NULL);
if (rc >= 0)
register_trace_sched_switch(tracectr_notifier, NULL);
}
static void unregister_sched_switch_ctrs(void)
{
unregister_trace_sched_switch(tracectr_notifier, NULL);
cpuhp_remove_state_nocalls(USE_CPUHP_STATE);
}
const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
{
#ifdef CONFIG_SMP
return rd ? rd->span : NULL;
#else
return NULL;
#endif
}
static void sched_overutilized(void *data, struct root_domain *rd,
bool overutilized)
{
if (trace_sched_overutilized_enabled()) {
char span[SPAN_SIZE];
cpumap_print_to_pagebuf(false, span, sched_trace_rd_span(rd));
trace_sched_overutilized(overutilized, span);
}
}
static void walt_register_dynamic_tp_events(void)
{
register_trace_sched_overutilized_tp(sched_overutilized, NULL);
register_sched_switch_ctrs();
}
static void walt_unregister_dynamic_tp_events(void)
{
unregister_trace_sched_overutilized_tp(sched_overutilized, NULL);
unregister_sched_switch_ctrs();
}
int sched_dynamic_tp_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
static DEFINE_MUTEX(mutex);
int ret = 0, *val = (unsigned int *)table->data;
unsigned int old_val;
mutex_lock(&mutex);
old_val = sysctl_sched_dynamic_tp_enable;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write || (old_val == sysctl_sched_dynamic_tp_enable))
goto done;
if (*val)
walt_register_dynamic_tp_events();
else
walt_unregister_dynamic_tp_events();
done:
mutex_unlock(&mutex);
return ret;
}

View File

@@ -36,10 +36,6 @@
#include <asm/syscall.h>
#endif
#if defined(CONFIG_SECURITY_DSMS) && defined(CONFIG_SECURITY_KUMIHO)
#include <linux/dsms.h>
#endif
#ifdef CONFIG_SECCOMP_FILTER
#include <linux/file.h>
#include <linux/filter.h>
@@ -964,583 +960,8 @@ static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
SECCOMP_LOG_TRACE |
SECCOMP_LOG_LOG;
/* SEC_PRODUCT_FEATURE_SECURITY_SUPPORT_DSMS { */
#if defined(CONFIG_SECURITY_DSMS) && defined(CONFIG_SECURITY_KUMIHO)
/* append_string_s: append simple string to a buffer
* @target: pointer to a string, which is updated on success to
* point to the next available space
* @available_size: pointer to count of available bytes in target, including
* terminator; updated on success
* @source: nonnull pointer to text (zero-terminated, unless @source_len > 0)
* to be appended to *target
* @source_len: if > 0, exactly the number of bytes in @source which will be
* appended
* Returns 0 if *source was completely copied, 1 otherwise (null source,
* or not enough space in *target)
*/
static int append_string_s(char **target, int *available_size,
const char *source, int source_len)
{
if (!source) // sanity check
return 1;
while (*available_size > 1 && (source_len > 0 || *source)) {
*((*target)++) = *source++;
--(*available_size);
if (source_len > 0)
--source_len;
}
if (*available_size > 0)
**target = 0;
return *source != 0; // copy terminated prematurely
}
/* append_string: append to a buffer message, optionally quoting/escaping
* @target: pointer to a string address, which is updated on success to
* point to the next available space
* @available_size: pointer to count of available bytes in target, including
* terminator; updated on success
* @source: string to be appended to *target; if @source_length is zero,
* must be zero-terminated
* @source_len: if > 0, exactly the number of bytes in @source which will be
* appended
* @quote_escape: if true, add open/closing quotes and escapes nongraphic
* characters
* Returns 0 if *source was completely copied, 1 otherwise
*/
static int append_string(char **target, int *available_size,
char *source, int source_length,
int quote_escape)
{
if (source_length > 0)
source[--source_length] = 0;
if (quote_escape) {
const char *p;
if (*available_size < 2)
return 1;
*((*target)++) = '"';
--(*available_size);
for (p = source; source_length > 0 || *p; ++p) {
char ss[5];
ss[2] = 0;
switch (*p) {
case '\t':
*ss = '\\'; ss[1] = 't'; break;
case '\n':
*ss = '\\'; ss[1] = 'n'; break;
case '\r':
*ss = '\\'; ss[1] = 'r'; break;
case '\\':
*ss = '\\'; ss[1] = '\\'; break;
case '"':
*ss = '\\'; ss[1] = '"'; break;
default:
if (*(unsigned char *)p < ' ' ||
*(unsigned char *)p > 127) {
sprintf(ss, "\\%03o",
*(unsigned char *)p);
} else { // ordinary character
*ss = *p;
ss[1] = 0;
}
}
if (append_string_s(target, available_size, ss, 0))
return 1;
if (source_length > 0)
--source_length;
}
return append_string_s(target, available_size, "\"", 0);
}
return append_string_s(target, available_size, source, source_length);
}
/* append_string_f: append formatted data to a buffer message, optionally
* quoting/escaping
* @target: pointer to a string address, which is updated on success to
* point to the next available space
* @available_size: pointer to count of available bytes in target, including
* terminator; updated on success
* @aux_buffer: pointer to an auxiliary buffer, which should be enough for
* holding all formatted arguments
* @aux_buffer_size: size of @aux_buffer
* @quote_escape: if true, add open/closing quotes and escapes nongraphic
* characters
* @format: formatting string, printf-style
* All additional arguments are formatted into @aux_buffer
* Returns 0 if all arguments were formatted and completely copied, 1 otherwise
*/
static int append_string_f(char **target, int *available_size,
char *aux_buffer, size_t aux_buffer_size,
int quote_escape, const char *format, ...)
{
size_t vsnp_ret;
va_list ap;
va_start(ap, format);
vsnp_ret = vsnprintf(aux_buffer, aux_buffer_size, format, ap);
va_end(ap);
return append_string(target, available_size, aux_buffer, 0,
quote_escape) || vsnp_ret >= aux_buffer_size;
}
/* clone_from_user: returns copy of userspace region, if possible
* @dst: copy destination; if 0, allocate space
* @src: userspace address
* @size: address of size of region to be copied; will be updated with
* count of effectively copied bytes
* @buffer: pointer to a string address, used to record any diagnostic
* messages; will be updated to point to the next available space
* @buffer_size: pointer to count of available bytes in @buffer, including
* terminator; updated after use
* @task_name: short nonnull tag to identify caller
* @ne: pointer to flag, which will be nonzero if @buffer_size was not
* enough to hold all diagnostic messages
* Returns effective destination, 0 if @src was invalid or allocation failed
*/
static void *clone_from_user(void *dst, const void *src, size_t *size,
char **buffer, int *buffer_size, const char *task_name, int *ne)
{
void *eff_dst;
char aux_buffer[100];
size_t uncopied_size;
if (!src) {
*ne |= append_string_f(buffer, buffer_size,
aux_buffer, sizeof(aux_buffer), 0,
" (%s: null src)", task_name);
*size = 0;
return 0;
}
if (!dst) {
eff_dst = kcalloc(1, *size, GFP_KERNEL);
if (!eff_dst) {
*ne |= append_string_f(buffer, buffer_size,
aux_buffer, sizeof(aux_buffer), 0,
" (%s: failed alloc)", task_name);
*size = 0;
return 0;
}
} else
eff_dst = dst;
uncopied_size = copy_from_user(eff_dst, src, *size);
if (uncopied_size)
*ne |= append_string_f(buffer, buffer_size,
aux_buffer, sizeof(aux_buffer), 0,
" (%s: copied only %zu of %zu bytes)",
task_name, *size - uncopied_size, *size);
*size -= uncopied_size;
return eff_dst;
}
/* Descriptor of syscalls for a more user-friendly display */
struct syscall_api {
int nr; // key: syscall number
const char *name; // user-readable name
unsigned char nargs; // argument count
unsigned char arg_str; // bitmap marking which arguments are text strings
int (*dump)(char **buffer, // optional custom formatter
int *available_size, const struct seccomp_data *sd);
// Constants for struct syscall_api.arg_str
#define AS0 1 // first argument is a string
#define AS1 (1 << 1)
#define AS2 (1 << 2)
#define AS3 (1 << 3)
#define AS4 (1 << 4)
#define AS5 (1 << 5)
};
#include <uapi/linux/un.h> // sockaddr_un
/* Specialized formatter for some kinds of socket address */
static int dump_sockaddr(char **buffer, int *buffer_size,
char *aux_buffer, size_t aux_buffer_size,
const struct sockaddr *s_addr, int addr_len)
{
int ne = append_string_f(buffer, buffer_size,
aux_buffer, aux_buffer_size, 0,
" fam %d", s_addr->sa_family);
if (!ne)
switch (s_addr->sa_family) {
case AF_UNIX:
if (addr_len >= sizeof(struct sockaddr_un)) {
struct sockaddr_un *s_un =
(struct sockaddr_un *)s_addr;
ne |= append_string_f(buffer, buffer_size,
aux_buffer, aux_buffer_size, 0,
" UN \"%s\"", s_un->sun_path);
}
break;
case AF_INET:
if (addr_len >= sizeof(struct sockaddr_in)) {
struct sockaddr_in *s_in =
(struct sockaddr_in *)s_addr;
ne |= append_string_f(buffer, buffer_size,
aux_buffer, aux_buffer_size, 0,
" IP P%u A%pI4",
s_in->sin_port, &s_in->sin_addr);
}
break;
case AF_INET6:
if (addr_len >= sizeof(struct sockaddr_in6)) {
struct sockaddr_in6 *s_in =
(struct sockaddr_in6 *)s_addr;
ne |= append_string_f(buffer, buffer_size,
aux_buffer, aux_buffer_size, 0,
" IP6 P%uFI%u A%pI6 S%u",
s_in->sin6_port, s_in->sin6_flowinfo,
&s_in->sin6_addr, s_in->sin6_scope_id);
}
break;
}
return ne;
}
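
For reference, a user-space analogue of the AF_INET branch above, using inet_ntop() in place of the kernel-only %pI4 specifier (values are invented). Note that, like the kernel formatter, it prints sin_port as stored, i.e. in network byte order; the host-order value is shown only for comparison.

/* Hypothetical user-space sketch mirroring dump_sockaddr()'s AF_INET output. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct sockaddr_in sin;
	char addr[INET_ADDRSTRLEN];

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(443);
	inet_pton(AF_INET, "10.0.0.1", &sin.sin_addr);

	inet_ntop(AF_INET, &sin.sin_addr, addr, sizeof(addr));
	printf(" fam %d IP P%u A%s (host-order port %u)\n",
	       sin.sin_family, sin.sin_port, addr, ntohs(sin.sin_port));
	return 0;
}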
/* Specialized formatter for struct msghdr */
static int dump_msghdr(char **buffer, int *buffer_size,
char *aux_buffer, size_t aux_buffer_size,
const struct user_msghdr *msg, int user_flags)
{
int ne = append_string_f(buffer, buffer_size, aux_buffer,
aux_buffer_size, 0,
" namelen %d iovlen %lu controllen %zu flags %u uflags %d",
msg->msg_namelen, msg->msg_iovlen,
msg->msg_controllen, msg->msg_flags, user_flags);
if (ne)
return 1;
if (msg->msg_iovlen > 0) { /* Process message part contents */
struct iovec *iovec_p;
size_t eff_iovec_size = sizeof(struct iovec) * msg->msg_iovlen;
iovec_p = clone_from_user(0, (void *)msg->msg_iov,
&eff_iovec_size, buffer, buffer_size,
"iovec", &ne);
if (eff_iovec_size) {
/* For each message part dump its index,
* length and contents (up to DUMP_MAX bytes)
*/
int i;
#define DUMP_MAX 20 // arbitrary
for (i = 0;
!ne &&
i < eff_iovec_size / sizeof(struct iovec);
++i) {
size_t part_len = iovec_p[i].iov_len;
char bbuffer[20];
unsigned char *part;
ne |= append_string_f(buffer, buffer_size,
bbuffer, sizeof(bbuffer), 0,
" M%d(%zu):", i, part_len);
if (ne)
break;
if (part_len > DUMP_MAX)
part_len = DUMP_MAX;
part = clone_from_user(0,
(void *)iovec_p[i].iov_base,
&part_len, buffer, buffer_size,
"iovec part", &ne);
if (part_len) {
ne |= append_string(buffer,
buffer_size,
part,
part_len, 1);
}
kfree(part);
}
#undef DUMP_MAX
}
kfree(iovec_p);
}
if (msg->msg_namelen > 1 && msg->msg_name) {
/* process message destination, if any; probably nonessential
* if dump_sockaddr is called too
*/
char *name_copy; // copy of msg->msg_name from userspace
size_t namelen = msg->msg_namelen; // effective length after copying from userspace
ne |= append_string_s(buffer, buffer_size, " {", 0);
name_copy = clone_from_user(0, msg->msg_name,
&namelen, buffer, buffer_size, "name", &ne);
if (!name_copy)
return ne;
if (namelen >= sizeof(struct sockaddr_in)) {
/* Maybe IPv4? */
struct sockaddr_in *sin =
(struct sockaddr_in *)name_copy;
char sin_buf[3 + 6 + 6 + 4 * 4 + 10];
ne |= append_string_f(buffer, buffer_size,
sin_buf, sizeof(sin_buf), 0,
"IP F%uP%u A%pI4",
sin->sin_family, sin->sin_port,
&sin->sin_addr);
}
if (namelen >= sizeof(struct sockaddr_in6)) {
/* Maybe IPv6? */
struct sockaddr_in6 *sin =
(struct sockaddr_in6 *)name_copy;
char sin_buf[4 + 6 + 6 + 12 + 8 * 5 + 12 + 10];
ne |= append_string_f(buffer, buffer_size,
sin_buf, sizeof(sin_buf), 0,
" IP6 F%uP%uFI%u A%pI6 S%u",
sin->sin6_family, sin->sin6_port,
sin->sin6_flowinfo, &sin->sin6_addr,
sin->sin6_scope_id);
}
ne |= append_string_s(buffer, buffer_size, "}", 0);
kfree(name_copy);
}
return ne;
}
/* Specialized formatter for the sendmsg syscall */
static int dump_sendmsg(char **buffer, int *buffer_size,
const struct seccomp_data *sd)
{
int ne; // *buffer_size was not enough, something was truncated
#define BUFFER_SZ 500 /* size of auxiliary buffer for assorted data */
char *sbuffer = kcalloc(1, BUFFER_SZ, GFP_KERNEL);
if (!sbuffer)
return 1;
ne = append_string_f(buffer, buffer_size, sbuffer, BUFFER_SZ, 0,
" sock {fd %lld", sd->args[0]);
if (ne)
goto end;
{ /* Dump information on socket's peer */
int err;
struct socket *s_socket = sockfd_lookup(sd->args[0], &err);
if (s_socket) {
struct sockaddr s_addr;
ne |= append_string_f(buffer, buffer_size,
sbuffer, BUFFER_SZ, 0,
" type %d", s_socket->type);
if (ne)
goto end;
err = kernel_getpeername(s_socket, &s_addr);
if (err > 0)
ne |= dump_sockaddr(buffer, buffer_size,
sbuffer, BUFFER_SZ, &s_addr, err);
} else {
ne |= append_string_f(buffer, buffer_size,
sbuffer, BUFFER_SZ, 0,
" (socket lookup failed %d)", err);
}
if (ne)
goto end;
}
ne = append_string_s(buffer, buffer_size, "}", 0);
if (!ne && sd->args[1]) {
struct user_msghdr msg;
ne = copy_from_user((void *)&msg,
(void *)sd->args[1], sizeof(msg))
? append_string_s(buffer, buffer_size,
"(failed to copy)", 0)
: dump_msghdr(buffer, buffer_size, sbuffer, BUFFER_SZ,
&msg, sd->args[2]);
}
end:
kfree(sbuffer);
return ne;
#undef BUFFER_SZ
}
/* Default formatter for syscalls. Dumps parameters as numbers and strings. */
static int dump_syscall_default(char **buffer, int *buffer_size,
const struct syscall_api *api,
const struct seccomp_data *sd)
{
int ne = 0;
size_t j;
#define DUMP_MAX 1000 // size should be at most MSG_SZ
for (j = 0; j < ARRAY_SIZE(sd->args) && j < api->nargs; ++j) {
if (api->arg_str & (1 << j)) { // parameter is a string
char quote = 1;
const char *txt = (const char *)sd->args[j];
char *u_bufferp;
size_t u_buffersz = DUMP_MAX;
if (!txt)
quote = 0;
u_bufferp = clone_from_user(0, txt, &u_buffersz,
buffer, buffer_size, "args", &ne);
if (u_buffersz) {
if (append_string_s(buffer, buffer_size, " ", 0) ||
append_string(buffer, buffer_size, u_bufferp,
u_buffersz, quote))
ne = 1;
}
kfree(u_bufferp);
if (ne)
break;
} else {
char sbuffer[20];
ne |= append_string_f(buffer, buffer_size, sbuffer,
sizeof(sbuffer), 0, " %lld", sd->args[j]);
if (ne)
break;
}
}
#undef DUMP_MAX
return ne;
}
/* dump_syscall_base: generate string summarizing call arguments
* @buffer: target string
* @buffer_size: target size, including terminator
* @sd: seccomp invocation descriptor
* Returns 0 if successful, 1 if text was clipped
*/
static int dump_syscall_base(char *buffer, int buffer_size,
const struct seccomp_data *sd)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
static struct syscall_api apis[] = {
{__NR_read, "read", 3},
{__NR_write, "write", 3},
#ifdef __NR_open
{__NR_open, "open", 3, AS0},
#endif
{__NR_close, "close", 1},
#ifdef __NR_stat
{__NR_stat, "stat", 2, AS0},
#endif
{__NR_fstat, "fstat", 2},
#ifdef __NR_lstat
{__NR_lstat, "lstat", 2, AS0},
#endif
{__NR_sendto, "sendto", 6},
{__NR_sendmsg, "sendmsg", 3, 0, dump_sendmsg},
#ifdef __NR_unlinkat
{__NR_unlinkat, "unlinkat", 3, AS1},
#endif
#ifdef __NR_renameat
{__NR_renameat, "renameat", 4, AS1 | AS3},
#endif
#ifdef __NR_statfs
{__NR_statfs, "statfs", 2, AS0},
#endif
#ifdef __NR_faccessat
{__NR_faccessat, "faccessat", 3, AS1},
#endif
#ifdef __NR_fchmodat
{__NR_fchmodat, "fchmodat", 3, AS1},
#endif
#ifdef __NR_openat
{__NR_openat, "openat", 4, AS1},
#endif
#ifdef __NR_readlinkat
{__NR_readlinkat, "readlinkat", 4, AS1},
#endif
};
#pragma GCC diagnostic pop
char sbuffer[100];
size_t i;
char ne = 0; /* buffer_size was not enough */
char syscall_found = 0;
if (buffer_size < 1)
return 1;
*buffer = 0;
for (i = 0; i < ARRAY_SIZE(apis); ++i)
if (apis[i].nr == sd->nr) {
syscall_found = 1;
ne = append_string_f(&buffer, &buffer_size,
sbuffer, sizeof(sbuffer), 0,
"SC %d/%s", sd->nr, apis[i].name)
|| (apis[i].dump
? apis[i].dump(&buffer, &buffer_size, sd)
: dump_syscall_default(&buffer, &buffer_size,
apis + i, sd));
break;
}
if (!syscall_found) {
ne |= append_string_f(&buffer, &buffer_size,
sbuffer, sizeof(sbuffer), 0, "SC %d", sd->nr);
if (!ne)
for (i = 0; i < ARRAY_SIZE(sd->args); ++i) {
ne |= append_string_f(&buffer, &buffer_size,
sbuffer, sizeof(sbuffer), 0,
" %lld", sd->args[i]);
if (ne)
break;
}
}
return ne;
}
/* dump_syscall: format string describing syscall caller and arguments
* @buffer: target string, at least 4 chars long
* @buffer_size: available target size, including terminator
* @command: command of process invoking syscall
* @signr: signal number
* @sd: nonnull pointer to seccomp descriptor
*/
static void dump_syscall(char *buffer, int buffer_size, const char *command,
long signr, const struct seccomp_data *sd)
{
int n_copied = snprintf(buffer, buffer_size,
"seccomp '%s' signum %ld pid %d uid %d ",
command, signr, current->pid, current_uid().val);
n_copied = n_copied < buffer_size
? dump_syscall_base(buffer + n_copied, buffer_size - n_copied,
sd)
: 1;
if (n_copied) // something was truncated
strscpy(buffer + buffer_size - 4, "...", sizeof("..."));
}
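
Putting the pieces together, the message handed to DSMS is the fixed prefix built by dump_syscall() followed by the per-syscall argument dump. A hypothetical illustration (all values invented) of the shape produced for an openat() call:

/* Illustration only: assembles a string of the same shape as dump_syscall(). */
#include <stdio.h>

int main(void)
{
	char msg[256];

	snprintf(msg, sizeof(msg),
		 "seccomp '%s' signum %ld pid %d uid %d SC %d/%s %d \"%s\" %d %d",
		 "kumihodecoder", 31L, 1234, 1000,
		 56, "openat", -100, "/data/local/tmp/x", 577, 0);
	puts(msg);
	return 0;
}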
#define MSG_SZ 1024 // Limit actually set by DSMS
noinline void seccomp_notify_dsms(unsigned long syscall, long signr, u32 action,
const struct seccomp_data *sd)
{
/* The current thread command may be different from the main thread */
struct task_struct *main_thread = current->group_leader;
char comm_buf[sizeof(main_thread->comm)];
get_task_comm(comm_buf, main_thread);
if (unlikely(strncmp("kumihodecoder", comm_buf, sizeof(main_thread->comm)) == 0)) {
char *msg = kcalloc(1, MSG_SZ, GFP_KERNEL);
int i;
if (msg) {
dump_syscall(msg, MSG_SZ, comm_buf, signr, sd);
i = dsms_send_message("KMH0", msg, action);
if (unlikely(i != DSMS_SUCCESS))
pr_warn("%s::dsms_send_message failed: error %d msg <%s>\n",
__func__, i, msg);
kfree(msg);
} else
pr_warn("%s: out of memory", __func__);
}
}
#undef MSG_SZ
#else
#define seccomp_notify_dsms(syscall, signumber, action, sd) /* nothing */
#endif
/* SEC_PRODUCT_FEATURE_SECURITY_SUPPORT_DSMS } */
static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
bool requested, const struct seccomp_data *sd)
bool requested)
{
bool log = false;
@@ -1569,8 +990,6 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
default:
log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
}
if (action != SECCOMP_RET_ALLOW)
seccomp_notify_dsms(syscall, signr, action, sd);
/*
* Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
@@ -1610,7 +1029,7 @@ static void __secure_computing_strict(int this_syscall)
dump_stack();
#endif
current->seccomp.mode = SECCOMP_MODE_DEAD;
seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true, 0);
seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
do_exit(SIGKILL);
}
@@ -1870,7 +1289,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
return 0;
case SECCOMP_RET_LOG:
seccomp_log(this_syscall, 0, action, true, sd);
seccomp_log(this_syscall, 0, action, true);
return 0;
case SECCOMP_RET_ALLOW:
@@ -1885,7 +1304,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
case SECCOMP_RET_KILL_PROCESS:
default:
current->seccomp.mode = SECCOMP_MODE_DEAD;
seccomp_log(this_syscall, SIGSYS, action, true, sd);
seccomp_log(this_syscall, SIGSYS, action, true);
/* Dump core only if this is the last remaining thread. */
if (action != SECCOMP_RET_KILL_THREAD ||
(atomic_read(&current->signal->live) == 1)) {
@@ -1902,7 +1321,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
unreachable();
skip:
seccomp_log(this_syscall, 0, action, match ? match->log : false, sd);
seccomp_log(this_syscall, 0, action, match ? match->log : false);
return -1;
}
#else

View File

@@ -58,10 +58,6 @@
#include <asm/cacheflush.h>
#include <asm/syscall.h> /* for syscall_get_* */
#ifdef CONFIG_SAMSUNG_FREECESS
#include <linux/freecess.h>
#endif
#undef CREATE_TRACE_POINTS
#include <trace/hooks/signal.h>
#include <trace/hooks/dtask.h>
@@ -1319,18 +1315,6 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
unsigned long flags;
int ret = -ESRCH;
trace_android_vh_do_send_sig_info(sig, current, p);
#ifdef CONFIG_SAMSUNG_FREECESS
/*
* System will send SIGIO to the app that locked the file when other apps access the file.
* Report SIGIO to prevent other apps from getting stuck
*/
if ((sig == SIGKILL || sig == SIGTERM || sig == SIGABRT || sig == SIGQUIT || sig == SIGIO)) {
/* Report pid if signal is fatal */
sig_report(p, sig != SIGIO);
}
#endif
if (lock_task_sighand(p, &flags)) {
ret = send_signal_locked(sig, info, p, type);
unlock_task_sighand(p, &flags);

View File

@@ -75,10 +75,6 @@
#include <asm/io.h>
#include <asm/unistd.h>
#ifdef CONFIG_SECURITY_DEFEX
#include <linux/defex.h>
#endif
#include "uid16.h"
#include <trace/hooks/sys.h>
@@ -880,10 +876,6 @@ long __sys_setfsuid(uid_t uid)
if (!uid_valid(kuid))
return old_fsuid;
#ifdef CONFIG_SECURITY_DEFEX
if (task_defex_enforce(current, NULL, -__NR_setfsuid))
return old_fsuid;
#endif
new = prepare_creds();
if (!new)
return old_fsuid;
@@ -928,10 +920,6 @@ long __sys_setfsgid(gid_t gid)
if (!gid_valid(kgid))
return old_fsgid;
#ifdef CONFIG_SECURITY_DEFEX
if (task_defex_enforce(current, NULL, -__NR_setfsgid))
return old_fsgid;
#endif
new = prepare_creds();
if (!new)
return old_fsgid;

View File

@@ -63,8 +63,6 @@ static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
extern void log_suspend_abort_reason(const char *fmt, ...);
/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
@@ -195,7 +193,6 @@ static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
* timers queued for the future, we set the hrtimer to fire when
* the next future alarm timer expires.
*/
#define MAX_FUNC_NAME 20
static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
{
struct alarm *alarm = container_of(timer, struct alarm, timer);
@@ -203,18 +200,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
unsigned long flags;
int ret = HRTIMER_NORESTART;
int restart = ALARMTIMER_NORESTART;
char func_name[MAX_FUNC_NAME];
spin_lock_irqsave(&base->lock, flags);
alarmtimer_dequeue(base, alarm);
spin_unlock_irqrestore(&base->lock, flags);
if (alarm->function) {
if (alarm->function)
restart = alarm->function(alarm, base->get_ktime());
snprintf(func_name, MAX_FUNC_NAME, "%ps\n", alarm->function);
if (strncmp(func_name, "timerfd_alarmproc", strlen("timerfd_alarmproc")))
pr_info("PM: %ps is fired!\n", alarm->function);
}
spin_lock_irqsave(&base->lock, flags);
if (restart != ALARMTIMER_NORESTART) {
@@ -253,7 +245,6 @@ static int alarmtimer_suspend(struct device *dev)
struct rtc_device *rtc;
unsigned long flags;
struct rtc_time tm;
struct alarm *min_alarm = NULL;
spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
@@ -283,22 +274,12 @@ static int alarmtimer_suspend(struct device *dev)
expires = next->expires;
min = delta;
type = i;
min_alarm = container_of(next, struct alarm, node);
}
}
if (min == 0)
return 0;
if (min_alarm)
pr_info("soonest alarm : %ps\n", min_alarm->function);
if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
if (min_alarm) {
pr_info("alarmtimer suspending blocked by %ps\n", min_alarm->function);
log_suspend_abort_reason("alarmtimer suspending blocked by %ps\n",
min_alarm->function);
}
pm_wakeup_event(dev, 2 * MSEC_PER_SEC);
return -EBUSY;
}

View File

@@ -147,6 +147,35 @@ config PREEMPTIRQ_TRACEPOINTS
Create preempt/irq toggle tracepoints if needed, so that other parts
of the kernel can use them to generate or add hooks to them.
config IPC_LOGGING
tristate "Debug Logging for IPC Drivers"
select GENERIC_TRACER
depends on DEBUG_FS
help
IPC Logging driver provides a logging option for IPC drivers.
This provides cyclic buffer based logging support in a driver
specific context. This driver also provides a debugfs interface
to dump the logs in a live fashion.
If in doubt, say no.
config IPC_LOGGING_CDEV
tristate "Ipc Logging Character Device"
depends on IPC_LOGGING
help
Character device for ipc logging. Reading it will extract ipc logs up to
the specified size and increment the read index of the ipc log buffer.
The read function will return EOF when there is no longer any data to read
in the ipc log buffer.
config IPC_LOG_MINIDUMP_BUFFERS
int "Ipc log buffers count that can be dumped with minidump"
depends on IPC_LOGGING
default 0
help
This option is used to configure the maximum number of IPC log
buffers that can be dumped by minidump.
# All tracer options should select GENERIC_TRACER. For those options that are
# enabled by all tracers (context switch and event tracer) they select TRACING.
# This allows those options to appear when no other tracer is selected. But the

View File

@@ -110,4 +110,8 @@ obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
obj-$(CONFIG_RV) += rv/
obj-$(CONFIG_IPC_LOGGING) += qcom_ipc_logging.o
qcom_ipc_logging-y := ipc_logging.o ipc_logging_debug.o
qcom_ipc_logging-$(CONFIG_IPC_LOGGING_CDEV) += ipc_logging_cdev.o
libftrace-y := ftrace.o

1095
kernel/trace/ipc_logging.c Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,197 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/ipc_logging.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include "ipc_logging_private.h"
#define IPL_CDEV_MAX 255
static dev_t cdev_devt;
static struct class *cdev_class;
static DEFINE_IDA(ipl_minor_ida);
static void dfunc_string(struct encode_context *ectxt, struct decode_context *dctxt)
{
tsv_timestamp_read(ectxt, dctxt, "");
tsv_qtimer_read(ectxt, dctxt, " ");
tsv_byte_array_read(ectxt, dctxt, "");
/* add trailing \n if necessary */
if (*(dctxt->buff - 1) != '\n') {
if (dctxt->size) {
++dctxt->buff;
--dctxt->size;
}
*(dctxt->buff - 1) = '\n';
}
}
static int debug_log(struct ipc_log_context *ilctxt, char *buff, int size, int cont)
{
int i = 0;
int ret;
if (size < MAX_MSG_DECODED_SIZE) {
pr_err("%s: buffer size %d < %d\n", __func__, size, MAX_MSG_DECODED_SIZE);
return -ENOMEM;
}
do {
i = ipc_log_extract(ilctxt, buff, size - 1);
if (cont && i == 0) {
ret = wait_for_completion_interruptible(&ilctxt->read_avail);
if (ret < 0)
return ret;
}
} while (cont && i == 0);
return i;
}
static char *ipc_log_cdev_devnode(const struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "ipc_logging/%s", dev_name(dev));
}
static int ipc_log_cdev_open(struct inode *inode, struct file *filp)
{
struct ipc_log_cdev *ipl_cdev;
ipl_cdev = container_of(inode->i_cdev, struct ipc_log_cdev, cdev);
filp->private_data = container_of(ipl_cdev, struct ipc_log_context, cdev);
return 0;
}
/*
* VFS Read operation which dispatches the call to the DevFS read command stored in
* file->private_data.
*
* @filp File structure
* @buff user buffer
* @count size of user buffer
* @offp file position to read from (only a value of 0 is accepted)
*
* @returns = 0 end of file
* > 0 number of bytes read
* < 0 error
*/
static ssize_t ipc_log_cdev_read(struct file *filp, char __user *buff, size_t count, loff_t *offp)
{
int ret, bsize;
char *buffer;
struct ipc_log_context *ilctxt;
ilctxt = filp->private_data;
ret = kref_get_unless_zero(&ilctxt->refcount) ? 0 : -EIO;
if (ret)
return ret;
buffer = kmalloc(count, GFP_KERNEL);
if (!buffer) {
bsize = -ENOMEM;
goto done;
}
/* only support non-continuous mode */
bsize = debug_log(ilctxt, buffer, count, 0);
if (bsize > 0) {
if (copy_to_user(buff, buffer, bsize)) {
bsize = -EFAULT;
kfree(buffer);
goto done;
}
*offp += bsize;
}
kfree(buffer);
done:
ipc_log_context_put(ilctxt);
return bsize;
}
static const struct file_operations cdev_fops = {
.owner = THIS_MODULE,
.open = ipc_log_cdev_open,
.read = ipc_log_cdev_read,
};
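
If CONFIG_IPC_LOGGING_CDEV is enabled, the devnode callback above places the nodes under /dev/ipc_logging/<context name>. A minimal user-space reader might look like the following (device path and buffer size are illustrative); in the non-continuous mode implemented here, read() returns 0 once the log has been drained.

/* Hypothetical user-space reader for an ipc_logging character device. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/ipc_logging/my_driver", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t)n, stdout);
	close(fd);
	return n < 0 ? 1 : 0;
}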
void ipc_log_cdev_remove(struct ipc_log_context *ilctxt)
{
if (ilctxt->cdev.dev.class) {
cdev_device_del(&ilctxt->cdev.cdev, &ilctxt->cdev.dev);
ida_free(&ipl_minor_ida, (unsigned int)MINOR(ilctxt->cdev.dev.devt));
}
}
EXPORT_SYMBOL(ipc_log_cdev_remove);
void ipc_log_cdev_create(struct ipc_log_context *ilctxt, const char *mod_name)
{
int ret;
int minor;
dev_t devno;
if (!cdev_class) {
pr_err("%s: %s no device class created\n", __func__, mod_name);
return;
}
minor = ida_alloc_range(&ipl_minor_ida, 0, IPL_CDEV_MAX, GFP_KERNEL);
if (minor < 0) {
pr_err("%s: %s failed to alloc ipl minor number %d\n", __func__, mod_name, minor);
return;
}
devno = MKDEV(MAJOR(cdev_devt), minor);
device_initialize(&ilctxt->cdev.dev);
ilctxt->cdev.dev.devt = devno;
ilctxt->cdev.dev.class = cdev_class;
dev_set_name(&ilctxt->cdev.dev, "%s", mod_name);
cdev_init(&ilctxt->cdev.cdev, &cdev_fops);
ret = cdev_device_add(&ilctxt->cdev.cdev, &ilctxt->cdev.dev);
if (ret) {
pr_err("%s: unable to add ipl cdev %s, %d\n", __func__, mod_name, ret);
ilctxt->cdev.dev.class = NULL;
ida_free(&ipl_minor_ida, (unsigned int)minor);
put_device(&ilctxt->cdev.dev);
return;
}
add_deserialization_func((void *)ilctxt, TSV_TYPE_STRING, dfunc_string);
}
EXPORT_SYMBOL(ipc_log_cdev_create);
void ipc_log_cdev_init(void)
{
int ret;
cdev_class = NULL;
ret = alloc_chrdev_region(&cdev_devt, 0, IPL_CDEV_MAX, "ipc_logging");
if (ret) {
pr_err("%s: unable to create ipl cdev regoin %d\n", __func__, ret);
return;
}
cdev_class = class_create("ipc_logging");
if (IS_ERR(cdev_class)) {
pr_err("%s: unable to create ipl cdev class %ld\n", __func__, PTR_ERR(cdev_class));
cdev_class = NULL;
unregister_chrdev_region(cdev_devt, IPL_CDEV_MAX);
return;
}
cdev_class->devnode = ipc_log_cdev_devnode;
}
EXPORT_SYMBOL(ipc_log_cdev_init);

View File

@@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/debugfs.h>
#include <linux/io.h>
#include <linux/idr.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/completion.h>
#include <linux/ipc_logging.h>
#include "ipc_logging_private.h"
static DEFINE_MUTEX(ipc_log_debugfs_init_lock);
static struct dentry *root_dent;
static int debug_log(struct ipc_log_context *ilctxt,
char *buff, int size, int cont)
{
int i = 0;
int ret;
if (size < MAX_MSG_DECODED_SIZE) {
pr_err("%s: buffer size %d < %d\n", __func__, size,
MAX_MSG_DECODED_SIZE);
return -ENOMEM;
}
do {
i = ipc_log_extract(ilctxt, buff, size - 1);
if (cont && i == 0) {
ret = wait_for_completion_interruptible(
&ilctxt->read_avail);
if (ret < 0)
return ret;
}
} while (cont && i == 0);
return i;
}
/*
* VFS Read operation helper which dispatches the call to the debugfs
* read command stored in file->private_data.
*
* @file File structure
* @buff user buffer
* @count size of user buffer
* @ppos file position to read from (only a value of 0 is accepted)
* @cont 1 = continuous mode (don't return 0 to signal end-of-file)
*
* @returns ==0 end of file
* >0 number of bytes read
* <0 error
*/
static ssize_t debug_read_helper(struct file *file, char __user *buff,
size_t count, loff_t *ppos, int cont)
{
struct ipc_log_context *ilctxt;
struct dentry *d = file->f_path.dentry;
char *buffer;
int bsize;
int r;
r = debugfs_file_get(d);
if (r)
return r;
ilctxt = file->private_data;
r = kref_get_unless_zero(&ilctxt->refcount) ? 0 : -EIO;
if (r) {
debugfs_file_put(d);
return r;
}
buffer = kmalloc(count, GFP_KERNEL);
if (!buffer) {
bsize = -ENOMEM;
goto done;
}
bsize = debug_log(ilctxt, buffer, count, cont);
if (bsize > 0) {
if (copy_to_user(buff, buffer, bsize)) {
bsize = -EFAULT;
kfree(buffer);
goto done;
}
*ppos += bsize;
}
kfree(buffer);
done:
ipc_log_context_put(ilctxt);
debugfs_file_put(d);
return bsize;
}
static ssize_t debug_read(struct file *file, char __user *buff,
size_t count, loff_t *ppos)
{
return debug_read_helper(file, buff, count, ppos, 0);
}
static ssize_t debug_read_cont(struct file *file, char __user *buff,
size_t count, loff_t *ppos)
{
return debug_read_helper(file, buff, count, ppos, 1);
}
static const struct file_operations debug_ops = {
.read = debug_read,
.open = simple_open,
};
static const struct file_operations debug_ops_cont = {
.read = debug_read_cont,
.open = simple_open,
};
static void debug_create(const char *name, mode_t mode,
struct dentry *dent,
struct ipc_log_context *ilctxt,
const struct file_operations *fops)
{
debugfs_create_file_unsafe(name, mode, dent, ilctxt, fops);
}
static void dfunc_string(struct encode_context *ectxt,
struct decode_context *dctxt)
{
tsv_timestamp_read(ectxt, dctxt, "");
tsv_qtimer_read(ectxt, dctxt, " ");
tsv_byte_array_read(ectxt, dctxt, "");
/* add trailing \n if necessary */
if (*(dctxt->buff - 1) != '\n') {
if (dctxt->size) {
++dctxt->buff;
--dctxt->size;
}
*(dctxt->buff - 1) = '\n';
}
}
void check_and_create_debugfs(void)
{
mutex_lock(&ipc_log_debugfs_init_lock);
if (!root_dent) {
root_dent = debugfs_create_dir("ipc_logging", 0);
if (IS_ERR(root_dent)) {
pr_err("%s: unable to create debugfs %ld\n",
__func__, PTR_ERR(root_dent));
root_dent = NULL;
}
}
mutex_unlock(&ipc_log_debugfs_init_lock);
}
EXPORT_SYMBOL(check_and_create_debugfs);
void create_ctx_debugfs(struct ipc_log_context *ctxt,
const char *mod_name)
{
if (!root_dent)
check_and_create_debugfs();
if (root_dent) {
ctxt->dent = debugfs_create_dir(mod_name, root_dent);
if (!IS_ERR(ctxt->dent)) {
debug_create("log", 0444, ctxt->dent,
ctxt, &debug_ops);
debug_create("log_cont", 0444, ctxt->dent,
ctxt, &debug_ops_cont);
}
}
add_deserialization_func((void *)ctxt,
TSV_TYPE_STRING, dfunc_string);
}
EXPORT_SYMBOL(create_ctx_debugfs);

View File

@@ -0,0 +1,193 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2012-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _IPC_LOGGING_PRIVATE_H
#define _IPC_LOGGING_PRIVATE_H
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/ipc_logging.h>
#define IPC_LOG_VERSION 0x0003
#define IPC_LOG_MAX_CONTEXT_NAME_LEN 32
/**
* struct ipc_log_page_header - Individual log page header
*
* @magic: Magic number (used for log extraction)
* @nmagic: Inverse of magic number (used for log extraction)
* @page_num: Index of page (0.. N - 1) (note top bit is always set)
* @read_offset: Read offset in page
* @write_offset: Write offset in page (or 0xFFFF if full)
* @log_id: ID of logging context that owns this page
* @start_time: Scheduler clock for first write time in page
* @end_time: Scheduler clock for last write time in page
* @ctx_offset: Signed offset from page to the logging context. Used to
* optimize ram-dump extraction.
*
* @list: Linked list of pages that make up a log
* @nd_read_offset: Non-destructive read offset used for debugfs
*
* The first part of the structure defines data that is used to extract the
* logs from a memory dump and elements in this section should not be changed
* or re-ordered. New local data structures can be added to the end of the
* structure since they will be ignored by the extraction tool.
*/
struct ipc_log_page_header {
uint32_t magic;
uint32_t nmagic;
uint32_t page_num;
uint16_t read_offset;
uint16_t write_offset;
uint64_t log_id;
uint64_t start_time;
uint64_t end_time;
int64_t ctx_offset;
/* add local data structures after this point */
struct list_head list;
uint16_t nd_read_offset;
};
/**
* struct ipc_log_page - Individual log page
*
* @hdr: Log page header
* @data: Log data
*
* Each log consists of 1 to N log pages. Data size is adjusted to always fit
* the structure into a single kernel page.
*/
struct ipc_log_page {
struct ipc_log_page_header hdr;
char data[PAGE_SIZE - sizeof(struct ipc_log_page_header)];
};
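
As a back-of-the-envelope check of the sizing comment above (the sizes below are assumptions; the real figure is sizeof(struct ipc_log_page_header) and depends on architecture and padding):

/* Rough arithmetic only: with 4 KiB pages and an assumed ~80-byte header,
 * each ipc_log_page would hold about 4016 bytes of log data. */
#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096; /* assumption */
	const unsigned long hdr_size = 80;    /* assumption, not the real sizeof */

	printf("usable log bytes per page: %lu\n", page_size - hdr_size);
	return 0;
}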
/**
* struct ipc_log_cdev - Ipc logging character device
*
* @cdev: character device structure
* @dev: device structure
*
* Character device structure for ipc logging. Used to create character device nodes in DevFS.
*/
struct ipc_log_cdev {
struct cdev cdev;
struct device dev;
};
/**
* struct ipc_log_context - main logging context
*
* @magic: Magic number (used for log extraction)
* @nmagic: Inverse of magic number (used for log extraction)
* @version: IPC Logging version of log format
* @user_version: Version number for user-defined messages
* @header_size: Size of the log header which is used to determine the offset
* of ipc_log_page::data
* @log_id: Log ID (assigned when log is created)
* @name: Name of the log used to uniquely identify the log during extraction
*
* @list: List of log contexts (struct ipc_log_context)
* @page_list: List of log pages (struct ipc_log_page)
* @first_page: First page in list of logging pages
* @last_page: Last page in list of logging pages
* @write_page: Current write page
* @read_page: Current read page (for internal reads)
* @nd_read_page: Current debugfs extraction page (non-destructive)
*
* @write_avail: Number of bytes available to write in all pages
* @dent: Debugfs node for run-time log extraction
* @dfunc_info_list: List of deserialization functions
* @context_lock_lhb1: Lock for entire structure
* @read_avail: Completed when new data is added to the log
* @cdev: Ipc logging character device
*/
struct ipc_log_context {
uint32_t magic;
uint32_t nmagic;
uint32_t version;
uint16_t user_version;
uint16_t header_size;
uint64_t log_id;
char name[IPC_LOG_MAX_CONTEXT_NAME_LEN];
/* add local data structures after this point */
struct list_head list;
struct list_head page_list;
struct ipc_log_page *first_page;
struct ipc_log_page *last_page;
struct ipc_log_page *write_page;
struct ipc_log_page *read_page;
struct ipc_log_page *nd_read_page;
uint32_t write_avail;
struct dentry *dent;
struct list_head dfunc_info_list;
spinlock_t context_lock_lhb1;
struct completion read_avail;
struct kref refcount;
bool destroyed;
struct ipc_log_cdev cdev;
};
struct dfunc_info {
struct list_head list;
int type;
void (*dfunc)(struct encode_context *enc, struct decode_context *dec);
};
enum {
TSV_TYPE_INVALID,
TSV_TYPE_TIMESTAMP,
TSV_TYPE_POINTER,
TSV_TYPE_INT32,
TSV_TYPE_BYTE_ARRAY,
TSV_TYPE_QTIMER,
};
enum {
OUTPUT_DEBUGFS,
};
#define IPC_LOG_CONTEXT_MAGIC_NUM 0x25874452
#define IPC_LOGGING_MAGIC_NUM 0x52784425
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define IS_MSG_TYPE(x) (((x) > TSV_TYPE_MSG_START) && \
((x) < TSV_TYPE_MSG_END))
#define MAX_MSG_DECODED_SIZE (MAX_MSG_SIZE*4)
void ipc_log_context_free(struct kref *kref);
static inline void ipc_log_context_put(struct ipc_log_context *ilctxt)
{
kref_put(&ilctxt->refcount, ipc_log_context_free);
}
#if (defined(CONFIG_DEBUG_FS))
void check_and_create_debugfs(void);
void create_ctx_debugfs(struct ipc_log_context *ctxt,
const char *mod_name);
#else
void check_and_create_debugfs(void)
{
}
void create_ctx_debugfs(struct ipc_log_context *ctxt, const char *mod_name)
{
}
#endif
#if IS_ENABLED(CONFIG_IPC_LOGGING_CDEV)
void ipc_log_cdev_init(void);
void ipc_log_cdev_create(struct ipc_log_context *ilctxt, const char *mod_name);
void ipc_log_cdev_remove(struct ipc_log_context *ilctxt);
#else
static inline void ipc_log_cdev_init(void) {}
static inline void ipc_log_cdev_create(struct ipc_log_context *ilctxt, const char *mod_name) {}
static inline void ipc_log_cdev_remove(struct ipc_log_context *ilctxt) {}
#endif
#endif

View File

@@ -32,10 +32,6 @@
#include <trace/events/module.h>
#ifdef CONFIG_SECURITY_DEFEX
#include <linux/defex.h>
#endif
static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
static DEFINE_SPINLOCK(umh_sysctl_lock);
@@ -427,11 +423,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
if (strlen(sub_info->path) == 0)
goto out;
#ifdef CONFIG_SECURITY_DEFEX
if (task_defex_user_exec(sub_info->path)) {
goto out;
}
#endif
/*
* Set the completion pointer only if there is a waiter.
* This makes it possible to use umh_complete to free