Add Samsung-specific changes
@@ -103,6 +103,7 @@ obj-$(CONFIG_RELAY) += relay.o
|
||||
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
|
||||
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
|
||||
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
|
||||
obj-$(CONFIG_MSM_SYSSTATS) += msm_sysstats.o
|
||||
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
|
||||
obj-$(CONFIG_LATENCYTOP) += latencytop.o
|
||||
obj-$(CONFIG_FUNCTION_TRACER) += trace/
|
||||
|
@@ -60,12 +60,6 @@
|
||||
|
||||
#include "audit.h"
|
||||
|
||||
// [ SEC_SELINUX_PORTING_COMMON
|
||||
#ifdef CONFIG_PROC_AVC
|
||||
#include <linux/proc_avc.h>
|
||||
#endif
|
||||
// ] SEC_SELINUX_PORTING_COMMON
|
||||
|
||||
/* No auditing will take place until audit_initialized == AUDIT_INITIALIZED.
|
||||
* (Initialization happens after skb_init is called.) */
|
||||
#define AUDIT_DISABLED -1
|
||||
@@ -548,16 +542,8 @@ static void kauditd_printk_skb(struct sk_buff *skb)
|
||||
struct nlmsghdr *nlh = nlmsg_hdr(skb);
|
||||
char *data = nlmsg_data(nlh);
|
||||
|
||||
// [ SEC_SELINUX_PORTING_COMMON
|
||||
#ifdef CONFIG_PROC_AVC
|
||||
if (nlh->nlmsg_type != AUDIT_EOE && nlh->nlmsg_type != AUDIT_NETFILTER_CFG)
|
||||
sec_avc_log("%s\n", data);
|
||||
#else
|
||||
|
||||
if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit())
|
||||
pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
|
||||
#endif
|
||||
// ] SEC_SELINUX_PORTING_COMMON
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -799,15 +785,6 @@ retry:
|
||||
} else
|
||||
goto retry;
|
||||
} else {
|
||||
// [ SEC_SELINUX_PORTING_COMMON
|
||||
#ifdef CONFIG_PROC_AVC
|
||||
struct nlmsghdr *nlh = nlmsg_hdr(skb);
|
||||
char *data = nlmsg_data(nlh);
|
||||
|
||||
if (nlh->nlmsg_type != AUDIT_EOE && nlh->nlmsg_type != AUDIT_NETFILTER_CFG)
|
||||
sec_avc_log("%s\n", data);
|
||||
#endif
|
||||
// ] SEC_SELINUX_PORTING_COMMON
|
||||
/* skb sent - drop the extra reference and continue */
|
||||
consume_skb(skb);
|
||||
failed = 0;
|
||||
|
@@ -456,40 +456,6 @@ static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
|
||||
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SAMSUNG_FREECESS
|
||||
/**
 * Check whether a task is allowed to be added to the freezer group;
 * only the admin may add tasks to it.
 */
|
||||
static int freezer_can_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
const struct cred *cred = current_cred(), *tcred;
|
||||
struct task_struct *task;
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
cgroup_taskset_for_each(task, css, tset) {
|
||||
tcred = __task_cred(task);
|
||||
|
||||
// Only the system process (euid 1000) or root (CAP_SYS_ADMIN) has permission to move other tasks.
|
||||
if ((current != task) && !(cred->euid.val == 1000 || capable(CAP_SYS_ADMIN))) {
|
||||
pr_err("Permission problem\n");
|
||||
return -EACCES;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Cancel the attach action when it fails. This is normally used to undo the
 * attach, but freezer attach only sends a signal and always succeeds, so
 * there is nothing to restore here.
 */
|
||||
static void freezer_cancel_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct cftype files[] = {
|
||||
{
|
||||
.name = "state",
|
||||
@@ -518,9 +484,5 @@ struct cgroup_subsys freezer_cgrp_subsys = {
|
||||
.attach = freezer_attach,
|
||||
.fork = freezer_fork,
|
||||
.legacy_cftypes = files,
|
||||
#ifdef CONFIG_SAMSUNG_FREECESS
|
||||
.can_attach = freezer_can_attach,
|
||||
.cancel_attach = freezer_cancel_attach,
|
||||
#endif
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(freezer_cgrp_subsys);
|
||||
|
@@ -120,6 +120,18 @@ config DMA_RESTRICTED_POOL
|
||||
and <kernel/dma/swiotlb.c>.
|
||||
If unsure, say "n".
|
||||
|
||||
config SWIOTLB_NONLINEAR
|
||||
bool "Allow swiotlb to use non-linear memory as bounce buffers"
|
||||
depends on SWIOTLB
|
||||
help
|
||||
This allows the swiotlb driver to work with memory regions where
|
||||
physical to virtual address translations can't be done using APIs
|
||||
such as phys_to_virt. These could be reserved memory regions that
|
||||
are not mapped by default or could be seen as "device" memory
|
||||
accessed via ioremap().
|
||||
|
||||
If unsure, say "n".
|
||||
|
||||
#
|
||||
# Should be selected if we can mmap non-coherent mappings to userspace.
|
||||
# The only thing that is really required is a way to set an uncached bit
|
||||
|
@@ -96,6 +96,16 @@ static struct io_tlb_mem io_tlb_default_mem;
|
||||
|
||||
#endif /* CONFIG_SWIOTLB_DYNAMIC */
|
||||
|
||||
#ifdef CONFIG_SWIOTLB_NONLINEAR
|
||||
phys_addr_t io_tlb_start, io_tlb_end;
|
||||
static unsigned long io_tlb_nslabs;
|
||||
static char *io_tlb_vstart;
|
||||
|
||||
static inline unsigned char *swiotlb_phys_to_virt(phys_addr_t tlb_addr);
|
||||
#else
|
||||
#define swiotlb_phys_to_virt phys_to_virt
|
||||
#endif
|
||||
|
||||
static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
|
||||
static unsigned long default_nareas;
|
||||
|
||||
@@ -266,7 +276,7 @@ void __init swiotlb_update_mem_attributes(void)
|
||||
static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
|
||||
unsigned long nslabs, bool late_alloc, unsigned int nareas)
|
||||
{
|
||||
void *vaddr = phys_to_virt(start);
|
||||
void *vaddr = swiotlb_phys_to_virt(start);
|
||||
unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
|
||||
|
||||
mem->nslabs = nslabs;
|
||||
@@ -518,6 +528,63 @@ error_area:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SWIOTLB_NONLINEAR
|
||||
static int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
|
||||
{
|
||||
struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
|
||||
unsigned long bytes = nslabs << IO_TLB_SHIFT;
|
||||
unsigned int area_order;
|
||||
|
||||
/* protect against double initialization */
|
||||
if (WARN_ON_ONCE(mem->nslabs))
|
||||
return -ENOMEM;
|
||||
|
||||
if (!default_nareas)
|
||||
swiotlb_adjust_nareas(num_possible_cpus());
|
||||
|
||||
area_order = get_order(array_size(sizeof(*mem->areas),
|
||||
default_nareas));
|
||||
mem->areas = (struct io_tlb_area *)
|
||||
__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
|
||||
if (!mem->areas)
|
||||
return -ENOMEM;
|
||||
|
||||
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
get_order(array_size(sizeof(*mem->slots), nslabs)));
|
||||
if (!mem->slots)
|
||||
goto error_slots;
|
||||
|
||||
set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
|
||||
swiotlb_init_io_tlb_pool(mem, io_tlb_start, nslabs, true, default_nareas);
|
||||
add_mem_pool(&io_tlb_default_mem, mem);
|
||||
|
||||
swiotlb_print_info();
|
||||
return 0;
|
||||
|
||||
error_slots:
|
||||
free_pages((unsigned long)mem->areas, area_order);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int swiotlb_late_init_with_tblpaddr(char *tlb,
|
||||
phys_addr_t tlb_paddr, unsigned long nslabs)
|
||||
{
|
||||
unsigned long bytes;
|
||||
|
||||
if (io_tlb_start)
|
||||
return -EBUSY;
|
||||
|
||||
bytes = nslabs << IO_TLB_SHIFT;
|
||||
io_tlb_nslabs = nslabs;
|
||||
io_tlb_start = tlb_paddr;
|
||||
io_tlb_vstart = tlb;
|
||||
io_tlb_end = io_tlb_start + bytes;
|
||||
|
||||
return swiotlb_late_init_with_tbl(tlb, nslabs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(swiotlb_late_init_with_tblpaddr);
|
||||
#endif /* CONFIG_SWIOTLB_NONLINEAR */
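/*
 * Illustrative sketch, not from this patch: how a vendor driver might
 * hand an ioremap()'d carveout to swiotlb through the
 * swiotlb_late_init_with_tblpaddr() API added above. The carveout
 * base/size are hypothetical, and <linux/io.h> is assumed to be
 * available.
 */
static int example_init_nonlinear_swiotlb(phys_addr_t base, size_t size)
{
	unsigned long nslabs = size >> IO_TLB_SHIFT;
	void __iomem *va;

	/* The region is not covered by the linear map / phys_to_virt(). */
	va = ioremap(base, size);
	if (!va)
		return -ENOMEM;

	/* swiotlb will translate bounce addresses through this mapping. */
	return swiotlb_late_init_with_tblpaddr((char __force *)va, base, nslabs);
}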
|
||||
|
||||
void __init swiotlb_exit(void)
|
||||
{
|
||||
struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
|
||||
@@ -829,6 +896,13 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
|
||||
return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SWIOTLB_NONLINEAR
|
||||
static inline unsigned char *swiotlb_phys_to_virt(phys_addr_t tlb_addr)
|
||||
{
|
||||
return (unsigned char *)(io_tlb_vstart + (tlb_addr - io_tlb_start));
|
||||
}
|
||||
#endif
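/*
 * For reference, the translation above is purely offset based: with
 * io_tlb_start == P and io_tlb_vstart == V, a bounce buffer address
 * P + off maps to the CPU address V + off.
 */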
|
||||
|
||||
/*
|
||||
* Bounce: copy the swiotlb buffer from or back to the original dma location
|
||||
*/
|
||||
@@ -1431,6 +1505,10 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
|
||||
swiotlb_release_slots(dev, tlb_addr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SWIOTLB_NONLINEAR
|
||||
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
|
||||
#endif
|
||||
|
||||
void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
|
||||
size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
@@ -1481,6 +1559,15 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
|
||||
arch_sync_dma_for_device(swiotlb_addr, size, dir);
|
||||
return dma_addr;
|
||||
}
|
||||
#ifdef CONFIG_SWIOTLB_NONLINEAR
|
||||
EXPORT_SYMBOL_GPL(swiotlb_map);
|
||||
|
||||
size_t swiotlb_max_mapping_size(struct device *dev)
|
||||
{
|
||||
return 4096;
|
||||
}
|
||||
#else
|
||||
|
||||
|
||||
size_t swiotlb_max_mapping_size(struct device *dev)
|
||||
{
|
||||
@@ -1497,6 +1584,7 @@ size_t swiotlb_max_mapping_size(struct device *dev)
|
||||
|
||||
return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* is_swiotlb_allocated() - check if the default software IO TLB is initialized
|
||||
|
@@ -76,10 +76,6 @@
|
||||
#include <trace/hooks/mm.h>
|
||||
#include <trace/hooks/dtask.h>
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
#include <linux/defex.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The default value should be high enough to not crash a system that randomly
|
||||
* crashes its kernel from time to time, but low enough to at least not permit
|
||||
@@ -822,9 +818,6 @@ void __noreturn do_exit(long code)
|
||||
|
||||
synchronize_group_exit(tsk, code);
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
task_defex_zero_creds(current);
|
||||
#endif
|
||||
WARN_ON(tsk->plug);
|
||||
|
||||
profile_task_exit(tsk);
|
||||
|
@@ -100,7 +100,6 @@
|
||||
#include <linux/user_events.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/cpufreq_times.h>
|
||||
#include <linux/task_integrity.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
@@ -120,10 +119,6 @@
|
||||
#include <linux/kdp.h>
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
#include <linux/defex.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Minimum number of threads to boot the kernel
|
||||
*/
|
||||
@@ -2008,57 +2003,6 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
|
||||
task->signal->pids[type] = pid;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FIVE
|
||||
static int dup_task_integrity(unsigned long clone_flags,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (clone_flags & CLONE_VM) {
|
||||
task_integrity_get(TASK_INTEGRITY(current));
|
||||
task_integrity_assign(tsk, TASK_INTEGRITY(current));
|
||||
} else {
|
||||
task_integrity_assign(tsk, task_integrity_alloc());
|
||||
|
||||
if (!TASK_INTEGRITY(tsk))
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void task_integrity_cleanup(struct task_struct *tsk)
|
||||
{
|
||||
task_integrity_put(TASK_INTEGRITY(tsk));
|
||||
}
|
||||
|
||||
static inline int task_integrity_apply(unsigned long clone_flags,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
int ret = 0;
|
||||
if (!(clone_flags & CLONE_VM))
|
||||
ret = five_fork(current, tsk);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static inline int dup_task_integrity(unsigned long clone_flags,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void task_integrity_cleanup(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int task_integrity_apply(unsigned long clone_flags,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void rcu_copy_process(struct task_struct *p)
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
@@ -2577,14 +2521,9 @@ __latent_entropy struct task_struct *copy_process(
|
||||
goto bad_fork_cleanup_perf;
|
||||
/* copy all the process information */
|
||||
shm_init_task(p);
|
||||
retval = dup_task_integrity(clone_flags, p);
|
||||
retval = security_task_alloc(p, clone_flags);
|
||||
if (retval)
|
||||
goto bad_fork_cleanup_audit;
|
||||
retval = security_task_alloc(p, clone_flags);
|
||||
if (retval) {
|
||||
task_integrity_cleanup(p);
|
||||
goto bad_fork_cleanup_audit;
|
||||
}
|
||||
retval = copy_semundo(clone_flags, p);
|
||||
if (retval)
|
||||
goto bad_fork_cleanup_security;
|
||||
@@ -2763,10 +2702,6 @@ __latent_entropy struct task_struct *copy_process(
|
||||
goto bad_fork_cancel_cgroup;
|
||||
}
|
||||
|
||||
retval = task_integrity_apply(clone_flags, p);
|
||||
if (retval)
|
||||
goto bad_fork_cancel_cgroup;
|
||||
|
||||
/* No more failure paths after this point. */
|
||||
|
||||
/*
|
||||
@@ -3034,9 +2969,6 @@ pid_t kernel_clone(struct kernel_clone_args *args)
|
||||
pid = get_task_pid(p, PIDTYPE_PID);
|
||||
nr = pid_vnr(pid);
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
task_defex_zero_creds(p);
|
||||
#endif
|
||||
if (clone_flags & CLONE_PARENT_SETTID)
|
||||
put_user(nr, args->parent_tid);
|
||||
|
||||
|
kernel/msm_sysstats.c (new file, 695 lines)
@@ -0,0 +1,695 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/msm_sysstats.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <net/genetlink.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/dma-resv.h>
|
||||
|
||||
#include <linux/qcom_dma_heap.h>
|
||||
|
||||
struct tgid_iter {
|
||||
unsigned int tgid;
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
static struct genl_family family;
|
||||
|
||||
static u64 (*sysstats_kgsl_get_stats)(pid_t pid);
|
||||
|
||||
static DEFINE_PER_CPU(__u32, sysstats_seqnum);
|
||||
#define SYSSTATS_CMD_ATTR_MAX 3
|
||||
static const struct nla_policy sysstats_cmd_get_policy[SYSSTATS_CMD_ATTR_MAX + 1] = {
|
||||
[SYSSTATS_TASK_CMD_ATTR_PID] = { .type = NLA_U32 },
|
||||
[SYSSTATS_TASK_CMD_ATTR_FOREACH] = { .type = NLA_U32 },
|
||||
[SYSSTATS_TASK_CMD_ATTR_PIDS_OF_NAME] = { .type = NLA_NUL_STRING}};
|
||||
/*
 * The dummy callback below keeps the function pointer valid at all
 * times, so callers never need any external synchronization or NULL
 * checks.
 */
|
||||
static u64 sysstats_kgsl_stats(pid_t pid)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sysstats_register_kgsl_stats_cb(u64 (*cb)(pid_t pid))
|
||||
{
|
||||
sysstats_kgsl_get_stats = cb;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sysstats_register_kgsl_stats_cb);
|
||||
|
||||
void sysstats_unregister_kgsl_stats_cb(void)
|
||||
{
|
||||
sysstats_kgsl_get_stats = sysstats_kgsl_stats;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sysstats_unregister_kgsl_stats_cb);
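/*
 * Illustrative sketch, not from this patch: how a GPU driver module
 * could hook the callbacks exported above. A real implementation would
 * return the driver-private memory, in bytes, charged to @pid (or the
 * system-wide total when pid == -1, as the meminfo path below expects);
 * this placeholder just returns 0.
 */
static u64 example_gpu_mem_for_pid(pid_t pid)
{
	return 0;
}

static int __init example_gpu_stats_init(void)
{
	sysstats_register_kgsl_stats_cb(example_gpu_mem_for_pid);
	return 0;
}

static void __exit example_gpu_stats_exit(void)
{
	sysstats_unregister_kgsl_stats_cb();
}
module_init(example_gpu_stats_init);
module_exit(example_gpu_stats_exit);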
|
||||
|
||||
static int sysstats_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
|
||||
struct genl_info *info)
|
||||
{
|
||||
const struct nla_policy *policy = NULL;
|
||||
|
||||
switch (ops->cmd) {
|
||||
case SYSSTATS_TASK_CMD_GET:
|
||||
case SYSSTATS_PIDS_CMD_GET:
|
||||
policy = sysstats_cmd_get_policy;
|
||||
break;
|
||||
case SYSSTATS_MEMINFO_CMD_GET:
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return nlmsg_validate_deprecated(info->nlhdr, GENL_HDRLEN,
|
||||
SYSSTATS_CMD_ATTR_MAX, policy,
|
||||
info->extack);
|
||||
}
|
||||
|
||||
static int send_reply(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
|
||||
void *reply = genlmsg_data(genlhdr);
|
||||
|
||||
genlmsg_end(skb, reply);
|
||||
|
||||
return genlmsg_reply(skb, info);
|
||||
}
|
||||
|
||||
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
|
||||
size_t size)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
void *reply;
|
||||
|
||||
skb = genlmsg_new(size, GFP_KERNEL);
|
||||
if (!skb)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!info) {
|
||||
int seq = this_cpu_inc_return(sysstats_seqnum) - 1;
|
||||
|
||||
reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
|
||||
} else
|
||||
reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
|
||||
if (reply == NULL) {
|
||||
nlmsg_free(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*skbp = skb;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct task_struct *find_lock_task_mm(struct task_struct *p)
|
||||
{
|
||||
struct task_struct *t;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
for_each_thread(p, t) {
|
||||
task_lock(t);
|
||||
if (likely(t->mm))
|
||||
goto found;
|
||||
task_unlock(t);
|
||||
}
|
||||
t = NULL;
|
||||
found:
|
||||
rcu_read_unlock();
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
static struct sighand_struct *sysstats_lock_task_sighand(struct task_struct *tsk,
|
||||
unsigned long *flags)
|
||||
{
|
||||
struct sighand_struct *sighand;
|
||||
|
||||
rcu_read_lock();
|
||||
for (;;) {
|
||||
sighand = rcu_dereference(tsk->sighand);
|
||||
if (unlikely(sighand == NULL))
|
||||
break;
|
||||
|
||||
spin_lock_irqsave(&sighand->siglock, *flags);
|
||||
if (likely(sighand == tsk->sighand))
|
||||
break;
|
||||
spin_unlock_irqrestore(&sighand->siglock, *flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return sighand;
|
||||
}
|
||||
|
||||
static bool is_system_dmabufheap(struct dma_buf *dmabuf)
|
||||
{
|
||||
if (!strcmp(dmabuf->exp_name, "qcom,system") ||
|
||||
!strcmp(dmabuf->exp_name, "qcom,system-uncached") ||
|
||||
!strcmp(dmabuf->exp_name, "system-secure") ||
|
||||
!strcmp(dmabuf->exp_name, "qcom,secure-pixel") ||
|
||||
!strcmp(dmabuf->exp_name, "qcom,secure-non-pixel"))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int get_dma_info(const void *data, struct file *file, unsigned int n)
|
||||
{
|
||||
struct dma_buf *dmabuf;
|
||||
unsigned long *size = (unsigned long *)data;
|
||||
|
||||
if (!qcom_is_dma_buf_file(file))
|
||||
return 0;
|
||||
|
||||
dmabuf = (struct dma_buf *)file->private_data;
|
||||
if (is_system_dmabufheap(dmabuf))
|
||||
*size += dmabuf->size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long get_task_unreclaimable_info(struct task_struct *task)
|
||||
{
|
||||
struct task_struct *thread;
|
||||
struct files_struct *files;
|
||||
struct files_struct *group_leader_files = NULL;
|
||||
unsigned long size = 0;
|
||||
int ret = 0;
|
||||
|
||||
for_each_thread(task, thread) {
|
||||
/* task is already locked; don't lock/unlock it again. */
|
||||
if (task != thread)
|
||||
task_lock(thread);
|
||||
if (unlikely(!group_leader_files))
|
||||
group_leader_files = task->group_leader->files;
|
||||
files = thread->files;
|
||||
if (files && (group_leader_files != files ||
|
||||
thread == task->group_leader))
|
||||
ret = iterate_fd(files, 0, get_dma_info, &size);
|
||||
if (task != thread)
|
||||
task_unlock(thread);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
return size >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static unsigned long get_system_unreclaimble_info(void)
|
||||
{
|
||||
struct task_struct *task;
|
||||
unsigned long size = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_process(task) {
|
||||
task_lock(task);
|
||||
size += get_task_unreclaimable_info(task);
|
||||
task_unlock(task);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/* Account the kgsl information. */
|
||||
size += sysstats_kgsl_get_stats(-1) >> PAGE_SHIFT;
|
||||
|
||||
return size;
|
||||
}
|
||||
static char *nla_strdup_cust(const struct nlattr *nla, gfp_t flags)
|
||||
{
|
||||
size_t srclen = nla_len(nla);
|
||||
char *src = nla_data(nla), *dst;
|
||||
|
||||
if (srclen > 0 && src[srclen - 1] == '\0')
|
||||
srclen--;
|
||||
|
||||
dst = kmalloc(srclen + 1, flags);
|
||||
if (dst != NULL) {
|
||||
memcpy(dst, src, srclen);
|
||||
dst[srclen] = '\0';
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
static int sysstats_task_cmd_attr_pid(struct genl_info *info)
|
||||
{
|
||||
struct sysstats_task *stats;
|
||||
struct sk_buff *rep_skb;
|
||||
struct nlattr *ret;
|
||||
struct task_struct *tsk;
|
||||
struct task_struct *p;
|
||||
size_t size;
|
||||
u32 pid;
|
||||
int rc;
|
||||
u64 utime, stime;
|
||||
const struct cred *tcred;
|
||||
#ifdef CONFIG_CPUSETS
|
||||
struct cgroup_subsys_state *css;
|
||||
#endif
|
||||
unsigned long flags;
|
||||
struct signal_struct *sig;
|
||||
|
||||
size = nla_total_size_64bit(sizeof(struct sysstats_task));
|
||||
|
||||
rc = prepare_reply(info, SYSSTATS_TASK_CMD_NEW, &rep_skb, size);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
rc = -EINVAL;
|
||||
pid = nla_get_u32(info->attrs[SYSSTATS_TASK_CMD_ATTR_PID]);
|
||||
|
||||
ret = nla_reserve_64bit(rep_skb, SYSSTATS_TASK_TYPE_STATS,
|
||||
sizeof(struct sysstats_task), SYSSTATS_TYPE_NULL);
|
||||
if (!ret)
|
||||
goto err;
|
||||
|
||||
stats = nla_data(ret);
|
||||
|
||||
rcu_read_lock();
|
||||
tsk = find_task_by_vpid(pid);
|
||||
if (tsk)
|
||||
get_task_struct(tsk);
|
||||
rcu_read_unlock();
|
||||
if (!tsk) {
|
||||
rc = -ESRCH;
|
||||
goto err;
|
||||
}
|
||||
memset(stats, 0, sizeof(*stats));
|
||||
stats->pid = task_pid_nr_ns(tsk, task_active_pid_ns(current));
|
||||
p = find_lock_task_mm(tsk);
|
||||
if (p) {
|
||||
__acquire(p->alloc_lock);
|
||||
#define K(x) ((x) << (PAGE_SHIFT - 10))
|
||||
stats->anon_rss = K(get_mm_counter(p->mm, MM_ANONPAGES));
|
||||
stats->file_rss = K(get_mm_counter(p->mm, MM_FILEPAGES));
|
||||
stats->shmem_rss = K(get_mm_counter(p->mm, MM_SHMEMPAGES));
|
||||
stats->swap_rss = K(get_mm_counter(p->mm, MM_SWAPENTS));
|
||||
stats->unreclaimable = K(get_task_unreclaimable_info(p));
|
||||
#undef K
|
||||
task_unlock(p);
|
||||
}
|
||||
|
||||
stats->unreclaimable += sysstats_kgsl_get_stats(stats->pid) >> 10;
|
||||
|
||||
task_cputime(tsk, &utime, &stime);
|
||||
stats->utime = div_u64(utime, NSEC_PER_USEC);
|
||||
stats->stime = div_u64(stime, NSEC_PER_USEC);
|
||||
|
||||
if (sysstats_lock_task_sighand(tsk, &flags)) {
|
||||
sig = tsk->signal;
|
||||
stats->cutime = sig->cutime;
|
||||
stats->cstime = sig->cstime;
|
||||
unlock_task_sighand(tsk, &flags);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
tcred = __task_cred(tsk);
|
||||
stats->uid = from_kuid_munged(current_user_ns(), tcred->uid);
|
||||
stats->ppid = pid_alive(tsk) ?
|
||||
task_tgid_nr_ns(rcu_dereference(tsk->real_parent),
|
||||
task_active_pid_ns(current)) : 0;
|
||||
rcu_read_unlock();
|
||||
|
||||
strscpy(stats->name, tsk->comm, sizeof(stats->name));
|
||||
|
||||
#ifdef CONFIG_CPUSETS
|
||||
css = task_get_css(tsk, cpuset_cgrp_id);
|
||||
cgroup_path_ns(css->cgroup, stats->state, sizeof(stats->state),
|
||||
current->nsproxy->cgroup_ns);
|
||||
css_put(css);
|
||||
#endif
|
||||
|
||||
put_task_struct(tsk);
|
||||
|
||||
return send_reply(rep_skb, info);
|
||||
err:
|
||||
nlmsg_free(rep_skb);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int sysstats_task_user_cmd(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
if (info->attrs[SYSSTATS_TASK_CMD_ATTR_PID])
|
||||
return sysstats_task_cmd_attr_pid(info);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
|
||||
{
|
||||
struct pid *pid;
|
||||
|
||||
if (iter.task)
|
||||
put_task_struct(iter.task);
|
||||
rcu_read_lock();
|
||||
retry:
|
||||
iter.task = NULL;
|
||||
pid = idr_get_next(&ns->idr, &iter.tgid);
|
||||
if (pid) {
|
||||
iter.tgid = pid_nr_ns(pid, ns);
|
||||
iter.task = pid_task(pid, PIDTYPE_TGID);
|
||||
if (!iter.task) {
|
||||
iter.tgid += 1;
|
||||
goto retry;
|
||||
}
|
||||
get_task_struct(iter.task);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return iter;
|
||||
}
|
||||
|
||||
static int sysstats_all_pids_of_name(struct sk_buff *skb, struct netlink_callback *cb)
|
||||
{
|
||||
struct pid_namespace *ns = task_active_pid_ns(current);
|
||||
struct tgid_iter iter;
|
||||
void *reply;
|
||||
struct nlattr *attr;
|
||||
struct nlattr *nla;
|
||||
struct sysstats_pid *stats;
|
||||
char *comm;
|
||||
|
||||
nla = nla_find(nlmsg_attrdata(cb->nlh, GENL_HDRLEN),
|
||||
nlmsg_attrlen(cb->nlh, GENL_HDRLEN),
|
||||
SYSSTATS_TASK_CMD_ATTR_PIDS_OF_NAME);
|
||||
if (!nla)
|
||||
goto out;
|
||||
|
||||
comm = nla_strdup_cust(nla, GFP_KERNEL);
|
||||
if (!comm)
|
||||
goto out;
|
||||
|
||||
iter.tgid = cb->args[0];
|
||||
iter.task = NULL;
|
||||
for (iter = next_tgid(ns, iter); iter.task;
|
||||
iter.tgid += 1, iter = next_tgid(ns, iter)) {
|
||||
|
||||
if (strcmp(iter.task->comm, comm))
|
||||
continue;
|
||||
reply = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
|
||||
cb->nlh->nlmsg_seq, &family, 0, SYSSTATS_PIDS_CMD_GET);
|
||||
if (reply == NULL) {
|
||||
put_task_struct(iter.task);
|
||||
break;
|
||||
}
|
||||
attr = nla_reserve(skb, SYSSTATS_PID_TYPE_STATS,
|
||||
sizeof(struct sysstats_pid));
|
||||
if (!attr) {
|
||||
put_task_struct(iter.task);
|
||||
genlmsg_cancel(skb, reply);
|
||||
break;
|
||||
}
|
||||
stats = nla_data(attr);
|
||||
memset(stats, 0, sizeof(struct sysstats_pid));
|
||||
rcu_read_lock();
|
||||
stats->pid = task_pid_nr_ns(iter.task,
|
||||
task_active_pid_ns(current));
|
||||
rcu_read_unlock();
|
||||
genlmsg_end(skb, reply);
|
||||
}
|
||||
cb->args[0] = iter.tgid;
|
||||
kfree(comm);
|
||||
out:
|
||||
return skb->len;
|
||||
}
|
||||
|
||||
static int sysstats_task_foreach(struct sk_buff *skb, struct netlink_callback *cb)
|
||||
{
|
||||
struct pid_namespace *ns = task_active_pid_ns(current);
|
||||
struct tgid_iter iter;
|
||||
void *reply;
|
||||
struct nlattr *attr;
|
||||
struct nlattr *nla;
|
||||
struct sysstats_task *stats;
|
||||
struct task_struct *p;
|
||||
short oom_score;
|
||||
short oom_score_min;
|
||||
short oom_score_max;
|
||||
u32 buf;
|
||||
|
||||
nla = nla_find(nlmsg_attrdata(cb->nlh, GENL_HDRLEN),
|
||||
nlmsg_attrlen(cb->nlh, GENL_HDRLEN),
|
||||
SYSSTATS_TASK_CMD_ATTR_FOREACH);
|
||||
|
||||
if (!nla)
|
||||
goto out;
|
||||
|
||||
buf = nla_get_u32(nla);
|
||||
oom_score_min = (short) (buf & 0xFFFF);
|
||||
oom_score_max = (short) ((buf >> 16) & 0xFFFF);
|
||||
|
||||
iter.tgid = cb->args[0];
|
||||
iter.task = NULL;
|
||||
for (iter = next_tgid(ns, iter); iter.task;
|
||||
iter.tgid += 1, iter = next_tgid(ns, iter)) {
|
||||
|
||||
if (iter.task->flags & PF_KTHREAD)
|
||||
continue;
|
||||
|
||||
oom_score = iter.task->signal->oom_score_adj;
|
||||
if ((oom_score < oom_score_min)
|
||||
|| (oom_score > oom_score_max))
|
||||
continue;
|
||||
|
||||
reply = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
|
||||
cb->nlh->nlmsg_seq, &family, 0, SYSSTATS_TASK_CMD_GET);
|
||||
if (reply == NULL) {
|
||||
put_task_struct(iter.task);
|
||||
break;
|
||||
}
|
||||
attr = nla_reserve(skb, SYSSTATS_TASK_TYPE_FOREACH,
|
||||
sizeof(struct sysstats_task));
|
||||
if (!attr) {
|
||||
put_task_struct(iter.task);
|
||||
genlmsg_cancel(skb, reply);
|
||||
break;
|
||||
}
|
||||
stats = nla_data(attr);
|
||||
memset(stats, 0, sizeof(struct sysstats_task));
|
||||
rcu_read_lock();
|
||||
stats->pid = task_pid_nr_ns(iter.task,
|
||||
task_active_pid_ns(current));
|
||||
stats->oom_score = iter.task->signal->oom_score_adj;
|
||||
rcu_read_unlock();
|
||||
p = find_lock_task_mm(iter.task);
|
||||
if (p) {
|
||||
#define K(x) ((x) << (PAGE_SHIFT - 10))
|
||||
__acquire(p->alloc_lock);
|
||||
stats->anon_rss =
|
||||
K(get_mm_counter(p->mm, MM_ANONPAGES));
|
||||
stats->file_rss =
|
||||
K(get_mm_counter(p->mm, MM_FILEPAGES));
|
||||
stats->shmem_rss =
|
||||
K(get_mm_counter(p->mm, MM_SHMEMPAGES));
|
||||
stats->swap_rss =
|
||||
K(get_mm_counter(p->mm, MM_SWAPENTS));
|
||||
stats->unreclaimable = K(get_task_unreclaimable_info(p));
|
||||
task_unlock(p);
|
||||
#undef K
|
||||
}
|
||||
genlmsg_end(skb, reply);
|
||||
}
|
||||
|
||||
cb->args[0] = iter.tgid;
|
||||
out:
|
||||
return skb->len;
|
||||
}
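/*
 * Illustrative sketch, not from this patch: one way a requester can
 * build the SYSSTATS_TASK_CMD_ATTR_FOREACH u32 payload that
 * sysstats_task_foreach() above unpacks (low 16 bits = minimum
 * oom_score_adj, high 16 bits = maximum, both signed shorts).
 */
static inline u32 example_pack_adj_range(short min_adj, short max_adj)
{
	return ((u32)(unsigned short)max_adj << 16) | (unsigned short)min_adj;
}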
|
||||
|
||||
#define K(x) ((x) << (PAGE_SHIFT - 10))
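/*
 * For reference: K() converts a page count to KiB, i.e. x << (PAGE_SHIFT - 10).
 * With 4 KiB pages (PAGE_SHIFT == 12) that is x * 4, so K(25) == 100.
 */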
|
||||
#ifndef CONFIG_NUMA
|
||||
static void sysstats_fill_zoneinfo(struct sysstats_mem *stats)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
struct zone *zone;
|
||||
struct zone *node_zones;
|
||||
unsigned long zspages = 0;
|
||||
|
||||
pgdat = NODE_DATA(0);
|
||||
node_zones = pgdat->node_zones;
|
||||
|
||||
for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
|
||||
if (!populated_zone(zone))
|
||||
continue;
|
||||
|
||||
zspages += zone_page_state(zone, NR_ZSPAGES);
|
||||
if (!strcmp(zone->name, "DMA")) {
|
||||
stats->dma_nr_free =
|
||||
K(zone_page_state(zone, NR_FREE_PAGES));
|
||||
stats->dma_nr_active_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
|
||||
stats->dma_nr_inactive_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
|
||||
stats->dma_nr_active_file =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
|
||||
stats->dma_nr_inactive_file =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
|
||||
} else if (!strcmp(zone->name, "Normal")) {
|
||||
stats->normal_nr_free =
|
||||
K(zone_page_state(zone, NR_FREE_PAGES));
|
||||
stats->normal_nr_active_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
|
||||
stats->normal_nr_inactive_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
|
||||
stats->normal_nr_active_file =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
|
||||
stats->normal_nr_inactive_file =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
|
||||
} else if (!strcmp(zone->name, "HighMem")) {
|
||||
stats->highmem_nr_free =
|
||||
K(zone_page_state(zone, NR_FREE_PAGES));
|
||||
stats->highmem_nr_active_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
|
||||
stats->highmem_nr_inactive_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
|
||||
stats->highmem_nr_active_file =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
|
||||
stats->highmem_nr_inactive_file =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
|
||||
} else if (!strcmp(zone->name, "Movable")) {
|
||||
stats->movable_nr_free =
|
||||
K(zone_page_state(zone, NR_FREE_PAGES));
|
||||
stats->movable_nr_active_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
|
||||
stats->movable_nr_inactive_anon =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
|
||||
stats->movable_nr_active_file =
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
|
||||
stats->movable_nr_inactive_file =
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
|
||||
}
|
||||
}
|
||||
stats->zram_compressed = K(zspages);
|
||||
}
|
||||
#else
|
||||
static void sysstats_fill_zoneinfo(struct sysstats_mem *stats)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static void sysstats_build(struct sysstats_mem *stats)
|
||||
{
|
||||
struct sysinfo i;
|
||||
|
||||
si_meminfo(&i);
|
||||
#ifndef CONFIG_MSM_SYSSTATS_STUB_NONEXPORTED_SYMBOLS
|
||||
si_swapinfo(&i);
|
||||
stats->swap_used = K(i.totalswap - i.freeswap);
|
||||
stats->swap_total = K(i.totalswap);
|
||||
stats->vmalloc_total = K(vmalloc_nr_pages());
|
||||
#else
|
||||
stats->swap_used = 0;
|
||||
stats->swap_total = 0;
|
||||
stats->vmalloc_total = 0;
|
||||
#endif
|
||||
stats->memtotal = K(i.totalram);
|
||||
stats->misc_reclaimable =
|
||||
K(global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
|
||||
stats->unreclaimable = K(get_system_unreclaimble_info());
|
||||
stats->buffer = K(i.bufferram);
|
||||
stats->swapcache = K(total_swapcache_pages());
|
||||
stats->slab_reclaimable =
|
||||
K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B));
|
||||
stats->slab_unreclaimable =
|
||||
K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B));
|
||||
stats->free_cma = K(global_zone_page_state(NR_FREE_CMA_PAGES));
|
||||
stats->file_mapped = K(global_node_page_state(NR_FILE_MAPPED));
|
||||
stats->kernelstack = global_node_page_state(NR_KERNEL_STACK_KB);
|
||||
stats->pagetable = K(global_node_page_state(NR_PAGETABLE));
|
||||
stats->shmem = K(i.sharedram);
|
||||
sysstats_fill_zoneinfo(stats);
|
||||
}
|
||||
#undef K
|
||||
|
||||
static int sysstats_meminfo_user_cmd(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
int rc = 0;
|
||||
struct sk_buff *rep_skb;
|
||||
struct sysstats_mem *stats;
|
||||
struct nlattr *na;
|
||||
size_t size;
|
||||
|
||||
size = nla_total_size(sizeof(struct sysstats_mem));
|
||||
|
||||
rc = prepare_reply(info, SYSSTATS_MEMINFO_CMD_NEW, &rep_skb,
|
||||
size);
|
||||
if (rc < 0)
|
||||
goto err;
|
||||
|
||||
na = nla_reserve(rep_skb, SYSSTATS_MEMINFO_TYPE_STATS,
|
||||
sizeof(struct sysstats_mem));
|
||||
if (na == NULL) {
|
||||
nlmsg_free(rep_skb);
|
||||
rc = -EMSGSIZE;
|
||||
goto err;
|
||||
}
|
||||
|
||||
stats = nla_data(na);
|
||||
memset(stats, 0, sizeof(*stats));
|
||||
|
||||
sysstats_build(stats);
|
||||
|
||||
rc = send_reply(rep_skb, info);
|
||||
err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static const struct genl_ops sysstats_ops[] = {
|
||||
{
|
||||
.cmd = SYSSTATS_TASK_CMD_GET,
|
||||
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
|
||||
.doit = sysstats_task_user_cmd,
|
||||
.dumpit = sysstats_task_foreach,
|
||||
},
|
||||
{
|
||||
.cmd = SYSSTATS_MEMINFO_CMD_GET,
|
||||
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
|
||||
.doit = sysstats_meminfo_user_cmd,
|
||||
},
|
||||
{
|
||||
.cmd = SYSSTATS_PIDS_CMD_GET,
|
||||
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
|
||||
.dumpit = sysstats_all_pids_of_name,
|
||||
}
|
||||
};
|
||||
|
||||
static struct genl_family family __ro_after_init = {
|
||||
.name = SYSSTATS_GENL_NAME,
|
||||
.version = SYSSTATS_GENL_VERSION,
|
||||
.maxattr = SYSSTATS_CMD_ATTR_MAX,
|
||||
.module = THIS_MODULE,
|
||||
.ops = sysstats_ops,
|
||||
.n_ops = ARRAY_SIZE(sysstats_ops),
|
||||
.pre_doit = sysstats_pre_doit,
|
||||
.resv_start_op = SYSSTATS_PIDS_CMD_GET + 1,
|
||||
};
|
||||
|
||||
static int __init sysstats_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = genl_register_family(&family);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
sysstats_register_kgsl_stats_cb(sysstats_kgsl_stats);
|
||||
pr_info("registered sysstats version %d\n", SYSSTATS_GENL_VERSION);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit sysstats_exit(void)
|
||||
{
|
||||
genl_unregister_family(&family);
|
||||
}
|
||||
|
||||
module_init(sysstats_init);
|
||||
module_exit(sysstats_exit);
|
||||
MODULE_IMPORT_NS(MINIDUMP);
|
||||
MODULE_LICENSE("GPL");
|
@@ -61,7 +61,6 @@ bool crash_kexec_post_notifiers;
|
||||
int panic_on_warn __read_mostly;
|
||||
unsigned long panic_on_taint;
|
||||
bool panic_on_taint_nousertaint = false;
|
||||
bool stop_on_panic = false;
|
||||
static unsigned int warn_limit __read_mostly;
|
||||
|
||||
int panic_timeout = CONFIG_PANIC_TIMEOUT;
|
||||
@@ -398,11 +397,6 @@ void panic(const char *fmt, ...)
|
||||
|
||||
panic_print_sys_info(true);
|
||||
|
||||
if (stop_on_panic) {
|
||||
pr_emerg("stop_on_panic is called, freezing...\n");
|
||||
while(1);
|
||||
}
|
||||
|
||||
if (!panic_blink)
|
||||
panic_blink = no_blink;
|
||||
|
||||
@@ -783,7 +777,6 @@ core_param(panic_print, panic_print, ulong, 0644);
|
||||
core_param(pause_on_oops, pause_on_oops, int, 0644);
|
||||
core_param(panic_on_warn, panic_on_warn, int, 0644);
|
||||
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
|
||||
core_param(stop_on_panic, stop_on_panic, bool, 0644);
|
||||
|
||||
static int __init oops_setup(char *s)
|
||||
{
|
||||
@@ -821,10 +814,3 @@ static int __init panic_on_taint_setup(char *s)
|
||||
return 0;
|
||||
}
|
||||
early_param("panic_on_taint", panic_on_taint_setup);
|
||||
|
||||
static int __init stop_on_panic_setup(char *s)
|
||||
{
|
||||
stop_on_panic = true;
|
||||
return 0;
|
||||
}
|
||||
early_param("stop_on_panic", stop_on_panic_setup);
|
||||
|
@@ -27,13 +27,11 @@
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/swait.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/rtc.h>
|
||||
#include <trace/events/power.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/wakeup_reason.h>
|
||||
#include <trace/hooks/suspend.h>
|
||||
#include <linux/regulator/machine.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
@@ -425,9 +423,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
|
||||
suspend_stats.failed_devs[last_dev]);
|
||||
goto Platform_finish;
|
||||
}
|
||||
|
||||
regulator_show_enabled();
|
||||
|
||||
error = platform_suspend_prepare_late(state);
|
||||
if (error)
|
||||
goto Devices_early_resume;
|
||||
@@ -632,18 +627,6 @@ static int enter_state(suspend_state_t state)
|
||||
return error;
|
||||
}
|
||||
|
||||
static void pm_suspend_marker(char *annotation)
|
||||
{
|
||||
struct timespec64 ts;
|
||||
struct rtc_time tm;
|
||||
|
||||
ktime_get_real_ts64(&ts);
|
||||
rtc_time64_to_tm(ts.tv_sec, &tm);
|
||||
pr_info("suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n",
|
||||
annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
|
||||
tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_suspend - Externally visible function for suspending the system.
|
||||
* @state: System sleep state to enter.
|
||||
@@ -658,7 +641,7 @@ int pm_suspend(suspend_state_t state)
|
||||
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
pm_suspend_marker("entry");
|
||||
pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);
|
||||
error = enter_state(state);
|
||||
if (error) {
|
||||
suspend_stats.fail++;
|
||||
@@ -666,7 +649,7 @@ int pm_suspend(suspend_state_t state)
|
||||
} else {
|
||||
suspend_stats.success++;
|
||||
}
|
||||
pm_suspend_marker("exit");
|
||||
pr_info("suspend exit\n");
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL(pm_suspend);
|
||||
|
@@ -7,4 +7,4 @@ obj-$(CONFIG_PRINTK_INDEX) += index.o
|
||||
obj-$(CONFIG_PRINTK) += printk_support.o
|
||||
printk_support-y := printk_ringbuffer.o
|
||||
printk_support-$(CONFIG_SYSCTL) += sysctl.o
|
||||
printk_support-y += cx_gdsc_debug.o
|
||||
|
@@ -33,7 +33,6 @@
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/syscall_user_dispatch.h>
|
||||
#include <linux/task_integrity.h>
|
||||
|
||||
#include <asm/syscall.h> /* for syscall_get_* */
|
||||
|
||||
@@ -1283,7 +1282,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
|
||||
long ret;
|
||||
|
||||
if (request == PTRACE_TRACEME) {
|
||||
five_ptrace(current, request);
|
||||
ret = ptrace_traceme();
|
||||
goto out;
|
||||
}
|
||||
@@ -1294,8 +1292,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
|
||||
goto out;
|
||||
}
|
||||
|
||||
five_ptrace(child, request);
|
||||
|
||||
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
|
||||
ret = ptrace_attach(child, request, addr, data);
|
||||
goto out_put_task_struct;
|
||||
@@ -1425,7 +1421,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
|
||||
long ret;
|
||||
|
||||
if (request == PTRACE_TRACEME) {
|
||||
five_ptrace(current, request);
|
||||
ret = ptrace_traceme();
|
||||
goto out;
|
||||
}
|
||||
@@ -1436,8 +1431,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
|
||||
goto out;
|
||||
}
|
||||
|
||||
five_ptrace(child, request);
|
||||
|
||||
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
|
||||
ret = ptrace_attach(child, request, addr, data);
|
||||
goto out_put_task_struct;
|
||||
|
@@ -33,3 +33,4 @@ obj-y += fair.o
|
||||
obj-y += build_policy.o
|
||||
obj-y += build_utility.o
|
||||
obj-$(CONFIG_ANDROID_VENDOR_HOOKS) += vendor_hooks.o
|
||||
obj-$(CONFIG_SCHED_WALT) += walt/
|
||||
|
kernel/sched/walt/Kconfig (new file, 41 lines)
@@ -0,0 +1,41 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# QTI WALT based scheduler
|
||||
#
|
||||
menu "QTI WALT based scheduler features"
|
||||
|
||||
config SCHED_WALT
|
||||
tristate "Support window based load tracking"
|
||||
depends on SMP
|
||||
help
|
||||
This feature will allow the scheduler to maintain a tunable window
|
||||
based set of metrics for tasks and runqueues. These metrics can be
|
||||
used to guide task placement as well as task frequency requirements
|
||||
for cpufreq governors.
|
||||
|
||||
config SCHED_WALT_DEBUG
|
||||
tristate "WALT debug module"
|
||||
depends on SCHED_WALT
|
||||
select TRACE_PREEMPT_TOGGLE
|
||||
select TRACE_IRQFLAGS
|
||||
help
|
||||
This module provides a means of debugging long preempt- and
irq-disabled code paths, which helps in identifying scheduling
latencies. It relies on the preemptirq trace hooks and prints a
stacktrace to ftrace on long preempt/irq-off events. Sysctl knobs
are available for the user to configure the thresholds.

This module can also be used to crash the system to catch issues
in scenarios like RT throttling and sleeping in atomic context.
|
||||
|
||||
config SCHED_CONSERVATIVE_BOOST_LPM_BIAS
|
||||
bool "Enable LPM bias if conservative boost is enabled"
|
||||
default n
|
||||
help
|
||||
This feature will allow the scheduler to disable low power
|
||||
modes on a cpu if conservative boost is active. The cpu
|
||||
will not enter low power mode for a hysteresis time period,
|
||||
which can be configured from userspace.
|
||||
endmenu
|
kernel/sched/walt/Makefile (new file, 10 lines)
@@ -0,0 +1,10 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
KCOV_INSTRUMENT := n
|
||||
KCSAN_SANITIZE := n
|
||||
|
||||
obj-$(CONFIG_SCHED_WALT) += sched-walt.o
|
||||
sched-walt-$(CONFIG_SCHED_WALT) := walt.o boost.o sched_avg.o walt_halt.o core_ctl.o trace.o input-boost.o sysctl.o cpufreq_walt.o fixup.o walt_lb.o walt_rt.o walt_cfs.o walt_tp.o walt_config.o walt_cpufreq_cycle_cntr_driver.o walt_gclk_cycle_counter_driver.o walt_cycles.o debugfs.o pipeline.o smart_freq.o mvp_locking.o
|
||||
|
||||
obj-$(CONFIG_SCHED_WALT_DEBUG) += sched-walt-debug.o
|
||||
sched-walt-debug-$(CONFIG_SCHED_WALT_DEBUG) := walt_debug.o preemptirq_long.o
|
kernel/sched/walt/boost.c (new file, 359 lines)
@@ -0,0 +1,359 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/of.h>
|
||||
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
|
||||
/*
|
||||
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
|
||||
* with higher capacity than those where a task would have normally
|
||||
* ended up with their load characteristics. Any entity enabling
|
||||
* boost is responsible for disabling it as well.
|
||||
*/
|
||||
unsigned int sched_boost_type;
|
||||
enum sched_boost_policy boost_policy;
|
||||
|
||||
static DEFINE_MUTEX(boost_mutex);
|
||||
|
||||
void walt_init_tg(struct task_group *tg)
|
||||
{
|
||||
struct walt_task_group *wtg;
|
||||
|
||||
wtg = (struct walt_task_group *) tg->android_vendor_data1;
|
||||
|
||||
wtg->colocate = false;
|
||||
wtg->sched_boost_enable[NO_BOOST] = false;
|
||||
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
|
||||
wtg->sched_boost_enable[CONSERVATIVE_BOOST] = false;
|
||||
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
|
||||
wtg->sched_boost_enable[STORAGE_BOOST] = true;
|
||||
wtg->sched_boost_enable[BALANCE_BOOST] = false;
|
||||
}
|
||||
|
||||
void walt_init_topapp_tg(struct task_group *tg)
|
||||
{
|
||||
struct walt_task_group *wtg;
|
||||
|
||||
wtg = (struct walt_task_group *) tg->android_vendor_data1;
|
||||
|
||||
wtg->colocate = true;
|
||||
wtg->sched_boost_enable[NO_BOOST] = false;
|
||||
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
|
||||
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
|
||||
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
|
||||
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
|
||||
wtg->sched_boost_enable[STORAGE_BOOST] = true;
|
||||
wtg->sched_boost_enable[BALANCE_BOOST] = true;
|
||||
}
|
||||
|
||||
void walt_init_foreground_tg(struct task_group *tg)
|
||||
{
|
||||
struct walt_task_group *wtg;
|
||||
|
||||
wtg = (struct walt_task_group *) tg->android_vendor_data1;
|
||||
|
||||
wtg->colocate = false;
|
||||
wtg->sched_boost_enable[NO_BOOST] = false;
|
||||
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
|
||||
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
|
||||
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
|
||||
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
|
||||
wtg->sched_boost_enable[STORAGE_BOOST] = true;
|
||||
wtg->sched_boost_enable[BALANCE_BOOST] = true;
|
||||
}
|
||||
|
||||
void walt_init_foregroundboost_tg(struct task_group *tg)
|
||||
{
|
||||
struct walt_task_group *wtg;
|
||||
|
||||
wtg = (struct walt_task_group *) tg->android_vendor_data1;
|
||||
|
||||
wtg->colocate = false;
|
||||
wtg->sched_boost_enable[NO_BOOST] = false;
|
||||
wtg->sched_boost_enable[FULL_THROTTLE_BOOST] = true;
|
||||
wtg->sched_boost_enable[CONSERVATIVE_BOOST] =
|
||||
soc_feat(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
|
||||
wtg->sched_boost_enable[RESTRAINED_BOOST] = false;
|
||||
wtg->sched_boost_enable[STORAGE_BOOST] = true;
|
||||
wtg->sched_boost_enable[BALANCE_BOOST] = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scheduler boost type and boost policy might at first seem unrelated,
|
||||
* however, there exists a connection between them that will allow us
|
||||
* to use them interchangeably during placement decisions. We'll explain
|
||||
* the connection here in one possible way so that the implications are
|
||||
* clear when looking at placement policies.
|
||||
*
|
||||
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
|
||||
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
|
||||
* neither be none nor RESTRAINED.
|
||||
*/
|
||||
static void set_boost_policy(int type)
|
||||
{
|
||||
if (type == NO_BOOST || type == RESTRAINED_BOOST) {
|
||||
boost_policy = SCHED_BOOST_NONE;
|
||||
return;
|
||||
}
|
||||
|
||||
if (hmp_capable()) {
|
||||
boost_policy = SCHED_BOOST_ON_BIG;
|
||||
return;
|
||||
}
|
||||
|
||||
boost_policy = SCHED_BOOST_ON_ALL;
|
||||
}
|
||||
|
||||
static bool verify_boost_params(int type)
|
||||
{
|
||||
return type >= BALANCE_BOOST_DISABLE && type <= BALANCE_BOOST;
|
||||
}
|
||||
|
||||
static void sched_no_boost_nop(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void sched_full_throttle_boost_enter(void)
|
||||
{
|
||||
core_ctl_set_boost(true);
|
||||
walt_enable_frequency_aggregation(true);
|
||||
}
|
||||
|
||||
static void sched_full_throttle_boost_exit(void)
|
||||
{
|
||||
core_ctl_set_boost(false);
|
||||
walt_enable_frequency_aggregation(false);
|
||||
}
|
||||
|
||||
static void sched_conservative_boost_enter(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void sched_conservative_boost_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void sched_restrained_boost_enter(void)
|
||||
{
|
||||
walt_enable_frequency_aggregation(true);
|
||||
}
|
||||
|
||||
static void sched_restrained_boost_exit(void)
|
||||
{
|
||||
walt_enable_frequency_aggregation(false);
|
||||
}
|
||||
|
||||
static void sched_storage_boost_enter(void)
|
||||
{
|
||||
core_ctl_set_boost(true);
|
||||
}
|
||||
|
||||
static void sched_storage_boost_exit(void)
|
||||
{
|
||||
core_ctl_set_boost(false);
|
||||
}
|
||||
|
||||
static void sched_balance_boost_enter(void)
|
||||
{
|
||||
core_ctl_set_boost(true);
|
||||
}
|
||||
|
||||
static void sched_balance_boost_exit(void)
|
||||
{
|
||||
core_ctl_set_boost(false);
|
||||
}
|
||||
|
||||
|
||||
struct sched_boost_data {
|
||||
int refcount;
|
||||
void (*enter)(void);
|
||||
void (*exit)(void);
|
||||
};
|
||||
|
||||
static struct sched_boost_data sched_boosts[] = {
|
||||
[NO_BOOST] = {
|
||||
.refcount = 0,
|
||||
.enter = sched_no_boost_nop,
|
||||
.exit = sched_no_boost_nop,
|
||||
},
|
||||
[FULL_THROTTLE_BOOST] = {
|
||||
.refcount = 0,
|
||||
.enter = sched_full_throttle_boost_enter,
|
||||
.exit = sched_full_throttle_boost_exit,
|
||||
},
|
||||
[CONSERVATIVE_BOOST] = {
|
||||
.refcount = 0,
|
||||
.enter = sched_conservative_boost_enter,
|
||||
.exit = sched_conservative_boost_exit,
|
||||
},
|
||||
[RESTRAINED_BOOST] = {
|
||||
.refcount = 0,
|
||||
.enter = sched_restrained_boost_enter,
|
||||
.exit = sched_restrained_boost_exit,
|
||||
},
|
||||
[STORAGE_BOOST] = {
|
||||
.refcount = 0,
|
||||
.enter = sched_storage_boost_enter,
|
||||
.exit = sched_storage_boost_exit,
|
||||
},
|
||||
[BALANCE_BOOST] = {
|
||||
.refcount = 0,
|
||||
.enter = sched_balance_boost_enter,
|
||||
.exit = sched_balance_boost_exit,
|
||||
},
|
||||
};
|
||||
|
||||
#define SCHED_BOOST_START FULL_THROTTLE_BOOST
|
||||
#define SCHED_BOOST_END (BALANCE_BOOST + 1)
|
||||
|
||||
static int sched_effective_boost(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* The boosts are sorted in descending order by
|
||||
* priority.
|
||||
*/
|
||||
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++) {
|
||||
if (sched_boosts[i].refcount >= 1)
|
||||
return i;
|
||||
}
|
||||
|
||||
return NO_BOOST;
|
||||
}
|
||||
|
||||
static void sched_boost_disable(int type)
|
||||
{
|
||||
struct sched_boost_data *sb = &sched_boosts[type];
|
||||
int next_boost, prev_boost = sched_boost_type;
|
||||
|
||||
if (sb->refcount <= 0)
|
||||
return;
|
||||
|
||||
sb->refcount--;
|
||||
|
||||
if (sb->refcount)
|
||||
return;
|
||||
|
||||
next_boost = sched_effective_boost();
|
||||
if (next_boost == prev_boost)
|
||||
return;
|
||||
/*
|
||||
* This boost's refcount becomes zero, so it must
|
||||
* be disabled. Disable it first and then apply
|
||||
* the next boost.
|
||||
*/
|
||||
sched_boosts[prev_boost].exit();
|
||||
sched_boosts[next_boost].enter();
|
||||
}
|
||||
|
||||
static void sched_boost_enable(int type)
|
||||
{
|
||||
struct sched_boost_data *sb = &sched_boosts[type];
|
||||
int next_boost, prev_boost = sched_boost_type;
|
||||
|
||||
sb->refcount++;
|
||||
|
||||
if (sb->refcount != 1)
|
||||
return;
|
||||
|
||||
/*
|
||||
* This is the first enable request for this boost.
|
||||
* Take this new request and find the next boost
|
||||
* by aggregating all the enabled boosts. If there
|
||||
* is a change, disable the previous boost and enable
|
||||
* the next boost.
|
||||
*/
|
||||
|
||||
next_boost = sched_effective_boost();
|
||||
if (next_boost == prev_boost)
|
||||
return;
|
||||
|
||||
sched_boosts[prev_boost].exit();
|
||||
sched_boosts[next_boost].enter();
|
||||
}
|
||||
|
||||
static void sched_boost_disable_all(void)
|
||||
{
|
||||
int i;
|
||||
int prev_boost = sched_boost_type;
|
||||
|
||||
if (prev_boost != NO_BOOST) {
|
||||
sched_boosts[prev_boost].exit();
|
||||
for (i = SCHED_BOOST_START; i < SCHED_BOOST_END; i++)
|
||||
sched_boosts[i].refcount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void _sched_set_boost(int type)
|
||||
{
|
||||
if (type == 0)
|
||||
sched_boost_disable_all();
|
||||
else if (type > 0)
|
||||
sched_boost_enable(type);
|
||||
else
|
||||
sched_boost_disable(-type);
|
||||
|
||||
/*
|
||||
* sysctl_sched_boost holds the boost request from
|
||||
* user space which could be different from the
|
||||
* effectively enabled boost. Update the effective
|
||||
* boost here.
|
||||
*/
|
||||
|
||||
sched_boost_type = sched_effective_boost();
|
||||
sysctl_sched_boost = sched_boost_type;
|
||||
set_boost_policy(sysctl_sched_boost);
|
||||
trace_sched_set_boost(sysctl_sched_boost);
|
||||
}
|
||||
|
||||
int sched_set_boost(int type)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return -EAGAIN;
|
||||
|
||||
mutex_lock(&boost_mutex);
|
||||
if (verify_boost_params(type))
|
||||
_sched_set_boost(type);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
mutex_unlock(&boost_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_set_boost);
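/*
 * Illustrative sketch, not from this patch: a client module taking and
 * dropping a boost reference through the exported API above. A positive
 * type enables that boost, the negated type drops the reference, and 0
 * clears all boosts (see _sched_set_boost()).
 */
static void example_storage_boost_window(void)
{
	/* Returns -EAGAIN while WALT is disabled, -EINVAL for a bad type. */
	if (sched_set_boost(STORAGE_BOOST))
		return;

	/* ... latency-sensitive storage activity ... */

	sched_set_boost(-STORAGE_BOOST);
}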
|
||||
|
||||
int sched_boost_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
unsigned int *data = (unsigned int *)table->data;
|
||||
|
||||
mutex_lock(&boost_mutex);
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
|
||||
if (ret || !write)
|
||||
goto done;
|
||||
|
||||
if (verify_boost_params(*data))
|
||||
_sched_set_boost(*data);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
|
||||
done:
|
||||
mutex_unlock(&boost_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void walt_boost_init(void)
|
||||
{
|
||||
/* force call the callbacks for default boost */
|
||||
sched_set_boost(FULL_THROTTLE_BOOST);
|
||||
}
|
kernel/sched/walt/core_ctl.c (new file, 1921 lines; diff omitted, too large to display)
kernel/sched/walt/cpufreq_walt.c (new file, 1515 lines; diff omitted, too large to display)
kernel/sched/walt/debugfs.c (new file, 18 lines)
@@ -0,0 +1,18 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/debugfs.h>
|
||||
#include <trace/hooks/sched.h>
|
||||
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
|
||||
unsigned int debugfs_walt_features;
|
||||
static struct dentry *debugfs_walt;
|
||||
void walt_register_debugfs(void)
|
||||
{
|
||||
debugfs_walt = debugfs_create_dir("walt", NULL);
|
||||
debugfs_create_u32("walt_features", 0644, debugfs_walt, &debugfs_walt_features);
|
||||
}
|
kernel/sched/walt/fixup.c (new file, 185 lines)
@@ -0,0 +1,185 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2021-2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <trace/hooks/cpufreq.h>
|
||||
#include <trace/hooks/topology.h>
|
||||
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
unsigned int cpuinfo_max_freq_cached;
|
||||
|
||||
char sched_lib_name[LIB_PATH_LENGTH];
|
||||
char sched_lib_task[LIB_PATH_LENGTH];
|
||||
unsigned int sched_lib_mask_force;
|
||||
|
||||
static bool is_sched_lib_based_app(pid_t pid)
|
||||
{
|
||||
const char *name = NULL;
|
||||
char *libname, *lib_list;
|
||||
struct vm_area_struct *vma;
|
||||
char path_buf[LIB_PATH_LENGTH];
|
||||
char *tmp_lib_name;
|
||||
bool found = false;
|
||||
struct task_struct *p;
|
||||
struct mm_struct *mm;
|
||||
|
||||
if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0)
|
||||
return false;
|
||||
|
||||
tmp_lib_name = kmalloc(LIB_PATH_LENGTH, GFP_KERNEL);
|
||||
if (!tmp_lib_name)
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
p = pid ? get_pid_task(find_vpid(pid), PIDTYPE_PID) : get_task_struct(current);
|
||||
rcu_read_unlock();
|
||||
if (!p) {
|
||||
kfree(tmp_lib_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
mm = get_task_mm(p);
|
||||
if (mm) {
|
||||
MA_STATE(mas, &mm->mm_mt, 0, 0);
|
||||
down_read(&mm->mmap_lock);
|
||||
|
||||
mas_for_each(&mas, vma, ULONG_MAX) {
|
||||
if (vma->vm_file && vma->vm_flags & VM_EXEC) {
|
||||
name = d_path(&vma->vm_file->f_path,
|
||||
path_buf, LIB_PATH_LENGTH);
|
||||
if (IS_ERR(name))
|
||||
goto release_sem;
|
||||
|
||||
strscpy(tmp_lib_name, sched_lib_name, LIB_PATH_LENGTH);
|
||||
lib_list = tmp_lib_name;
|
||||
while ((libname = strsep(&lib_list, ","))) {
|
||||
libname = skip_spaces(libname);
|
||||
if (strnstr(name, libname,
|
||||
strnlen(name, LIB_PATH_LENGTH))) {
|
||||
found = true;
|
||||
goto release_sem;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
release_sem:
|
||||
up_read(&mm->mmap_lock);
|
||||
mmput(mm);
|
||||
|
||||
}
|
||||
put_task_struct(p);
|
||||
kfree(tmp_lib_name);
|
||||
return found;
|
||||
}
|
||||
|
||||
bool is_sched_lib_task(void)
|
||||
{
|
||||
if (strnlen(sched_lib_task, LIB_PATH_LENGTH) == 0)
|
||||
return false;
|
||||
|
||||
if (strnstr(current->comm, sched_lib_task, strnlen(current->comm, LIB_PATH_LENGTH)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static char cpu_cap_fixup_target[TASK_COMM_LEN];
|
||||
|
||||
static int proc_cpu_capacity_fixup_target_show(struct seq_file *m, void *data)
|
||||
{
|
||||
seq_printf(m, "%s\n", cpu_cap_fixup_target);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int proc_cpu_capacity_fixup_target_open(struct inode *inode,
|
||||
struct file *file)
|
||||
{
|
||||
return single_open(file, proc_cpu_capacity_fixup_target_show, NULL);
|
||||
}
|
||||
|
||||
static ssize_t proc_cpu_capacity_fixup_target_write(struct file *file,
|
||||
const char __user *buf, size_t count, loff_t *offs)
|
||||
{
|
||||
char temp[TASK_COMM_LEN] = {0, };
|
||||
int len = 0;
|
||||
|
||||
len = (count > TASK_COMM_LEN) ? TASK_COMM_LEN : count;
|
||||
if (copy_from_user(temp, buf, len))
|
||||
return -EFAULT;
|
||||
|
||||
if (temp[len - 1] == '\n')
|
||||
temp[len - 1] = '\0';
|
||||
|
||||
strlcpy(cpu_cap_fixup_target, temp, TASK_COMM_LEN);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static const struct proc_ops proc_cpu_capacity_fixup_target_op = {
|
||||
.proc_open = proc_cpu_capacity_fixup_target_open,
|
||||
.proc_write = proc_cpu_capacity_fixup_target_write,
|
||||
.proc_read = seq_read,
|
||||
.proc_lseek = seq_lseek,
|
||||
.proc_release = single_release,
|
||||
};
|
||||
|
||||
static void android_rvh_show_max_freq(void *unused, struct cpufreq_policy *policy,
|
||||
unsigned int *max_freq)
|
||||
{
|
||||
int curr_len = 0;
|
||||
|
||||
if (!cpuinfo_max_freq_cached)
|
||||
return;
|
||||
|
||||
curr_len = strnlen(current->comm, TASK_COMM_LEN);
|
||||
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
|
||||
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
|
||||
*max_freq = cpuinfo_max_freq_cached;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(BIT(policy->cpu) & sched_lib_mask_force))
|
||||
return;
|
||||
|
||||
if (is_sched_lib_based_app(current->pid) || is_sched_lib_task())
|
||||
*max_freq = cpuinfo_max_freq_cached << 1;
|
||||
}
|
||||
|
||||
static void android_rvh_cpu_capacity_show(void *unused,
|
||||
unsigned long *capacity, int cpu)
|
||||
{
|
||||
int curr_len = 0;
|
||||
|
||||
curr_len = strnlen(current->comm, TASK_COMM_LEN);
|
||||
if (strnlen(cpu_cap_fixup_target, TASK_COMM_LEN) == curr_len) {
|
||||
if (!strncmp(current->comm, cpu_cap_fixup_target, curr_len)) {
|
||||
*capacity = SCHED_CAPACITY_SCALE;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!soc_sched_lib_name_capacity)
|
||||
return;
|
||||
|
||||
if ((is_sched_lib_based_app(current->pid) || is_sched_lib_task()) &&
|
||||
cpu < soc_sched_lib_name_capacity)
|
||||
*capacity = 100;
|
||||
}
|
||||
|
||||
void walt_fixup_init(void)
|
||||
{
|
||||
if (!proc_create("cpu_capacity_fixup_target",
|
||||
0660, NULL, &proc_cpu_capacity_fixup_target_op))
|
||||
pr_err("Failed to register 'cpu_capacity_fixup_target'\n");
|
||||
|
||||
register_trace_android_rvh_show_max_freq(android_rvh_show_max_freq, NULL);
|
||||
register_trace_android_rvh_cpu_capacity_show(android_rvh_cpu_capacity_show, NULL);
|
||||
}
|
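Note: walt_fixup_init() above registers /proc/cpu_capacity_fixup_target (proc_create() with a NULL parent lands directly under /proc). A hedged user-space sketch of programming it follows; the comm value is only an example, and names longer than TASK_COMM_LEN (16) are truncated by the write handler.

/* Illustrative only: pick the task comm whose cpu_capacity / max_freq reads get fixed up. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/cpu_capacity_fixup_target", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%s\n", "surfaceflinger");	/* example comm, not a recommendation */
	return fclose(f) ? 1 : 0;
}

Once set, android_rvh_show_max_freq() and android_rvh_cpu_capacity_show() report the cached cpuinfo max frequency and SCHED_CAPACITY_SCALE, respectively, to a task whose comm matches exactly.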
kernel/sched/walt/input-boost.c (new file, 300 lines)
@@ -0,0 +1,300 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2013-2015,2017,2019-2021, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "input-boost: " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/input.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/pm_qos.h>
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
#define input_boost_attr_rw(_name) \
|
||||
static struct kobj_attribute _name##_attr = \
|
||||
__ATTR(_name, 0644, show_##_name, store_##_name)
|
||||
|
||||
#define show_one(file_name) \
|
||||
static ssize_t show_##file_name \
|
||||
(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
|
||||
{ \
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", file_name); \
|
||||
}
|
||||
|
||||
#define store_one(file_name) \
|
||||
static ssize_t store_##file_name \
|
||||
(struct kobject *kobj, struct kobj_attribute *attr, \
|
||||
const char *buf, size_t count) \
|
||||
{ \
|
||||
\
|
||||
sscanf(buf, "%u", &file_name); \
|
||||
return count; \
|
||||
}
|
||||
|
||||
struct cpu_sync {
|
||||
int cpu;
|
||||
unsigned int input_boost_min;
|
||||
unsigned int input_boost_freq;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct cpu_sync, sync_info);
|
||||
static struct workqueue_struct *input_boost_wq;
|
||||
|
||||
static struct work_struct input_boost_work;
|
||||
|
||||
static bool sched_boost_active;
|
||||
|
||||
static struct delayed_work input_boost_rem;
|
||||
static u64 last_input_time;
|
||||
#define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC)
|
||||
|
||||
static DEFINE_PER_CPU(struct freq_qos_request, qos_req);
|
||||
|
||||
static void boost_adjust_notify(struct cpufreq_policy *policy)
|
||||
{
|
||||
unsigned int cpu = policy->cpu;
|
||||
struct cpu_sync *s = &per_cpu(sync_info, cpu);
|
||||
unsigned int ib_min = s->input_boost_min;
|
||||
struct freq_qos_request *req = &per_cpu(qos_req, cpu);
|
||||
int ret;
|
||||
|
||||
pr_debug("CPU%u policy min before boost: %u kHz\n",
|
||||
cpu, policy->min);
|
||||
pr_debug("CPU%u boost min: %u kHz\n", cpu, ib_min);
|
||||
|
||||
ret = freq_qos_update_request(req, ib_min);
|
||||
|
||||
if (ret < 0)
|
||||
pr_err("Failed to update freq constraint in boost_adjust: %d\n",
|
||||
ib_min);
|
||||
|
||||
pr_debug("CPU%u policy min after boost: %u kHz\n", cpu, policy->min);
|
||||
}
|
||||
|
||||
static void update_policy_online(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct cpufreq_policy *policy;
|
||||
struct cpumask online_cpus;
|
||||
|
||||
/* Re-evaluate policy to trigger adjust notifier for online CPUs */
|
||||
cpus_read_lock();
|
||||
online_cpus = *cpu_online_mask;
|
||||
for_each_cpu(i, &online_cpus) {
|
||||
policy = cpufreq_cpu_get(i);
|
||||
if (!policy) {
|
||||
pr_err("%s: cpufreq policy not found for cpu%d\n",
|
||||
__func__, i);
|
||||
return;
|
||||
}
|
||||
|
||||
cpumask_andnot(&online_cpus, &online_cpus,
|
||||
policy->related_cpus);
|
||||
boost_adjust_notify(policy);
|
||||
}
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
static void do_input_boost_rem(struct work_struct *work)
|
||||
{
|
||||
unsigned int i, ret;
|
||||
struct cpu_sync *i_sync_info;
|
||||
|
||||
/* Reset the input_boost_min for all CPUs in the system */
|
||||
pr_debug("Resetting input boost min for all CPUs\n");
|
||||
for_each_possible_cpu(i) {
|
||||
i_sync_info = &per_cpu(sync_info, i);
|
||||
i_sync_info->input_boost_min = 0;
|
||||
}
|
||||
|
||||
/* Update policies for all online CPUs */
|
||||
update_policy_online();
|
||||
|
||||
if (sched_boost_active) {
	ret = sched_set_boost(0);
	if (ret)
		pr_err("input-boost: sched boost disable failed\n");
	sched_boost_active = false;
}
|
||||
}
|
||||
|
||||
static void do_input_boost(struct work_struct *work)
|
||||
{
|
||||
unsigned int cpu, ret;
|
||||
struct cpu_sync *i_sync_info;
|
||||
|
||||
cancel_delayed_work_sync(&input_boost_rem);
|
||||
if (sched_boost_active) {
|
||||
sched_set_boost(0);
|
||||
sched_boost_active = false;
|
||||
}
|
||||
|
||||
/* Set the input_boost_min for all CPUs in the system */
|
||||
pr_debug("Setting input boost min for all CPUs\n");
|
||||
for_each_possible_cpu(cpu) {
|
||||
i_sync_info = &per_cpu(sync_info, cpu);
|
||||
i_sync_info->input_boost_min = sysctl_input_boost_freq[cpu];
|
||||
}
|
||||
|
||||
/* Update policies for all online CPUs */
|
||||
update_policy_online();
|
||||
|
||||
/* Enable scheduler boost to migrate tasks to big cluster */
|
||||
if (sysctl_sched_boost_on_input > 0) {
|
||||
ret = sched_set_boost(sysctl_sched_boost_on_input);
|
||||
if (ret)
|
||||
pr_err("input-boost: sched boost enable failed\n");
|
||||
else
|
||||
sched_boost_active = true;
|
||||
}
|
||||
|
||||
queue_delayed_work(input_boost_wq, &input_boost_rem,
|
||||
msecs_to_jiffies(sysctl_input_boost_ms));
|
||||
}
|
||||
|
||||
static void inputboost_input_event(struct input_handle *handle,
|
||||
unsigned int type, unsigned int code, int value)
|
||||
{
|
||||
u64 now;
|
||||
int cpu;
|
||||
int enabled = 0;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (sysctl_input_boost_freq[cpu] > 0) {
|
||||
enabled = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
now = ktime_to_us(ktime_get());
|
||||
if (now - last_input_time < MIN_INPUT_INTERVAL)
|
||||
return;
|
||||
|
||||
if (work_pending(&input_boost_work))
|
||||
return;
|
||||
|
||||
queue_work(input_boost_wq, &input_boost_work);
|
||||
last_input_time = ktime_to_us(ktime_get());
|
||||
}
|
||||
|
||||
static int inputboost_input_connect(struct input_handler *handler,
|
||||
struct input_dev *dev, const struct input_device_id *id)
|
||||
{
|
||||
struct input_handle *handle;
|
||||
int error;
|
||||
|
||||
handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
|
||||
if (!handle)
|
||||
return -ENOMEM;
|
||||
|
||||
handle->dev = dev;
|
||||
handle->handler = handler;
|
||||
handle->name = "cpufreq";
|
||||
|
||||
error = input_register_handle(handle);
|
||||
if (error)
|
||||
goto err2;
|
||||
|
||||
error = input_open_device(handle);
|
||||
if (error)
|
||||
goto err1;
|
||||
|
||||
return 0;
|
||||
err1:
|
||||
input_unregister_handle(handle);
|
||||
err2:
|
||||
kfree(handle);
|
||||
return error;
|
||||
}
|
||||
|
||||
static void inputboost_input_disconnect(struct input_handle *handle)
|
||||
{
|
||||
input_close_device(handle);
|
||||
input_unregister_handle(handle);
|
||||
kfree(handle);
|
||||
}
|
||||
|
||||
static const struct input_device_id inputboost_ids[] = {
|
||||
/* multi-touch touchscreen */
|
||||
{
|
||||
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
|
||||
INPUT_DEVICE_ID_MATCH_ABSBIT,
|
||||
.evbit = { BIT_MASK(EV_ABS) },
|
||||
.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
|
||||
BIT_MASK(ABS_MT_POSITION_X) |
|
||||
BIT_MASK(ABS_MT_POSITION_Y)
|
||||
},
|
||||
},
|
||||
/* touchpad */
|
||||
{
|
||||
.flags = INPUT_DEVICE_ID_MATCH_KEYBIT |
|
||||
INPUT_DEVICE_ID_MATCH_ABSBIT,
|
||||
.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
|
||||
.absbit = { [BIT_WORD(ABS_X)] =
|
||||
BIT_MASK(ABS_X) | BIT_MASK(ABS_Y)
|
||||
},
|
||||
},
|
||||
/* Keypad */
|
||||
{
|
||||
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
|
||||
.evbit = { BIT_MASK(EV_KEY) },
|
||||
},
|
||||
{ },
|
||||
};
|
||||
|
||||
static struct input_handler inputboost_input_handler = {
|
||||
.event = inputboost_input_event,
|
||||
.connect = inputboost_input_connect,
|
||||
.disconnect = inputboost_input_disconnect,
|
||||
.name = "input-boost",
|
||||
.id_table = inputboost_ids,
|
||||
};
|
||||
|
||||
struct kobject *input_boost_kobj;
|
||||
int input_boost_init(void)
|
||||
{
|
||||
int cpu, ret;
|
||||
struct cpu_sync *s;
|
||||
struct cpufreq_policy *policy;
|
||||
struct freq_qos_request *req;
|
||||
|
||||
input_boost_wq = alloc_workqueue("inputboost_wq", WQ_HIGHPRI, 0);
|
||||
if (!input_boost_wq)
|
||||
return -EFAULT;
|
||||
|
||||
INIT_WORK(&input_boost_work, do_input_boost);
|
||||
INIT_DELAYED_WORK(&input_boost_rem, do_input_boost_rem);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
s = &per_cpu(sync_info, cpu);
|
||||
s->cpu = cpu;
|
||||
req = &per_cpu(qos_req, cpu);
|
||||
policy = cpufreq_cpu_get(cpu);
|
||||
if (!policy) {
|
||||
pr_err("%s: cpufreq policy not found for cpu%d\n",
|
||||
__func__, cpu);
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
ret = freq_qos_add_request(&policy->constraints, req,
|
||||
FREQ_QOS_MIN, policy->min);
|
||||
if (ret < 0) {
|
||||
pr_err("%s: Failed to add freq constraint (%d)\n",
|
||||
__func__, ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = input_register_handler(&inputboost_input_handler);
|
||||
return 0;
|
||||
}
|
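Note: the event path above is a debounce plus timed release — input events arriving within MIN_INPUT_INTERVAL of the last boost are ignored, a single work item raises the per-CPU freq-QoS floors, and a delayed work item drops them sysctl_input_boost_ms later. The fragment below is a stand-alone sketch of just that timing pattern (plain C, assumed constants, no cpufreq interaction).

/* Stand-alone sketch of the debounce / timed-release pattern; values are assumptions. */
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define MIN_INPUT_INTERVAL_US	(150 * 1000)	/* mirrors MIN_INPUT_INTERVAL above */
#define BOOST_DURATION_US	(40 * 1000)	/* example stand-in for sysctl_input_boost_ms */

static uint64_t last_input_us;
static uint64_t boost_end_us;

static uint64_t now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000ULL + (uint64_t)ts.tv_nsec / 1000ULL;
}

/* Call on every input event; returns true when a fresh boost period starts. */
bool input_event_seen(void)
{
	uint64_t now = now_us();

	if (last_input_us && now - last_input_us < MIN_INPUT_INTERVAL_US)
		return false;			/* debounced: a boost is already in flight */
	last_input_us = now;
	boost_end_us = now + BOOST_DURATION_US;
	return true;				/* caller would raise its frequency floors here */
}

/* Poll from the caller's loop; returns true once the boost should be dropped. */
bool boost_expired(void)
{
	return boost_end_us && now_us() >= boost_end_us;
}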
kernel/sched/walt/mvp_locking.c (new file, 44 lines)
@@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <trace/hooks/dtask.h>
#include "../../locking/mutex.h"
#include "walt.h"

static void android_vh_alter_mutex_list_add(void *unused, struct mutex *lock,
		struct mutex_waiter *waiter, struct list_head *list,
		bool *already_on_list)
{
	struct walt_task_struct *wts_waiter =
		(struct walt_task_struct *)current->android_vendor_data1;
	struct mutex_waiter *pos = NULL;
	struct mutex_waiter *n = NULL;
	struct list_head *head = list;
	struct walt_task_struct *wts;

	if (unlikely(walt_disabled))
		return;

	if (!lock || !waiter || !list)
		return;

	if (!is_mvp(wts_waiter))
		return;

	list_for_each_entry_safe(pos, n, head, list) {
		wts = (struct walt_task_struct *)
			((struct task_struct *)(pos->task)->android_vendor_data1);
		if (!is_mvp(wts)) {
			list_add(&waiter->list, pos->list.prev);
			*already_on_list = true;
			break;
		}
	}
}

void walt_mvp_lock_ordering_init(void)
{
	register_trace_android_vh_alter_mutex_list_add(android_vh_alter_mutex_list_add, NULL);
}
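Note: the hook above is a priority insert into the mutex wait list — an MVP waiter is queued just ahead of the first non-MVP waiter, so MVP tasks stay FIFO among themselves but are woken before ordinary waiters. The fragment below reproduces that insertion rule with a plain circular doubly-linked list; the types and the MVP flag are illustrative, not the kernel API.

/* Stand-alone illustration of "insert MVP waiter before the first non-MVP waiter". */
#include <stdbool.h>

struct waiter {
	bool mvp;
	struct waiter *prev, *next;
};

/* head is a circular sentinel; within each class the queue stays FIFO */
static void enqueue_waiter(struct waiter *head, struct waiter *w)
{
	struct waiter *pos = head->next;

	if (w->mvp) {
		/* walk past earlier MVP waiters, stop at the first non-MVP one */
		while (pos != head && pos->mvp)
			pos = pos->next;
	} else {
		pos = head;			/* plain waiters always go to the tail */
	}

	/* insert w just before pos (pos == head means tail insert) */
	w->prev = pos->prev;
	w->next = pos;
	pos->prev->next = w;
	pos->prev = w;
}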
kernel/sched/walt/perf_trace_counters.h (new file, 239 lines)
@@ -0,0 +1,239 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2013-2014, 2017, 2021, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM perf_trace_counters
|
||||
|
||||
#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _PERF_TRACE_COUNTERS_H_
|
||||
|
||||
/* Ctr index for PMCNTENSET/CLR */
|
||||
#define CC 0x80000000
|
||||
#define C0 0x1
|
||||
#define C1 0x2
|
||||
#define C2 0x4
|
||||
#define C3 0x8
|
||||
#define C4 0x10
|
||||
#define C5 0x20
|
||||
#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5)
|
||||
#define TYPE_MASK 0xFFFF
|
||||
#define NUM_L1_CTRS 6
|
||||
#define NUM_AMU_CTRS 3
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
DECLARE_PER_CPU(u32, cntenset_val);
|
||||
DECLARE_PER_CPU(unsigned long, previous_ccnt);
|
||||
DECLARE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
|
||||
DECLARE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
|
||||
|
||||
#ifdef CREATE_TRACE_POINTS
|
||||
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
|
||||
{
|
||||
unsigned int state;
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
BUG_ON(p != current);
|
||||
#endif /* CONFIG_SCHED_DEBUG */
|
||||
|
||||
/*
|
||||
* Preemption ignores task state, therefore preempted tasks are always
|
||||
* RUNNING (we will not have dequeued if state != RUNNING).
|
||||
*/
|
||||
if (preempt)
|
||||
return TASK_REPORT_MAX;
|
||||
|
||||
/*
|
||||
* task_state_index() uses fls() and returns a value from 0-8 range.
|
||||
* Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
|
||||
* it for left shift operation to get the correct task->state
|
||||
* mapping.
|
||||
*/
|
||||
state = task_state_index(p);
|
||||
|
||||
return state ? (1 << (state - 1)) : state;
|
||||
}
|
||||
#endif /* CREATE_TRACE_POINTS */
|
||||
|
||||
TRACE_EVENT(sched_switch_with_ctrs,
|
||||
|
||||
TP_PROTO(bool preempt,
|
||||
struct task_struct *prev,
|
||||
struct task_struct *next),
|
||||
|
||||
TP_ARGS(preempt, prev, next),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(pid_t, prev_pid)
|
||||
__field(pid_t, next_pid)
|
||||
__array(char, prev_comm, TASK_COMM_LEN)
|
||||
__array(char, next_comm, TASK_COMM_LEN)
|
||||
__field(long, prev_state)
|
||||
__field(unsigned long, cctr)
|
||||
__field(unsigned long, ctr0)
|
||||
__field(unsigned long, ctr1)
|
||||
__field(unsigned long, ctr2)
|
||||
__field(unsigned long, ctr3)
|
||||
__field(unsigned long, ctr4)
|
||||
__field(unsigned long, ctr5)
|
||||
__field(unsigned long, amu0)
|
||||
__field(unsigned long, amu1)
|
||||
__field(unsigned long, amu2)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
u32 cpu = smp_processor_id();
|
||||
u32 i;
|
||||
u32 cnten_val;
|
||||
unsigned long total_ccnt = 0;
|
||||
unsigned long total_cnt = 0;
|
||||
unsigned long amu_cnt = 0;
|
||||
unsigned long delta_l1_cnts[NUM_L1_CTRS] = {0};
|
||||
unsigned long delta_amu_cnts[NUM_AMU_CTRS] = {0};
|
||||
|
||||
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
|
||||
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
|
||||
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
|
||||
__entry->prev_pid = prev->pid;
|
||||
__entry->next_pid = next->pid;
|
||||
|
||||
cnten_val = per_cpu(cntenset_val, cpu);
|
||||
|
||||
if (cnten_val & CC) {
|
||||
/* Read value */
|
||||
total_ccnt = read_sysreg(pmccntr_el0);
|
||||
__entry->cctr = total_ccnt -
|
||||
per_cpu(previous_ccnt, cpu);
|
||||
per_cpu(previous_ccnt, cpu) = total_ccnt;
|
||||
}
|
||||
for (i = 0; i < NUM_L1_CTRS; i++) {
|
||||
if (cnten_val & (1 << i)) {
|
||||
/* Select */
|
||||
write_sysreg(i, pmselr_el0);
|
||||
isb();
|
||||
/* Read value */
|
||||
total_cnt = read_sysreg(pmxevcntr_el0);
|
||||
delta_l1_cnts[i] = total_cnt -
|
||||
per_cpu(previous_l1_cnts[i], cpu);
|
||||
per_cpu(previous_l1_cnts[i], cpu) =
|
||||
total_cnt;
|
||||
} else
|
||||
delta_l1_cnts[i] = 0;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_AMU_EXTN)) {
|
||||
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
|
||||
delta_amu_cnts[0] = amu_cnt -
|
||||
per_cpu(previous_amu_cnts[0], cpu);
|
||||
per_cpu(previous_amu_cnts[0], cpu) = amu_cnt;
|
||||
|
||||
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
|
||||
delta_amu_cnts[1] = amu_cnt -
|
||||
per_cpu(previous_amu_cnts[1], cpu);
|
||||
per_cpu(previous_amu_cnts[1], cpu) = amu_cnt;
|
||||
|
||||
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
|
||||
delta_amu_cnts[2] = amu_cnt -
|
||||
per_cpu(previous_amu_cnts[2], cpu);
|
||||
per_cpu(previous_amu_cnts[2], cpu) = amu_cnt;
|
||||
}
|
||||
|
||||
__entry->ctr0 = delta_l1_cnts[0];
|
||||
__entry->ctr1 = delta_l1_cnts[1];
|
||||
__entry->ctr2 = delta_l1_cnts[2];
|
||||
__entry->ctr3 = delta_l1_cnts[3];
|
||||
__entry->ctr4 = delta_l1_cnts[4];
|
||||
__entry->ctr5 = delta_l1_cnts[5];
|
||||
__entry->amu0 = delta_amu_cnts[0];
|
||||
__entry->amu1 = delta_amu_cnts[1];
|
||||
__entry->amu2 = delta_amu_cnts[2];
|
||||
),
|
||||
|
||||
TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s next_pid=%d CCNTR=%lu CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu, CYC: %lu, INST: %lu, STALL: %lu",
|
||||
__entry->prev_comm, __entry->prev_pid,
|
||||
|
||||
(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
|
||||
__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
|
||||
{ TASK_INTERRUPTIBLE, "S" },
|
||||
{ TASK_UNINTERRUPTIBLE, "D" },
|
||||
{ __TASK_STOPPED, "T" },
|
||||
{ __TASK_TRACED, "t" },
|
||||
{ EXIT_DEAD, "X" },
|
||||
{ EXIT_ZOMBIE, "Z" },
|
||||
{ TASK_PARKED, "P" },
|
||||
{ TASK_DEAD, "I" }) :
|
||||
"R",
|
||||
|
||||
__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
|
||||
__entry->next_comm,
|
||||
__entry->next_pid,
|
||||
__entry->cctr,
|
||||
__entry->ctr0, __entry->ctr1,
|
||||
__entry->ctr2, __entry->ctr3,
|
||||
__entry->ctr4, __entry->ctr5,
|
||||
__entry->amu0, __entry->amu1,
|
||||
__entry->amu2)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_switch_ctrs_cfg,
|
||||
|
||||
TP_PROTO(int cpu),
|
||||
|
||||
TP_ARGS(cpu),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, cpu)
|
||||
__field(unsigned long, ctr0)
|
||||
__field(unsigned long, ctr1)
|
||||
__field(unsigned long, ctr2)
|
||||
__field(unsigned long, ctr3)
|
||||
__field(unsigned long, ctr4)
|
||||
__field(unsigned long, ctr5)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
u32 i;
|
||||
u32 cnten_val;
|
||||
u32 ctr_type[NUM_L1_CTRS] = {0};
|
||||
|
||||
cnten_val = per_cpu(cntenset_val, cpu);
|
||||
|
||||
for (i = 0; i < NUM_L1_CTRS; i++) {
|
||||
if (cnten_val & (1 << i)) {
|
||||
/* Select */
|
||||
write_sysreg(i, pmselr_el0);
|
||||
isb();
|
||||
/* Read type */
|
||||
ctr_type[i] = read_sysreg(pmxevtyper_el0)
|
||||
& TYPE_MASK;
|
||||
} else
|
||||
ctr_type[i] = 0;
|
||||
}
|
||||
|
||||
__entry->cpu = cpu;
|
||||
__entry->ctr0 = ctr_type[0];
|
||||
__entry->ctr1 = ctr_type[1];
|
||||
__entry->ctr2 = ctr_type[2];
|
||||
__entry->ctr3 = ctr_type[3];
|
||||
__entry->ctr4 = ctr_type[4];
|
||||
__entry->ctr5 = ctr_type[5];
|
||||
),
|
||||
|
||||
TP_printk("cpu=%d CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu",
|
||||
__entry->cpu,
|
||||
__entry->ctr0, __entry->ctr1,
|
||||
__entry->ctr2, __entry->ctr3,
|
||||
__entry->ctr4, __entry->ctr5)
|
||||
);
|
||||
|
||||
#endif
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH ../../kernel/sched/walt
|
||||
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_FILE perf_trace_counters
|
||||
#include <trace/define_trace.h>
|
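Note: both events above report deltas, not raw counts — each enabled counter is read at context-switch time and the previously stored per-CPU value is subtracted and replaced. The toy fragment below shows only that bookkeeping step; the array sizes are placeholders and no PMU/AMU registers are touched.

/* Toy per-CPU "delta since last snapshot" bookkeeping, mirroring TP_fast_assign above. */
#include <stdint.h>

#define NR_CTRS		6	/* matches NUM_L1_CTRS */
#define NR_CPUS_DEMO	8	/* placeholder, not the kernel's nr_cpu_ids */

static uint64_t prev_cnt[NR_CPUS_DEMO][NR_CTRS];

/* new_val would come from the hardware counter; unsigned subtraction keeps the
 * delta correct even if the counter wrapped since the previous snapshot. */
static uint64_t counter_delta(unsigned int cpu, unsigned int ctr, uint64_t new_val)
{
	uint64_t delta = new_val - prev_cnt[cpu][ctr];

	prev_cnt[cpu][ctr] = new_val;
	return delta;
}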
kernel/sched/walt/pipeline.c (new file, 762 lines)
@@ -0,0 +1,762 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
|
||||
|
||||
static DEFINE_RAW_SPINLOCK(pipeline_lock);
|
||||
static struct walt_task_struct *pipeline_wts[WALT_NR_CPUS];
|
||||
int pipeline_nr;
|
||||
|
||||
static DEFINE_RAW_SPINLOCK(heavy_lock);
|
||||
static struct walt_task_struct *heavy_wts[MAX_NR_PIPELINE];
|
||||
bool pipeline_pinning;
|
||||
|
||||
static inline int pipeline_demand(struct walt_task_struct *wts)
|
||||
{
|
||||
return scale_time_to_util(wts->coloc_demand);
|
||||
}
|
||||
|
||||
int add_pipeline(struct walt_task_struct *wts)
|
||||
{
|
||||
int i, pos = -1, ret = -ENOSPC;
|
||||
unsigned long flags;
|
||||
int max_nr_pipeline = cpumask_weight(&cpus_for_pipeline);
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return -EAGAIN;
|
||||
|
||||
raw_spin_lock_irqsave(&pipeline_lock, flags);
|
||||
|
||||
for (i = 0; i < max_nr_pipeline; i++) {
|
||||
if (wts == pipeline_wts[i]) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (pipeline_wts[i] == NULL)
|
||||
pos = i;
|
||||
}
|
||||
|
||||
if (pos != -1) {
|
||||
pipeline_wts[pos] = wts;
|
||||
pipeline_nr++;
|
||||
ret = 0;
|
||||
}
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int remove_pipeline(struct walt_task_struct *wts)
|
||||
{
|
||||
int i, j, ret = 0;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return -EAGAIN;
|
||||
|
||||
raw_spin_lock_irqsave(&pipeline_lock, flags);
|
||||
|
||||
for (i = 0; i < WALT_NR_CPUS; i++) {
|
||||
if (wts == pipeline_wts[i]) {
|
||||
wts->low_latency &= ~WALT_LOW_LATENCY_PIPELINE_BIT;
|
||||
pipeline_wts[i] = NULL;
|
||||
pipeline_nr--;
|
||||
for (j = i; j < WALT_NR_CPUS - 1; j++) {
|
||||
pipeline_wts[j] = pipeline_wts[j + 1];
|
||||
pipeline_wts[j + 1] = NULL;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int remove_heavy(struct walt_task_struct *wts)
|
||||
{
|
||||
int i, j, ret = 0;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return -EAGAIN;
|
||||
|
||||
raw_spin_lock_irqsave(&heavy_lock, flags);
|
||||
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
if (wts == heavy_wts[i]) {
|
||||
wts->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
|
||||
heavy_wts[i] = NULL;
|
||||
have_heavy_list--;
|
||||
for (j = i; j < MAX_NR_PIPELINE - 1; j++) {
|
||||
heavy_wts[j] = heavy_wts[j + 1];
|
||||
heavy_wts[j + 1] = NULL;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&heavy_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void remove_special_task(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&heavy_lock, flags);
|
||||
/*
 * Although the pipeline special task designation is removed,
 * if the task is not dead (i.e. this function was called from sysctl context)
 * the task will continue to enjoy pipeline privileges until the next update in
 * find_heaviest_topapp().
 */
|
||||
pipeline_special_task = NULL;
|
||||
raw_spin_unlock_irqrestore(&heavy_lock, flags);
|
||||
}
|
||||
|
||||
void set_special_task(struct task_struct *pipeline_special_local)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&heavy_lock, flags);
|
||||
pipeline_special_task = pipeline_special_local;
|
||||
raw_spin_unlock_irqrestore(&heavy_lock, flags);
|
||||
}
|
||||
|
||||
cpumask_t cpus_for_pipeline = { CPU_BITS_NONE };
|
||||
|
||||
/* always set unisolation for max cluster, for pipeline tasks */
|
||||
static inline void pipeline_set_unisolation(bool set, int flag)
|
||||
{
|
||||
static bool unisolation_state;
|
||||
struct walt_sched_cluster *cluster;
|
||||
static unsigned int enable_pipeline_unisolation;
|
||||
|
||||
if (!set)
|
||||
enable_pipeline_unisolation &= ~(1 << flag);
|
||||
else
|
||||
enable_pipeline_unisolation |= (1 << flag);
|
||||
|
||||
if (unisolation_state && !enable_pipeline_unisolation) {
|
||||
unisolation_state = false;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
|
||||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
|
||||
core_ctl_set_cluster_boost(cluster->id, false);
|
||||
}
|
||||
} else if (!unisolation_state && enable_pipeline_unisolation) {
|
||||
unisolation_state = true;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
if (cpumask_intersects(&cpus_for_pipeline, &cluster->cpus) ||
|
||||
is_max_possible_cluster_cpu(cpumask_first(&cluster->cpus)))
|
||||
core_ctl_set_cluster_boost(cluster->id, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* sysctl_sched_heavy_nr or sysctl_sched_pipeline_util_thres can change at any moment in time.
|
||||
* as a result, the ability to set/clear unisolation state for a particular type of pipeline, is
|
||||
* hindered. Detect a transition and reset the unisolation state of the pipeline method no longer
|
||||
* in use.
|
||||
*/
|
||||
static inline void pipeline_reset_unisolation_state(void)
|
||||
{
|
||||
static bool last_auto_pipeline;
|
||||
|
||||
if ((sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres) && !last_auto_pipeline) {
|
||||
pipeline_set_unisolation(false, MANUAL_PIPELINE);
|
||||
last_auto_pipeline = true;
|
||||
} else if (!sysctl_sched_heavy_nr &&
|
||||
!sysctl_sched_pipeline_util_thres && last_auto_pipeline) {
|
||||
pipeline_set_unisolation(false, AUTO_PIPELINE);
|
||||
last_auto_pipeline = false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool should_pipeline_pin_special(void)
|
||||
{
|
||||
if (!pipeline_special_task)
|
||||
return false;
|
||||
if (!heavy_wts[MAX_NR_PIPELINE - 1])
|
||||
return false;
|
||||
if (pipeline_demand(heavy_wts[0]) <= sysctl_pipeline_special_task_util_thres)
|
||||
return true;
|
||||
if (pipeline_demand(heavy_wts[1]) <= sysctl_pipeline_non_special_task_util_thres)
|
||||
return true;
|
||||
if (pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
|
||||
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_low_pct, 100)))
|
||||
return false;
|
||||
if (!pipeline_pinning && (pipeline_demand(heavy_wts[0]) <=
|
||||
mult_frac(pipeline_demand(heavy_wts[1]), sysctl_pipeline_pin_thres_high_pct, 100)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
cpumask_t last_available_big_cpus = CPU_MASK_NONE;
|
||||
int have_heavy_list;
|
||||
u32 total_util;
|
||||
bool find_heaviest_topapp(u64 window_start)
|
||||
{
|
||||
struct walt_related_thread_group *grp;
|
||||
struct walt_task_struct *wts;
|
||||
unsigned long flags;
|
||||
static u64 last_rearrange_ns;
|
||||
int i, j, start;
|
||||
struct walt_task_struct *heavy_wts_to_drop[MAX_NR_PIPELINE];
|
||||
|
||||
if (num_sched_clusters < 2)
|
||||
return false;
|
||||
|
||||
/* lazy enabling disabling until 100mS for colocation or heavy_nr change */
|
||||
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
|
||||
if (!grp || (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres) ||
|
||||
sched_boost_type) {
|
||||
if (have_heavy_list) {
|
||||
raw_spin_lock_irqsave(&heavy_lock, flags);
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
if (heavy_wts[i]) {
|
||||
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
|
||||
heavy_wts[i]->pipeline_cpu = -1;
|
||||
heavy_wts[i] = NULL;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&heavy_lock, flags);
|
||||
have_heavy_list = 0;
|
||||
|
||||
pipeline_set_unisolation(false, AUTO_PIPELINE);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (last_rearrange_ns && (window_start < (last_rearrange_ns + 100 * MSEC_TO_NSEC)))
|
||||
return false;
|
||||
last_rearrange_ns = window_start;
|
||||
|
||||
raw_spin_lock_irqsave(&grp->lock, flags);
|
||||
raw_spin_lock(&heavy_lock);
|
||||
|
||||
/* remember the old ones in _to_drop[] */
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
heavy_wts_to_drop[i] = heavy_wts[i];
|
||||
heavy_wts[i] = NULL;
|
||||
}
|
||||
|
||||
/* Assign user specified one (if exists) to slot 0*/
|
||||
if (pipeline_special_task) {
|
||||
heavy_wts[0] = (struct walt_task_struct *)
|
||||
pipeline_special_task->android_vendor_data1;
|
||||
start = 1;
|
||||
} else {
|
||||
start = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that heavy_wts either contains the top 3 top-app tasks,
|
||||
* or the user defined heavy task followed by the top 2 top-app tasks
|
||||
*/
|
||||
list_for_each_entry(wts, &grp->tasks, grp_list) {
|
||||
struct walt_task_struct *to_be_placed_wts = wts;
|
||||
|
||||
/* if the task hasn't seen action recently, skip it */
|
||||
if (wts->mark_start < window_start - (sched_ravg_window * 2))
|
||||
continue;
|
||||
|
||||
/* skip user defined task as it's already part of the list*/
|
||||
if (pipeline_special_task && (wts == heavy_wts[0]))
|
||||
continue;
|
||||
|
||||
for (i = start; i < MAX_NR_PIPELINE; i++) {
|
||||
if (!heavy_wts[i]) {
|
||||
heavy_wts[i] = to_be_placed_wts;
|
||||
break;
|
||||
} else if (pipeline_demand(to_be_placed_wts) >=
|
||||
pipeline_demand(heavy_wts[i])) {
|
||||
struct walt_task_struct *tmp;
|
||||
|
||||
tmp = heavy_wts[i];
|
||||
heavy_wts[i] = to_be_placed_wts;
|
||||
to_be_placed_wts = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Determine how many of the top three pipeline tasks to retain as heavy.
 * If the "sched_heavy_nr" node is set, the util threshold is ignored.
 */
|
||||
total_util = 0;
|
||||
if (sysctl_sched_heavy_nr) {
|
||||
for (i = sysctl_sched_heavy_nr; i < MAX_NR_PIPELINE; i++)
|
||||
heavy_wts[i] = NULL;
|
||||
} else {
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
if (heavy_wts[i])
|
||||
total_util += pipeline_demand(heavy_wts[i]);
|
||||
}
|
||||
|
||||
if (total_util < sysctl_sched_pipeline_util_thres)
|
||||
heavy_wts[MAX_NR_PIPELINE - 1] = NULL;
|
||||
}
|
||||
|
||||
/* reset heavy for tasks that are no longer heavy */
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
bool reset = true;
|
||||
|
||||
if (!heavy_wts_to_drop[i])
|
||||
continue;
|
||||
for (j = 0; j < MAX_NR_PIPELINE; j++) {
|
||||
if (!heavy_wts[j])
|
||||
continue;
|
||||
if (heavy_wts_to_drop[i] == heavy_wts[j]) {
|
||||
reset = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (reset) {
|
||||
heavy_wts_to_drop[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
|
||||
heavy_wts_to_drop[i]->pipeline_cpu = -1;
|
||||
}
|
||||
|
||||
if (heavy_wts[i]) {
|
||||
heavy_wts[i]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
if (heavy_wts[MAX_NR_PIPELINE - 1])
|
||||
pipeline_set_unisolation(true, AUTO_PIPELINE);
|
||||
else
|
||||
pipeline_set_unisolation(false, AUTO_PIPELINE);
|
||||
|
||||
raw_spin_unlock(&heavy_lock);
|
||||
raw_spin_unlock_irqrestore(&grp->lock, flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
void assign_heaviest_topapp(bool found_topapp)
|
||||
{
|
||||
int i;
|
||||
struct walt_task_struct *wts;
|
||||
|
||||
if (!found_topapp)
|
||||
return;
|
||||
|
||||
raw_spin_lock(&heavy_lock);
|
||||
|
||||
/* start with non-prime cpus chosen for this chipset (e.g. golds) */
|
||||
cpumask_and(&last_available_big_cpus, cpu_online_mask, &cpus_for_pipeline);
|
||||
cpumask_andnot(&last_available_big_cpus, &last_available_big_cpus, cpu_halt_mask);
|
||||
|
||||
/*
|
||||
* Ensure the special task is only pinned if there are 3 auto pipeline tasks and
|
||||
* check certain demand conditions between special pipeline task and the largest
|
||||
* non-special pipeline task.
|
||||
*/
|
||||
if (should_pipeline_pin_special()) {
|
||||
pipeline_pinning = true;
|
||||
heavy_wts[0]->pipeline_cpu =
|
||||
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
|
||||
heavy_wts[0]->low_latency |= WALT_LOW_LATENCY_HEAVY_BIT;
|
||||
if (cpumask_test_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus))
|
||||
cpumask_clear_cpu(heavy_wts[0]->pipeline_cpu, &last_available_big_cpus);
|
||||
} else {
|
||||
pipeline_pinning = false;
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
wts = heavy_wts[i];
|
||||
if (!wts)
|
||||
continue;
|
||||
|
||||
if (i == 0 && pipeline_pinning)
|
||||
continue;
|
||||
|
||||
if (wts->pipeline_cpu != -1) {
|
||||
if (cpumask_test_cpu(wts->pipeline_cpu, &last_available_big_cpus))
|
||||
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
|
||||
else
|
||||
/* avoid assigning two pipelines to same cpu */
|
||||
wts->pipeline_cpu = -1;
|
||||
}
|
||||
}
|
||||
|
||||
have_heavy_list = 0;
|
||||
/* assign cpus and heavy status to the new heavy */
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
wts = heavy_wts[i];
|
||||
if (!wts)
|
||||
continue;
|
||||
|
||||
if (wts->pipeline_cpu == -1) {
|
||||
wts->pipeline_cpu = cpumask_last(&last_available_big_cpus);
|
||||
if (wts->pipeline_cpu >= nr_cpu_ids) {
|
||||
/* drop from heavy if it can't be assigned */
|
||||
heavy_wts[i]->low_latency &= ~WALT_LOW_LATENCY_HEAVY_BIT;
|
||||
heavy_wts[i]->pipeline_cpu = -1;
|
||||
heavy_wts[i] = NULL;
|
||||
} else {
|
||||
/*
 * Clear the cpu from the available list of pipeline cpus,
 * as pipeline_cpu has been assigned to the task.
 */
|
||||
cpumask_clear_cpu(wts->pipeline_cpu, &last_available_big_cpus);
|
||||
}
|
||||
}
|
||||
if (wts->pipeline_cpu >= 0)
|
||||
have_heavy_list++;
|
||||
}
|
||||
|
||||
if (trace_sched_pipeline_tasks_enabled()) {
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
if (heavy_wts[i] != NULL)
|
||||
trace_sched_pipeline_tasks(AUTO_PIPELINE, i, heavy_wts[i],
|
||||
have_heavy_list, total_util, pipeline_pinning);
|
||||
}
|
||||
}
|
||||
|
||||
raw_spin_unlock(&heavy_lock);
|
||||
}
|
||||
static inline void swap_pipeline_with_prime_locked(struct walt_task_struct *prime_wts,
|
||||
struct walt_task_struct *other_wts)
|
||||
{
|
||||
if (prime_wts && other_wts) {
|
||||
if (pipeline_demand(prime_wts) < pipeline_demand(other_wts)) {
|
||||
int cpu;
|
||||
|
||||
cpu = other_wts->pipeline_cpu;
|
||||
other_wts->pipeline_cpu = prime_wts->pipeline_cpu;
|
||||
prime_wts->pipeline_cpu = cpu;
|
||||
trace_sched_pipeline_swapped(other_wts, prime_wts);
|
||||
}
|
||||
} else if (!prime_wts && other_wts) {
|
||||
/* if prime preferred died promote gold to prime, assumes 1 prime */
|
||||
other_wts->pipeline_cpu =
|
||||
cpumask_last(&sched_cluster[num_sched_clusters - 1]->cpus);
|
||||
trace_sched_pipeline_swapped(other_wts, prime_wts);
|
||||
}
|
||||
}
|
||||
|
||||
#define WINDOW_HYSTERESIS 4
|
||||
static inline bool delay_rearrange(u64 window_start, int pipeline_type, bool force)
|
||||
{
|
||||
static u64 last_rearrange_ns[MAX_PIPELINE_TYPES];
|
||||
|
||||
if (!force && last_rearrange_ns[pipeline_type] &&
|
||||
(window_start < (last_rearrange_ns[pipeline_type] +
|
||||
(sched_ravg_window*WINDOW_HYSTERESIS))))
|
||||
return true;
|
||||
last_rearrange_ns[pipeline_type] = window_start;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void find_prime_and_max_tasks(struct walt_task_struct **wts_list,
|
||||
struct walt_task_struct **prime_wts,
|
||||
struct walt_task_struct **other_wts)
|
||||
{
|
||||
int i;
|
||||
int max_demand = 0;
|
||||
|
||||
for (i = 0; i < MAX_NR_PIPELINE; i++) {
|
||||
struct walt_task_struct *wts = wts_list[i];
|
||||
|
||||
if (wts == NULL)
|
||||
continue;
|
||||
|
||||
if (wts->pipeline_cpu < 0)
|
||||
continue;
|
||||
|
||||
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
|
||||
if (prime_wts)
|
||||
*prime_wts = wts;
|
||||
} else if (other_wts && pipeline_demand(wts) > max_demand) {
|
||||
max_demand = pipeline_demand(wts);
|
||||
*other_wts = wts;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool is_prime_worthy(struct walt_task_struct *wts)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
if (wts == NULL)
|
||||
return false;
|
||||
|
||||
if (num_sched_clusters < 2)
|
||||
return true;
|
||||
|
||||
p = wts_to_ts(wts);
|
||||
|
||||
/*
|
||||
* Assume the first row of cpu arrays represents the order of clusters
|
||||
* in magnitude of capacities, where the last column represents prime,
|
||||
* and the second to last column represents golds
|
||||
*/
|
||||
return !task_fits_max(p, cpumask_last(&cpu_array[0][num_sched_clusters - 2]));
|
||||
}
|
||||
|
||||
void rearrange_heavy(u64 window_start, bool force)
|
||||
{
|
||||
struct walt_task_struct *prime_wts = NULL;
|
||||
struct walt_task_struct *other_wts = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
if (num_sched_clusters < 2)
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&heavy_lock, flags);
|
||||
/*
|
||||
* TODO: As primes are isolated under have_heavy_list < 3, and pipeline misfits are also
|
||||
* disabled, setting the prime worthy task's pipeline_cpu as CPU7 could lead to the
|
||||
* pipeline_cpu selection being ignored until the next run of find_heaviest_topapp(),
|
||||
* and furthermore remove the task's current gold pipeline_cpu, which could cause the
|
||||
* task to start bouncing around on the golds, and ultimately lead to suboptimal behavior.
|
||||
*/
|
||||
if (have_heavy_list <= 2) {
|
||||
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
|
||||
|
||||
if (prime_wts && !is_prime_worthy(prime_wts)) {
|
||||
int assign_cpu;
|
||||
|
||||
/* demote prime_wts, it is not worthy */
|
||||
assign_cpu = cpumask_first(&last_available_big_cpus);
|
||||
if (assign_cpu < nr_cpu_ids) {
|
||||
prime_wts->pipeline_cpu = assign_cpu;
|
||||
cpumask_clear_cpu(assign_cpu, &last_available_big_cpus);
|
||||
prime_wts = NULL;
|
||||
}
|
||||
/* if no pipeline cpu available to assign, leave task on prime */
|
||||
}
|
||||
|
||||
if (!prime_wts && is_prime_worthy(other_wts)) {
|
||||
/* promote other_wts to prime, it is worthy */
|
||||
swap_pipeline_with_prime_locked(NULL, other_wts);
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (pipeline_pinning)
|
||||
goto out;
|
||||
|
||||
if (delay_rearrange(window_start, AUTO_PIPELINE, force))
|
||||
goto out;
|
||||
|
||||
if (!soc_feat(SOC_ENABLE_PIPELINE_SWAPPING_BIT) && !force)
|
||||
goto out;
|
||||
|
||||
/* swap prime for have_heavy_list >= 3 */
|
||||
find_prime_and_max_tasks(heavy_wts, &prime_wts, &other_wts);
|
||||
swap_pipeline_with_prime_locked(prime_wts, other_wts);
|
||||
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&heavy_lock, flags);
|
||||
}
|
||||
|
||||
void rearrange_pipeline_preferred_cpus(u64 window_start)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct walt_task_struct *wts;
|
||||
bool set_unisolation = false;
|
||||
u32 max_demand = 0;
|
||||
struct walt_task_struct *prime_wts = NULL;
|
||||
struct walt_task_struct *other_wts = NULL;
|
||||
static int assign_cpu = -1;
|
||||
static bool last_set_unisolation;
|
||||
int i;
|
||||
|
||||
if (sysctl_sched_heavy_nr || sysctl_sched_pipeline_util_thres)
|
||||
return;
|
||||
|
||||
if (num_sched_clusters < 2)
|
||||
return;
|
||||
|
||||
if (!pipeline_nr || sched_boost_type)
|
||||
goto out;
|
||||
|
||||
if (delay_rearrange(window_start, MANUAL_PIPELINE, false))
|
||||
goto out;
|
||||
|
||||
raw_spin_lock_irqsave(&pipeline_lock, flags);
|
||||
|
||||
set_unisolation = true;
|
||||
|
||||
for (i = 0; i < WALT_NR_CPUS; i++) {
|
||||
wts = pipeline_wts[i];
|
||||
|
||||
if (!wts)
|
||||
continue;
|
||||
|
||||
if (!wts->grp)
|
||||
wts->pipeline_cpu = -1;
|
||||
|
||||
/*
 * Assumes that if one pipeline task doesn't have a preferred cpu set,
 * none of the other pipeline tasks have one set either.
 */
|
||||
if (wts->pipeline_cpu == -1) {
|
||||
assign_cpu = cpumask_next_and(assign_cpu,
|
||||
&cpus_for_pipeline, cpu_online_mask);
|
||||
|
||||
if (assign_cpu >= nr_cpu_ids)
|
||||
/* reset and rotate the cpus */
|
||||
assign_cpu = cpumask_next_and(-1,
|
||||
&cpus_for_pipeline, cpu_online_mask);
|
||||
|
||||
if (assign_cpu >= nr_cpu_ids)
|
||||
wts->pipeline_cpu = -1;
|
||||
else
|
||||
wts->pipeline_cpu = assign_cpu;
|
||||
}
|
||||
|
||||
if (wts->pipeline_cpu != -1) {
|
||||
if (is_max_possible_cluster_cpu(wts->pipeline_cpu)) {
|
||||
/* assumes just one prime */
|
||||
prime_wts = wts;
|
||||
} else if (pipeline_demand(wts) > max_demand) {
|
||||
max_demand = pipeline_demand(wts);
|
||||
other_wts = wts;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pipeline_nr <= 2) {
|
||||
set_unisolation = false;
|
||||
if (prime_wts && !is_prime_worthy(prime_wts)) {
|
||||
/* demote prime_wts, it is not worthy */
|
||||
assign_cpu = cpumask_next_and(assign_cpu,
|
||||
&cpus_for_pipeline, cpu_online_mask);
|
||||
if (assign_cpu >= nr_cpu_ids)
|
||||
/* reset and rotate the cpus */
|
||||
assign_cpu = cpumask_next_and(-1,
|
||||
&cpus_for_pipeline, cpu_online_mask);
|
||||
if (assign_cpu >= nr_cpu_ids)
|
||||
prime_wts->pipeline_cpu = -1;
|
||||
else
|
||||
prime_wts->pipeline_cpu = assign_cpu;
|
||||
prime_wts = NULL;
|
||||
}
|
||||
|
||||
if (!prime_wts && is_prime_worthy(other_wts)) {
|
||||
/* promote other_wts to prime, it is worthy */
|
||||
swap_pipeline_with_prime_locked(NULL, other_wts);
|
||||
set_unisolation = true;
|
||||
}
|
||||
|
||||
if (prime_wts)
|
||||
set_unisolation = true;
|
||||
|
||||
goto release_lock;
|
||||
}
|
||||
|
||||
/* swap prime for pipeline_nr >= 3 */
|
||||
swap_pipeline_with_prime_locked(prime_wts, other_wts);
|
||||
|
||||
if (trace_sched_pipeline_tasks_enabled()) {
|
||||
for (i = 0; i < WALT_NR_CPUS; i++) {
|
||||
if (pipeline_wts[i] != NULL)
|
||||
trace_sched_pipeline_tasks(MANUAL_PIPELINE, i, pipeline_wts[i],
|
||||
pipeline_nr, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
release_lock:
|
||||
raw_spin_unlock_irqrestore(&pipeline_lock, flags);
|
||||
|
||||
out:
|
||||
if (set_unisolation ^ last_set_unisolation) {
|
||||
pipeline_set_unisolation(set_unisolation, MANUAL_PIPELINE);
|
||||
last_set_unisolation = set_unisolation;
|
||||
}
|
||||
}
|
||||
|
||||
bool pipeline_check(struct walt_rq *wrq)
|
||||
{
|
||||
/* found_topapp should force rearrangement */
|
||||
bool found_topapp = find_heaviest_topapp(wrq->window_start);
|
||||
|
||||
rearrange_pipeline_preferred_cpus(wrq->window_start);
|
||||
pipeline_reset_unisolation_state();
|
||||
|
||||
return found_topapp;
|
||||
}
|
||||
|
||||
void pipeline_rearrange(struct walt_rq *wrq, bool found_topapp)
|
||||
{
|
||||
assign_heaviest_topapp(found_topapp);
|
||||
rearrange_heavy(wrq->window_start, found_topapp);
|
||||
}
|
||||
|
||||
bool enable_load_sync(int cpu)
|
||||
{
|
||||
if (!cpumask_test_cpu(cpu, &pipeline_sync_cpus))
|
||||
return false;
|
||||
|
||||
if (!pipeline_in_progress())
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Under manual pipeline, only load sync between the pipeline_sync_cpus, if at least one
|
||||
* of the CPUs userspace has allocated for pipeline tasks corresponds to the
|
||||
* pipeline_sync_cpus
|
||||
*/
|
||||
if (!sysctl_sched_heavy_nr && !sysctl_sched_pipeline_util_thres &&
|
||||
!cpumask_intersects(&pipeline_sync_cpus, &cpus_for_pipeline))
|
||||
return false;
|
||||
|
||||
/* Ensure to load sync only if there are 3 auto pipeline tasks */
|
||||
if (have_heavy_list)
|
||||
return have_heavy_list == MAX_NR_PIPELINE;
|
||||
|
||||
/*
|
||||
* If auto pipeline is disabled, manual must be on. Ensure to load sync under manual
|
||||
* pipeline only if there are 3 or more pipeline tasks
|
||||
*/
|
||||
return pipeline_nr >= MAX_NR_PIPELINE;
|
||||
}
|
||||
|
||||
/*
|
||||
* pipeline_fits_smaller_cpus evaluates if a pipeline task should be treated as a misfit.
|
||||
* There are three possible outcomes:
|
||||
* - ret -1: Continue evaluation with task_fits_max().
|
||||
* - ret 0: Task should be treated as a misfit (does not fit on smaller CPUs).
|
||||
* - ret 1: Task cannot be treated as a misfit (fits on smaller CPUs).
|
||||
*
|
||||
* If the task is assigned a pipeline CPU which is a prime CPU, ret should be 0, indicating
|
||||
* the task is a misfit.
|
||||
* If the number of pipeline tasks is 2 or fewer, continue evaluation of task_fits_max().
|
||||
* If the number of pipeline tasks is 3 or more, ret should be 1, indicating the task fits on the
|
||||
* smaller CPUs and is not a misfit.
|
||||
*/
|
||||
int pipeline_fits_smaller_cpus(struct task_struct *p)
|
||||
{
|
||||
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
||||
unsigned int pipeline_cpu = wts->pipeline_cpu;
|
||||
|
||||
if (pipeline_cpu == -1)
|
||||
return -1;
|
||||
|
||||
if (cpumask_test_cpu(pipeline_cpu, &cpu_array[0][num_sched_clusters-1]))
|
||||
return 0;
|
||||
|
||||
if (have_heavy_list) {
|
||||
if (have_heavy_list == MAX_NR_PIPELINE)
|
||||
return 1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pipeline_nr >= MAX_NR_PIPELINE)
|
||||
return 1;
|
||||
else
|
||||
return -1;
|
||||
}
|
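Note: pipeline_fits_smaller_cpus() is deliberately tri-state so the caller can fall back to the normal fitness check only when the pipeline state is inconclusive. A hedged sketch of the caller-side folding is shown below; walt_generic_fits() is an assumed stand-in name for the usual task_fits_max() path, not a function added by this patch.

/* Sketch only: how a caller might fold the tri-state result into a boolean. */
static bool walt_generic_fits(struct task_struct *p, int cpu);	/* assumed helper */

static bool walt_fits_smaller_cpus(struct task_struct *p, int cpu)
{
	switch (pipeline_fits_smaller_cpus(p)) {
	case 0:
		return false;	/* pinned to a prime pipeline cpu: treat as misfit */
	case 1:
		return true;	/* full pipeline: keep it off the misfit path */
	default:
		return walt_generic_fits(p, cpu);	/* -1: fall back to the normal check */
	}
}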
kernel/sched/walt/preemptirq_long.c (new file, 176 lines)
@@ -0,0 +1,176 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2020-2021 The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <trace/hooks/preemptirq.h>
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "preemptirq_long.h"
|
||||
|
||||
#define IRQSOFF_SENTINEL 0x0fffDEAD
|
||||
|
||||
static unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000;
|
||||
static unsigned int sysctl_irqsoff_tracing_threshold_ns = 5000000;
|
||||
static unsigned int sysctl_irqsoff_dmesg_output_enabled;
|
||||
static unsigned int sysctl_irqsoff_crash_sentinel_value;
|
||||
static unsigned int sysctl_irqsoff_crash_threshold_ns = 10000000;
|
||||
|
||||
static unsigned int half_million = 500000;
|
||||
static unsigned int one_hundred_million = 100000000;
|
||||
static unsigned int one_million = 1000000;
|
||||
|
||||
static DEFINE_PER_CPU(u64, irq_disabled_ts);
|
||||
|
||||
/*
|
||||
* preemption disable tracking requires additional context
|
||||
* to rule out false positives. see the comment in
|
||||
* test_preempt_disable_long() for more details.
|
||||
*/
|
||||
struct preempt_store {
|
||||
u64 ts;
|
||||
int pid;
|
||||
unsigned long ncsw;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct preempt_store, the_ps);
|
||||
|
||||
static void note_irq_disable(void *u1, unsigned long u2, unsigned long u3)
|
||||
{
|
||||
if (is_idle_task(current))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We just have to note down the time stamp here. We
|
||||
* use stacktrace trigger feature to print the stacktrace.
|
||||
*/
|
||||
this_cpu_write(irq_disabled_ts, sched_clock());
|
||||
}
|
||||
|
||||
static void test_irq_disable_long(void *u1, unsigned long ip, unsigned long parent_ip)
|
||||
{
|
||||
u64 ts = this_cpu_read(irq_disabled_ts);
|
||||
|
||||
if (!ts)
|
||||
return;
|
||||
|
||||
this_cpu_write(irq_disabled_ts, 0);
|
||||
ts = sched_clock() - ts;
|
||||
|
||||
if (ts > sysctl_irqsoff_tracing_threshold_ns) {
|
||||
trace_irq_disable_long(ts, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
|
||||
|
||||
if (sysctl_irqsoff_dmesg_output_enabled == IRQSOFF_SENTINEL)
|
||||
printk_deferred("irqs off exceeds thresh delta=%llu C:(%ps<-%ps<-%ps<-%ps)\n",
|
||||
ts, (void *)CALLER_ADDR2,
|
||||
(void *)CALLER_ADDR3,
|
||||
(void *)CALLER_ADDR4,
|
||||
(void *)CALLER_ADDR5);
|
||||
}
|
||||
|
||||
if (sysctl_irqsoff_crash_sentinel_value == IRQSOFF_SENTINEL &&
|
||||
ts > sysctl_irqsoff_crash_threshold_ns) {
|
||||
printk_deferred("delta=%llu(ns) > crash_threshold=%u(ns) Task=%s\n",
|
||||
ts, sysctl_irqsoff_crash_threshold_ns,
|
||||
current->comm);
|
||||
BUG_ON(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void note_preempt_disable(void *u1, unsigned long u2, unsigned long u3)
|
||||
{
|
||||
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
|
||||
|
||||
ps->ts = sched_clock();
|
||||
ps->pid = current->pid;
|
||||
ps->ncsw = current->nvcsw + current->nivcsw;
|
||||
}
|
||||
|
||||
static void test_preempt_disable_long(void *u1, unsigned long ip,
|
||||
unsigned long parent_ip)
|
||||
{
|
||||
struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id());
|
||||
u64 delta = 0;
|
||||
|
||||
if (!ps->ts)
|
||||
return;
|
||||
|
||||
/*
|
||||
* schedule() calls __schedule() with preemption disabled.
|
||||
* if we had entered idle and exiting idle now, we think
|
||||
* preemption is disabled the whole time. Detect this by
|
||||
* checking if the preemption is disabled across the same
|
||||
* task. There is a possibility that the same task is scheduled
|
||||
* after idle. To rule out this possibility, compare the
|
||||
* context switch count also.
|
||||
*/
|
||||
if (ps->pid == current->pid && (ps->ncsw == current->nvcsw +
|
||||
current->nivcsw))
|
||||
delta = sched_clock() - ps->ts;
|
||||
|
||||
ps->ts = 0;
|
||||
if (delta > sysctl_preemptoff_tracing_threshold_ns)
|
||||
trace_preempt_disable_long(delta, ip, parent_ip, CALLER_ADDR4, CALLER_ADDR5);
|
||||
}
|
||||
|
||||
static struct ctl_table preemptirq_long_table[] = {
|
||||
{
|
||||
.procname = "preemptoff_tracing_threshold_ns",
|
||||
.data = &sysctl_preemptoff_tracing_threshold_ns,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "irqsoff_tracing_threshold_ns",
|
||||
.data = &sysctl_irqsoff_tracing_threshold_ns,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_douintvec_minmax,
|
||||
.extra1 = &half_million,
|
||||
.extra2 = &one_hundred_million,
|
||||
},
|
||||
{
|
||||
.procname = "irqsoff_dmesg_output_enabled",
|
||||
.data = &sysctl_irqsoff_dmesg_output_enabled,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "irqsoff_crash_sentinel_value",
|
||||
.data = &sysctl_irqsoff_crash_sentinel_value,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "irqsoff_crash_threshold_ns",
|
||||
.data = &sysctl_irqsoff_crash_threshold_ns,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_douintvec_minmax,
|
||||
.extra1 = &one_million,
|
||||
.extra2 = &one_hundred_million,
|
||||
},
|
||||
};
|
||||
|
||||
int preemptirq_long_init(void)
|
||||
{
|
||||
if (!register_sysctl("preemptirq", preemptirq_long_table)) {
|
||||
pr_err("Fail to register sysctl table\n");
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
register_trace_android_rvh_irqs_disable(note_irq_disable, NULL);
|
||||
register_trace_android_rvh_irqs_enable(test_irq_disable_long, NULL);
|
||||
register_trace_android_rvh_preempt_disable(note_preempt_disable, NULL);
|
||||
register_trace_android_rvh_preempt_enable(test_preempt_disable_long,
|
||||
NULL);
|
||||
|
||||
return 0;
|
||||
}
|
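Note: register_sysctl("preemptirq", ...) above publishes these knobs under /proc/sys/preemptirq/. The user-space sketch below raises the irqs-off reporting threshold to 10 ms as an example; per the table, the value must stay within the [500000, 100000000] ns clamp.

/* Example only: tune the irqs-off tracing threshold exposed by the table above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/preemptirq/irqsoff_tracing_threshold_ns", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%u\n", 10000000u);	/* report irqs-off sections longer than 10 ms */
	return fclose(f) ? 1 : 0;
}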
kernel/sched/walt/preemptirq_long.h (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2021 The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM preemptirq_long
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
|
||||
#if !defined(_TRACE_PREEMPTIRQ_LONG_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_PREEMPTIRQ_LONG_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
/* reference preemptirq_template */
|
||||
DECLARE_EVENT_CLASS(preemptirq_long_template,
|
||||
|
||||
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
|
||||
unsigned long pparent_ip, unsigned long ppparent_ip),
|
||||
|
||||
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, delta)
|
||||
__field(unsigned long, caller_offs)
|
||||
__field(unsigned long, parent_offs)
|
||||
__field(unsigned long, pparent_offs)
|
||||
__field(unsigned long, ppparent_offs)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->delta = delta;
|
||||
__entry->caller_offs = ip;
|
||||
__entry->parent_offs = parent_ip;
|
||||
__entry->pparent_offs = pparent_ip;
|
||||
__entry->ppparent_offs = ppparent_ip;
|
||||
),
|
||||
|
||||
TP_printk("delta=%llu(ns) caller=%ps <- %ps <- %ps <- %ps",
|
||||
__entry->delta, (void *)__entry->caller_offs,
|
||||
(void *)__entry->parent_offs, (void *)__entry->pparent_offs,
|
||||
(void *)__entry->ppparent_offs)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(preemptirq_long_template, irq_disable_long,
|
||||
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
|
||||
unsigned long pparent_ip, unsigned long ppparent_ip),
|
||||
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
|
||||
|
||||
DEFINE_EVENT(preemptirq_long_template, preempt_disable_long,
|
||||
TP_PROTO(u64 delta, unsigned long ip, unsigned long parent_ip,
|
||||
unsigned long pparent_ip, unsigned long ppparent_ip),
|
||||
TP_ARGS(delta, ip, parent_ip, pparent_ip, ppparent_ip));
|
||||
|
||||
#endif /* _TRACE_PREEMPTIRQ_LONG_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
|
kernel/sched/walt/sched_avg.c (new file, 397 lines)
@@ -0,0 +1,397 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Scheduler hook for average runqueue determination
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/math64.h>
|
||||
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
|
||||
static DEFINE_PER_CPU(u64, nr_prod_sum);
|
||||
static DEFINE_PER_CPU(u64, last_time);
|
||||
static DEFINE_PER_CPU(int, last_time_cpu);
|
||||
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
|
||||
static DEFINE_PER_CPU(u64, nr_trailblazer_prod_sum);
|
||||
static DEFINE_PER_CPU(u64, nr);
|
||||
static DEFINE_PER_CPU(u64, nr_max);
|
||||
|
||||
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
|
||||
static s64 last_get_time;
|
||||
|
||||
static DEFINE_PER_CPU(atomic64_t, busy_hyst_end_time) = ATOMIC64_INIT(0);
|
||||
|
||||
static DEFINE_PER_CPU(u64, hyst_time);
|
||||
static DEFINE_PER_CPU(u64, coloc_hyst_busy);
|
||||
static DEFINE_PER_CPU(u64, coloc_hyst_time);
|
||||
static DEFINE_PER_CPU(u64, util_hyst_time);
|
||||
static DEFINE_PER_CPU(u64, smart_freq_legacy_reason_hyst_ns);
|
||||
|
||||
#define NR_THRESHOLD_PCT 40
|
||||
#define NR_THRESHOLD_TRAIL_PCT 80
|
||||
#define MAX_RTGB_TIME (sysctl_sched_coloc_busy_hyst_max_ms * NSEC_PER_MSEC)
|
||||
|
||||
struct sched_avg_stats stats[WALT_NR_CPUS];
|
||||
unsigned int cstats_util_pct[MAX_CLUSTERS];
|
||||
|
||||
u8 smart_freq_legacy_reason_hyst_ms[LEGACY_SMART_FREQ][WALT_NR_CPUS];
|
||||
|
||||
/**
|
||||
* sched_get_cluster_util_pct
|
||||
* @return: provide the percentage of this cluster that was used in the
|
||||
* previous window.
|
||||
*
|
||||
* This routine may be called any number of times as needed during
|
||||
* a window, but will always return the same result until window
|
||||
* rollover.
|
||||
*/
|
||||
unsigned int sched_get_cluster_util_pct(struct walt_sched_cluster *cluster)
|
||||
{
|
||||
unsigned int cluster_util_pct = 0;
|
||||
|
||||
if (cluster->id < MAX_CLUSTERS)
|
||||
cluster_util_pct = cstats_util_pct[cluster->id];
|
||||
|
||||
return cluster_util_pct;
|
||||
}
|
||||
|
||||
bool trailblazer_state;
|
||||
/**
|
||||
* sched_get_nr_running_avg
|
||||
* @return: Average nr_running, iowait and nr_big_tasks value since last poll.
|
||||
* Returns the avg * 100 to return up to two decimal points
|
||||
* of accuracy.
|
||||
*
|
||||
* Obtains the average nr_running value since the last poll.
|
||||
* This function may not be called concurrently with itself.
|
||||
*
|
||||
* It is assumed that this function is called at most once per window
|
||||
* rollover.
|
||||
*/
|
||||
struct sched_avg_stats *sched_get_nr_running_avg(void)
|
||||
{
|
||||
int cpu;
|
||||
u64 curr_time = sched_clock();
|
||||
u64 period = curr_time - last_get_time;
|
||||
u64 tmp_nr, tmp_misfit, tmp_trailblazer;
|
||||
bool any_hyst_time = false;
|
||||
struct walt_sched_cluster *cluster;
|
||||
bool trailblazer_cpu = false;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return NULL;
|
||||
|
||||
if (!period)
|
||||
goto done;
|
||||
|
||||
/* read and reset nr_running counts */
|
||||
for_each_possible_cpu(cpu) {
|
||||
unsigned long flags;
|
||||
u64 diff;
|
||||
|
||||
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
|
||||
curr_time = sched_clock();
|
||||
diff = curr_time - per_cpu(last_time, cpu);
|
||||
if ((s64)diff < 0) {
|
||||
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
|
||||
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
|
||||
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
|
||||
WALT_PANIC(1);
|
||||
}
|
||||
|
||||
tmp_nr = per_cpu(nr_prod_sum, cpu);
|
||||
tmp_nr += per_cpu(nr, cpu) * diff;
|
||||
tmp_nr = div64_u64((tmp_nr * 100), period);
|
||||
|
||||
tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
|
||||
tmp_misfit += walt_big_tasks(cpu) * diff;
|
||||
tmp_misfit = div64_u64((tmp_misfit * 100), period);
|
||||
|
||||
tmp_trailblazer = per_cpu(nr_trailblazer_prod_sum, cpu);
|
||||
tmp_trailblazer += walt_trailblazer_tasks(cpu) * diff;
|
||||
tmp_trailblazer = div64_u64((tmp_trailblazer * 100), period);
|
||||
|
||||
/*
 * NR_THRESHOLD_PCT makes sure that the task ran for at least
 * 60% (100 - NR_THRESHOLD_PCT) of the last window, to compensate
 * for any over-estimation being done.
 */
|
||||
stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
|
||||
100);
|
||||
stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
|
||||
NR_THRESHOLD_PCT), 100);
|
||||
trailblazer_cpu |= (int)div64_u64((tmp_trailblazer +
|
||||
NR_THRESHOLD_TRAIL_PCT), 100);
|
||||
|
||||
stats[cpu].nr_max = per_cpu(nr_max, cpu);
|
||||
stats[cpu].nr_scaled = tmp_nr;
|
||||
|
||||
trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
|
||||
stats[cpu].nr_misfit, stats[cpu].nr_max,
|
||||
stats[cpu].nr_scaled, trailblazer_cpu);
|
||||
|
||||
per_cpu(last_time, cpu) = curr_time;
|
||||
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
|
||||
per_cpu(nr_prod_sum, cpu) = 0;
|
||||
per_cpu(nr_big_prod_sum, cpu) = 0;
|
||||
per_cpu(nr_trailblazer_prod_sum, cpu) = 0;
|
||||
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
|
||||
|
||||
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
|
||||
}
|
||||
|
||||
trailblazer_state = trailblazer_cpu;
|
||||
/* collect cluster load stats */
|
||||
for_each_sched_cluster(cluster) {
|
||||
unsigned int num_cpus = cpumask_weight(&cluster->cpus);
|
||||
unsigned int sum_util_pct = 0;
|
||||
|
||||
/* load is already scaled, see freq_policy_load/prev_runnable_sum */
|
||||
for_each_cpu(cpu, &cluster->cpus) {
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
||||
|
||||
/* compute this cpu's utilization as a percentage of the cpu
 * capacity, and sum it across all cpus
 */
|
||||
sum_util_pct +=
|
||||
(wrq->util * 100) / arch_scale_cpu_capacity(cpu);
|
||||
}
|
||||
|
||||
/* calculate the average per-cpu utilization */
|
||||
cstats_util_pct[cluster->id] = sum_util_pct / num_cpus;
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (per_cpu(coloc_hyst_time, cpu)) {
|
||||
any_hyst_time = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (any_hyst_time && get_rtgb_active_time() >= MAX_RTGB_TIME)
|
||||
sched_update_hyst_times();
|
||||
|
||||
last_get_time = curr_time;
|
||||
|
||||
done:
|
||||
return &stats[0];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_get_nr_running_avg);
|
||||
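The averaging above keeps a per-CPU product sum (nr_running x time), scales it by 100/period, and then rounds with NR_THRESHOLD_PCT so that a task present for at least (100 - NR_THRESHOLD_PCT)% of the window counts as a whole task. A minimal stand-alone sketch of that arithmetic, with illustrative values instead of the kernel's per-CPU data:

#include <stdint.h>
#include <stdio.h>

#define NR_THRESHOLD_PCT 40

/* Time-weighted average of nr_running over one window, scaled by 100,
 * then rounded the way sched_get_nr_running_avg() rounds: anything that
 * ran at least (100 - NR_THRESHOLD_PCT)% of the window counts as one task.
 */
static int avg_nr_tasks(uint64_t prod_sum, uint64_t cur_nr,
			uint64_t delta, uint64_t period)
{
	uint64_t scaled;

	prod_sum += cur_nr * delta;		/* close out the open interval */
	scaled = (prod_sum * 100) / period;	/* average * 100 */
	return (int)((scaled + NR_THRESHOLD_PCT) / 100);
}

int main(void)
{
	/* one task ran 6 ms of a 10 ms window: 60% rounds up to 1 */
	printf("%d\n", avg_nr_tasks(6000000ULL, 0, 0, 10000000ULL));
	/* one task ran 5 ms of a 10 ms window: 50% rounds down to 0 */
	printf("%d\n", avg_nr_tasks(5000000ULL, 0, 0, 10000000ULL));
	return 0;
}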
|
||||
void sched_update_hyst_times(void)
|
||||
{
|
||||
bool rtgb_active;
|
||||
int cpu;
|
||||
unsigned long cpu_cap, coloc_busy_pct;
|
||||
|
||||
rtgb_active = is_rtgb_active() && (sched_boost_type != CONSERVATIVE_BOOST)
|
||||
&& (get_rtgb_active_time() < MAX_RTGB_TIME);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpu_cap = arch_scale_cpu_capacity(cpu);
|
||||
coloc_busy_pct = sysctl_sched_coloc_busy_hyst_cpu_busy_pct[cpu];
|
||||
per_cpu(hyst_time, cpu) = (BIT(cpu)
|
||||
& sysctl_sched_busy_hyst_enable_cpus) ?
|
||||
sysctl_sched_busy_hyst : 0;
|
||||
per_cpu(coloc_hyst_time, cpu) = ((BIT(cpu)
|
||||
& sysctl_sched_coloc_busy_hyst_enable_cpus)
|
||||
&& rtgb_active) ?
|
||||
sysctl_sched_coloc_busy_hyst_cpu[cpu] : 0;
|
||||
per_cpu(coloc_hyst_busy, cpu) = mult_frac(cpu_cap,
|
||||
coloc_busy_pct, 100);
|
||||
per_cpu(util_hyst_time, cpu) = (BIT(cpu)
|
||||
& sysctl_sched_util_busy_hyst_enable_cpus) ?
|
||||
sysctl_sched_util_busy_hyst_cpu[cpu] : 0;
|
||||
}
|
||||
}
|
||||
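sched_update_hyst_times() above gates each per-CPU hysteresis value on a sysctl bitmask (BIT(cpu) & ...enable_cpus) and turns a busy percentage into an absolute capacity threshold with mult_frac(). A small user-space sketch of those two idioms, with made-up mask and capacity values:

#include <stdint.h>
#include <stdio.h>

#define BIT(n)			(1ULL << (n))
/* mirrors the kernel's mult_frac(): x * num / den without overflowing
 * the intermediate product in the common case */
#define mult_frac(x, num, den)	(((x) / (den)) * (num) + ((x) % (den)) * (num) / (den))

int main(void)
{
	uint64_t enable_cpus = 112;	/* illustrative: CPUs 4-6 enabled */
	uint64_t hyst_ns = 5000000;	/* 5 ms hysteresis when enabled */
	unsigned long cpu_cap = 1024;	/* illustrative capacity */
	unsigned long busy_pct = 10;

	for (int cpu = 0; cpu < 8; cpu++) {
		uint64_t hyst = (BIT(cpu) & enable_cpus) ? hyst_ns : 0;
		unsigned long busy_thresh = mult_frac(cpu_cap, busy_pct, 100);

		printf("cpu%d hyst=%llu busy_thresh=%lu\n",
		       cpu, (unsigned long long)hyst, busy_thresh);
	}
	return 0;
}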
|
||||
#define BUSY_NR_RUN 3
|
||||
#define BUSY_LOAD_FACTOR 10
|
||||
static inline void update_busy_hyst_end_time(int cpu, int enq,
|
||||
unsigned long prev_nr_run, u64 curr_time)
|
||||
{
|
||||
bool nr_run_trigger = false;
|
||||
bool load_trigger = false, coloc_load_trigger = false;
|
||||
u64 agg_hyst_time, total_util = 0;
|
||||
bool util_load_trigger = false;
|
||||
int i;
|
||||
bool hyst_trigger, coloc_trigger;
|
||||
bool dequeue = (enq < 0);
|
||||
|
||||
if (is_max_possible_cluster_cpu(cpu) && is_obet)
|
||||
return;
|
||||
|
||||
if (!per_cpu(hyst_time, cpu) && !per_cpu(coloc_hyst_time, cpu) &&
|
||||
!per_cpu(util_hyst_time, cpu) && !per_cpu(smart_freq_legacy_reason_hyst_ns, cpu))
|
||||
return;
|
||||
|
||||
if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
|
||||
nr_run_trigger = true;
|
||||
|
||||
if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
|
||||
capacity_orig_of(cpu))
|
||||
load_trigger = true;
|
||||
|
||||
if (dequeue && cpu_util(cpu) > per_cpu(coloc_hyst_busy, cpu))
|
||||
coloc_load_trigger = true;
|
||||
|
||||
if (dequeue) {
|
||||
for_each_possible_cpu(i) {
|
||||
total_util += cpu_util(i);
|
||||
if (total_util >= sysctl_sched_util_busy_hyst_cpu_util[cpu]) {
|
||||
util_load_trigger = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
coloc_trigger = nr_run_trigger || coloc_load_trigger;
|
||||
#if IS_ENABLED(CONFIG_SCHED_CONSERVATIVE_BOOST_LPM_BIAS)
|
||||
hyst_trigger = nr_run_trigger || load_trigger || (sched_boost_type == CONSERVATIVE_BOOST);
|
||||
#else
|
||||
hyst_trigger = nr_run_trigger || load_trigger;
|
||||
#endif
|
||||
|
||||
agg_hyst_time = max(max(hyst_trigger ? per_cpu(hyst_time, cpu) : 0,
|
||||
coloc_trigger ? per_cpu(coloc_hyst_time, cpu) : 0),
|
||||
util_load_trigger ? per_cpu(util_hyst_time, cpu) : 0);
|
||||
agg_hyst_time = max(agg_hyst_time, per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
|
||||
|
||||
if (agg_hyst_time) {
|
||||
atomic64_set(&per_cpu(busy_hyst_end_time, cpu),
|
||||
curr_time + agg_hyst_time);
|
||||
trace_sched_busy_hyst_time(cpu, agg_hyst_time, prev_nr_run,
|
||||
cpu_util(cpu), per_cpu(hyst_time, cpu),
|
||||
per_cpu(coloc_hyst_time, cpu),
|
||||
per_cpu(util_hyst_time, cpu),
|
||||
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu));
|
||||
}
|
||||
}
|
||||
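update_busy_hyst_end_time() above combines several independently triggered hysteresis values by taking their maximum and arming a per-CPU deadline (busy_hyst_end_time = now + aggregate). The pattern, reduced to a stand-alone sketch with hypothetical trigger flags:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

/* Only hysteresis components whose trigger fired contribute; the deadline
 * becomes now + max(selected components). */
static uint64_t arm_busy_hyst(uint64_t now,
			      bool nr_trig, uint64_t nr_hyst,
			      bool load_trig, uint64_t load_hyst,
			      uint64_t always_on_hyst)
{
	uint64_t agg = 0;

	agg = max_u64(agg, nr_trig ? nr_hyst : 0);
	agg = max_u64(agg, load_trig ? load_hyst : 0);
	agg = max_u64(agg, always_on_hyst);

	return agg ? now + agg : 0;	/* 0 means nothing to arm */
}

int main(void)
{
	printf("%llu\n", (unsigned long long)
	       arm_busy_hyst(1000, true, 5, false, 9, 2));	/* prints 1005 */
	return 0;
}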
|
||||
int sched_busy_hyst_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (table->maxlen > (sizeof(unsigned int) * num_possible_cpus()))
|
||||
table->maxlen = sizeof(unsigned int) * num_possible_cpus();
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
|
||||
if (!ret && write)
|
||||
sched_update_hyst_times();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sched_update_nr_prod
|
||||
* @cpu: The core id of the nr running driver.
|
||||
* @enq: enqueue/dequeue/misfit happening on this CPU.
|
||||
* @return: N/A
|
||||
*
|
||||
* Update average with latest nr_running value for CPU
|
||||
*/
|
||||
void sched_update_nr_prod(int cpu, int enq)
|
||||
{
|
||||
u64 diff;
|
||||
u64 curr_time;
|
||||
unsigned long flags, nr_running;
|
||||
|
||||
spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
|
||||
nr_running = per_cpu(nr, cpu);
|
||||
curr_time = sched_clock();
|
||||
diff = curr_time - per_cpu(last_time, cpu);
|
||||
if ((s64)diff < 0) {
|
||||
printk_deferred("WALT-BUG CPU%d; curr_time=%llu(0x%llx) is lesser than per_cpu_last_time=%llu(0x%llx) last_time_cpu=%d",
|
||||
cpu, curr_time, curr_time, per_cpu(last_time, cpu),
|
||||
per_cpu(last_time, cpu), per_cpu(last_time_cpu, cpu));
|
||||
WALT_PANIC(1);
|
||||
}
|
||||
per_cpu(last_time, cpu) = curr_time;
|
||||
per_cpu(last_time_cpu, cpu) = raw_smp_processor_id();
|
||||
per_cpu(nr, cpu) = cpu_rq(cpu)->nr_running + enq;
|
||||
|
||||
if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
|
||||
per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
|
||||
|
||||
/* Don't update hyst time for misfit tasks */
|
||||
if (enq)
|
||||
update_busy_hyst_end_time(cpu, enq, nr_running, curr_time);
|
||||
|
||||
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
|
||||
per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
|
||||
per_cpu(nr_trailblazer_prod_sum, cpu) += (u64) walt_trailblazer_tasks(cpu) * diff;
|
||||
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the CPU utilization % in the last window.
|
||||
*/
|
||||
unsigned int sched_get_cpu_util_pct(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
u64 util;
|
||||
unsigned long capacity, flags;
|
||||
unsigned int busy;
|
||||
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&rq->__lock, flags);
|
||||
|
||||
capacity = capacity_orig_of(cpu);
|
||||
|
||||
util = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;
|
||||
util = scale_time_to_util(util);
|
||||
raw_spin_unlock_irqrestore(&rq->__lock, flags);
|
||||
|
||||
util = (util >= capacity) ? capacity : util;
|
||||
busy = div64_ul((util * 100), capacity);
|
||||
return busy;
|
||||
}
|
||||
|
||||
int sched_lpm_disallowed_time(int cpu, u64 *timeout)
|
||||
{
|
||||
u64 now = sched_clock();
|
||||
u64 bias_end_time = atomic64_read(&per_cpu(busy_hyst_end_time, cpu));
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return -EAGAIN;
|
||||
|
||||
if (unlikely(is_reserved(cpu))) {
|
||||
*timeout = 10 * NSEC_PER_MSEC;
|
||||
return 0; /* shallowest c-state */
|
||||
}
|
||||
|
||||
if (now < bias_end_time) {
|
||||
*timeout = bias_end_time - now;
|
||||
return 0; /* shallowest c-state */
|
||||
}
|
||||
|
||||
return INT_MAX; /* don't care */
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_lpm_disallowed_time);
|
||||
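sched_lpm_disallowed_time() above reads an atomically published end-of-bias timestamp and, while it is still in the future, reports the remaining nanoseconds so cpuidle stays in a shallow state. The same check as a minimal sketch, with plain variables instead of the kernel's per-CPU atomics:

#include <stdint.h>
#include <stdio.h>
#include <limits.h>

/* Returns 0 and fills *timeout while the bias window is active,
 * INT_MAX once the deadline has passed (deep idle allowed). */
static int lpm_disallowed(uint64_t now, uint64_t bias_end, uint64_t *timeout)
{
	if (now < bias_end) {
		*timeout = bias_end - now;
		return 0;		/* shallowest c-state */
	}
	return INT_MAX;			/* don't care */
}

int main(void)
{
	uint64_t t;

	if (lpm_disallowed(100, 250, &t) == 0)
		printf("stay shallow for %llu ns\n", (unsigned long long)t);
	return 0;
}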
|
||||
void update_smart_freq_legacy_reason_hyst_time(struct walt_sched_cluster *cluster)
|
||||
{
|
||||
int cpu, i;
|
||||
u8 max_hyst_ms;
|
||||
|
||||
for_each_cpu(cpu, &cluster->cpus) {
|
||||
max_hyst_ms = 0;
|
||||
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
|
||||
if (cluster->smart_freq_info->cluster_active_reason & BIT(i))
|
||||
max_hyst_ms =
|
||||
max(smart_freq_legacy_reason_hyst_ms[i][cpu],
|
||||
max_hyst_ms);
|
||||
}
|
||||
per_cpu(smart_freq_legacy_reason_hyst_ns, cpu) = max_hyst_ms * NSEC_PER_MSEC;
|
||||
}
|
||||
}
|
589  kernel/sched/walt/smart_freq.c  Normal file
@@ -0,0 +1,589 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/tick.h>
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
#include <trace/events/power.h>
|
||||
|
||||
bool smart_freq_init_done;
|
||||
char reason_dump[1024];
|
||||
static DEFINE_MUTEX(freq_reason_mutex);
|
||||
|
||||
int sched_smart_freq_legacy_dump_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret = -EINVAL, pos = 0, i, j;
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&freq_reason_mutex);
|
||||
for (j = 0; j < num_sched_clusters; j++) {
|
||||
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
|
||||
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%llu:%d\n", j, i,
|
||||
default_freq_config[j].legacy_reason_config[i].freq_allowed,
|
||||
default_freq_config[j].legacy_reason_config[i].hyst_ns,
|
||||
!!(default_freq_config[j].smart_freq_participation_mask &
|
||||
BIT(i)));
|
||||
}
|
||||
}
|
||||
|
||||
ret = proc_dostring(table, write, buffer, lenp, ppos);
|
||||
mutex_unlock(&freq_reason_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sched_smart_freq_ipc_dump_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret = -EINVAL, pos = 0, i, j;
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&freq_reason_mutex);
|
||||
|
||||
for (j = 0; j < num_sched_clusters; j++) {
|
||||
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
|
||||
pos += snprintf(reason_dump + pos, 50, "%d:%d:%lu:%lu:%llu:%d\n", j, i,
|
||||
default_freq_config[j].ipc_reason_config[i].ipc,
|
||||
default_freq_config[j].ipc_reason_config[i].freq_allowed,
|
||||
default_freq_config[j].ipc_reason_config[i].hyst_ns,
|
||||
!!(default_freq_config[j].smart_freq_ipc_participation_mask &
|
||||
BIT(i)));
|
||||
}
|
||||
}
|
||||
|
||||
ret = proc_dostring(table, write, buffer, lenp, ppos);
|
||||
mutex_unlock(&freq_reason_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sched_smart_freq_ipc_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
int cluster_id = -1;
|
||||
unsigned long no_reason_freq;
|
||||
int i;
|
||||
unsigned int *data = (unsigned int *)table->data;
|
||||
int val[SMART_FMAX_IPC_MAX];
|
||||
struct ctl_table tmp = {
|
||||
.data = &val,
|
||||
.maxlen = sizeof(int) * SMART_FMAX_IPC_MAX,
|
||||
.mode = table->mode,
|
||||
};
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&freq_reason_mutex);
|
||||
|
||||
if (!write) {
|
||||
tmp.data = table->data;
|
||||
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = -EINVAL;
|
||||
|
||||
if (data == &sysctl_ipc_freq_levels_cluster0[0])
|
||||
cluster_id = 0;
|
||||
if (data == &sysctl_ipc_freq_levels_cluster1[0])
|
||||
cluster_id = 1;
|
||||
if (data == &sysctl_ipc_freq_levels_cluster2[0])
|
||||
cluster_id = 2;
|
||||
if (data == &sysctl_ipc_freq_levels_cluster3[0])
|
||||
cluster_id = 3;
|
||||
if (cluster_id == -1)
|
||||
goto unlock;
|
||||
|
||||
if (val[0] < 0)
|
||||
goto unlock;
|
||||
|
||||
no_reason_freq = val[0];
|
||||
|
||||
/* Make sure all reasons freq are larger than NO_REASON */
|
||||
/* IPC/freq should be in increasing order */
|
||||
for (i = 1; i < SMART_FMAX_IPC_MAX; i++) {
|
||||
if (val[i] < val[i-1])
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
default_freq_config[cluster_id].legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed =
|
||||
no_reason_freq;
|
||||
|
||||
for (i = 0; i < SMART_FMAX_IPC_MAX; i++) {
|
||||
default_freq_config[cluster_id].ipc_reason_config[i].freq_allowed = val[i];
|
||||
data[i] = val[i];
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&freq_reason_mutex);
|
||||
return ret;
|
||||
}
|
||||
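The sysctl handler above rejects a write unless the IPC frequency levels are non-negative and non-decreasing, and only then copies them into the live config. The validation step on its own, as a small sketch:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Accept the new table only if val[0] >= 0 and the array never decreases,
 * mirroring the ordering check in sched_smart_freq_ipc_handler(). */
static bool ipc_levels_valid(const int *val, size_t n)
{
	if (n == 0 || val[0] < 0)
		return false;
	for (size_t i = 1; i < n; i++)
		if (val[i] < val[i - 1])
			return false;
	return true;
}

int main(void)
{
	int good[] = { 1200000, 1800000, 2200000 };
	int bad[]  = { 1200000, 1100000, 2200000 };

	printf("%d %d\n", ipc_levels_valid(good, 3), ipc_levels_valid(bad, 3));
	return 0;
}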
|
||||
/* return highest ipc of the cluster */
|
||||
unsigned int get_cluster_ipc_level_freq(int curr_cpu, u64 time)
|
||||
{
|
||||
int cpu, winning_cpu, cpu_ipc_level = 0, index = 0;
|
||||
struct walt_sched_cluster *cluster = cpu_cluster(curr_cpu);
|
||||
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return 0;
|
||||
|
||||
for_each_cpu(cpu, &cluster->cpus) {
|
||||
cpu_ipc_level = per_cpu(ipc_level, cpu);
|
||||
|
||||
if ((time - per_cpu(last_ipc_update, cpu)) > 7999999ULL) {
|
||||
cpu_ipc_level = 0;
|
||||
per_cpu(tickless_mode, cpu) = true;
|
||||
} else {
|
||||
per_cpu(tickless_mode, cpu) = false;
|
||||
}
|
||||
|
||||
|
||||
if (cpu_ipc_level >= index) {
|
||||
winning_cpu = cpu;
|
||||
index = cpu_ipc_level;
|
||||
}
|
||||
}
|
||||
|
||||
smart_freq_info->cluster_ipc_level = index;
|
||||
|
||||
trace_ipc_freq(cluster->id, winning_cpu, index,
|
||||
smart_freq_info->ipc_reason_config[index].freq_allowed,
|
||||
time, per_cpu(ipc_deactivate_ns, winning_cpu), curr_cpu,
|
||||
per_cpu(ipc_cnt, curr_cpu));
|
||||
|
||||
return smart_freq_info->ipc_reason_config[index].freq_allowed;
|
||||
}
|
||||
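get_cluster_ipc_level_freq() above picks the highest per-CPU IPC level in the cluster, but treats a CPU whose last IPC update is older than roughly 8 ms as tickless and forces its level to zero. A compact sketch of that selection, with hypothetical arrays in place of the per-CPU variables:

#include <stdint.h>
#include <stdio.h>

#define IPC_STALE_NS	8000000ULL	/* about 8 ms, as in the code above */

/* Return the highest fresh IPC level among the cluster's CPUs. */
static int cluster_ipc_level(const int *level, const uint64_t *last_update,
			     int ncpus, uint64_t now)
{
	int best = 0;

	for (int cpu = 0; cpu < ncpus; cpu++) {
		int l = level[cpu];

		if (now - last_update[cpu] > IPC_STALE_NS)
			l = 0;		/* stale: CPU is effectively tickless */
		if (l > best)
			best = l;
	}
	return best;
}

int main(void)
{
	int level[] = { 2, 4, 1 };
	uint64_t last[] = { 100, 50, 99 };	/* CPU1 goes stale below */

	printf("%d\n", cluster_ipc_level(level, last, 3, 50 + IPC_STALE_NS + 1));
	return 0;
}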
|
||||
static inline bool has_internal_freq_limit_changed(struct walt_sched_cluster *cluster)
|
||||
{
|
||||
unsigned int internal_freq, ipc_freq;
|
||||
int i;
|
||||
struct smart_freq_cluster_info *smci = cluster->smart_freq_info;
|
||||
|
||||
internal_freq = cluster->walt_internal_freq_limit;
|
||||
cluster->walt_internal_freq_limit = cluster->max_freq;
|
||||
|
||||
for (i = 0; i < MAX_FREQ_CAP; i++)
|
||||
cluster->walt_internal_freq_limit = min(freq_cap[i][cluster->id],
|
||||
cluster->walt_internal_freq_limit);
|
||||
|
||||
ipc_freq = smci->ipc_reason_config[smci->cluster_ipc_level].freq_allowed;
|
||||
cluster->walt_internal_freq_limit = max(ipc_freq,
|
||||
cluster->walt_internal_freq_limit);
|
||||
|
||||
return cluster->walt_internal_freq_limit != internal_freq;
|
||||
}
|
||||
|
||||
void update_smart_freq_capacities_one_cluster(struct walt_sched_cluster *cluster)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return;
|
||||
|
||||
if (has_internal_freq_limit_changed(cluster)) {
|
||||
for_each_cpu(cpu, &cluster->cpus)
|
||||
update_cpu_capacity_helper(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
void update_smart_freq_capacities(void)
|
||||
{
|
||||
struct walt_sched_cluster *cluster;
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return;
|
||||
|
||||
for_each_sched_cluster(cluster)
|
||||
update_smart_freq_capacities_one_cluster(cluster);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the active smart freq reason for the cluster.
|
||||
*/
|
||||
static void smart_freq_update_one_cluster(struct walt_sched_cluster *cluster,
|
||||
uint32_t current_reasons, u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
|
||||
{
|
||||
uint32_t current_reason, cluster_active_reason;
|
||||
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
|
||||
unsigned long max_cap =
|
||||
smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
|
||||
int max_reason = NO_REASON_SMART_FREQ, i; /* default when no higher reason wins */
|
||||
unsigned long old_freq_cap = freq_cap[SMART_FREQ][cluster->id];
|
||||
struct rq *rq;
|
||||
char smart_freq[25] = {0};
|
||||
char smart_freq_reason[25] = {0};
|
||||
|
||||
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
|
||||
current_reason = current_reasons & BIT(i);
|
||||
cluster_active_reason = smart_freq_info->cluster_active_reason & BIT(i);
|
||||
|
||||
if (current_reason) {
|
||||
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
|
||||
smart_freq_info->cluster_active_reason |= BIT(i);
|
||||
|
||||
if (i == TRAILBLAZER_SMART_FREQ)
|
||||
trail_active = true;
|
||||
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
|
||||
sustain_active = true;
|
||||
|
||||
} else if (cluster_active_reason) {
|
||||
if (!smart_freq_info->legacy_reason_status[i].deactivate_ns)
|
||||
smart_freq_info->legacy_reason_status[i].deactivate_ns = wallclock;
|
||||
}
|
||||
|
||||
if (cluster_active_reason) {
|
||||
/*
 * For reasons with deactivation hysteresis, check here whether the
 * hysteresis time has been crossed and, if so, deactivate the reason.
 * We rely on the scheduler tick path to call this function, so a
 * reason is only deactivated at a tick boundary.
 */
|
||||
if (smart_freq_info->legacy_reason_status[i].deactivate_ns) {
|
||||
u64 delta = wallclock -
|
||||
smart_freq_info->legacy_reason_status[i].deactivate_ns;
|
||||
if (delta >= smart_freq_info->legacy_reason_config[i].hyst_ns) {
|
||||
smart_freq_info->legacy_reason_status[i].deactivate_ns = 0;
|
||||
smart_freq_info->cluster_active_reason &= ~BIT(i);
|
||||
|
||||
if (i == TRAILBLAZER_SMART_FREQ)
|
||||
trail_active = false;
|
||||
else if (i == SUSTAINED_HIGH_UTIL_SMART_FREQ)
|
||||
sustain_active = false;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (max_cap < smart_freq_info->legacy_reason_config[i].freq_allowed) {
|
||||
max_cap = smart_freq_info->legacy_reason_config[i].freq_allowed;
|
||||
max_reason = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (enable_logging) {
|
||||
snprintf(smart_freq, sizeof(smart_freq), "smart_fmax_%d", cluster->id);
|
||||
trace_clock_set_rate(smart_freq, max_cap, raw_smp_processor_id());
|
||||
|
||||
snprintf(smart_freq_reason, sizeof(smart_freq_reason), "legacy_reason_%d", cluster->id);
|
||||
trace_clock_set_rate(smart_freq_reason, max_reason, raw_smp_processor_id());
|
||||
}
|
||||
|
||||
trace_sched_freq_uncap(cluster->id, nr_big, wakeup_ctr_sum, current_reasons,
|
||||
smart_freq_info->cluster_active_reason, max_cap, max_reason);
|
||||
|
||||
if (old_freq_cap == max_cap)
|
||||
return;
|
||||
|
||||
freq_cap[SMART_FREQ][cluster->id] = max_cap;
|
||||
|
||||
rq = cpu_rq(cpumask_first(&cluster->cpus));
|
||||
/*
|
||||
* cpufreq smart freq doesn't call get_util for the cpu, hence
|
||||
* invoking callback without rq lock is safe.
|
||||
*/
|
||||
waltgov_run_callback(rq, WALT_CPUFREQ_SMART_FREQ_BIT);
|
||||
}
|
||||
|
||||
#define UNCAP_THRES 300000000
|
||||
#define UTIL_THRESHOLD 90
|
||||
static bool thres_based_uncap(u64 window_start, struct walt_sched_cluster *cluster)
|
||||
{
|
||||
int cpu;
|
||||
bool cluster_high_load = false, sustained_load = false;
|
||||
unsigned long freq_capacity, tgt_cap;
|
||||
unsigned long tgt_freq =
|
||||
cluster->smart_freq_info->legacy_reason_config[NO_REASON_SMART_FREQ].freq_allowed;
|
||||
struct walt_rq *wrq;
|
||||
|
||||
freq_capacity = arch_scale_cpu_capacity(cpumask_first(&cluster->cpus));
|
||||
tgt_cap = mult_frac(freq_capacity, tgt_freq, cluster->max_possible_freq);
|
||||
|
||||
for_each_cpu(cpu, &cluster->cpus) {
|
||||
wrq = &per_cpu(walt_rq, cpu);
|
||||
if (wrq->util >= mult_frac(tgt_cap, UTIL_THRESHOLD, 100)) {
|
||||
cluster_high_load = true;
|
||||
if (!cluster->found_ts)
|
||||
cluster->found_ts = window_start;
|
||||
else if ((window_start - cluster->found_ts) >= UNCAP_THRES)
|
||||
sustained_load = true;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!cluster_high_load)
|
||||
cluster->found_ts = 0;
|
||||
|
||||
return sustained_load;
|
||||
}
|
||||
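thres_based_uncap() above declares sustained load once some CPU in the cluster has stayed above UTIL_THRESHOLD percent of the capacity implied by the no-reason frequency cap for at least UNCAP_THRES (300 ms), remembering the start of the episode in found_ts. The same detector as a stand-alone sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define UNCAP_THRES	300000000ULL	/* 300 ms in ns */
#define UTIL_THRESHOLD	90		/* percent of the capped capacity */

struct uncap_state {
	uint64_t found_ts;	/* window_start when high load was first seen */
};

/* Returns true once utilization has stayed above the threshold for
 * UNCAP_THRES; resets the episode as soon as the load drops. */
static bool sustained_high_load(struct uncap_state *s, uint64_t window_start,
				unsigned long util, unsigned long capped_cap)
{
	if (util < capped_cap * UTIL_THRESHOLD / 100) {
		s->found_ts = 0;	/* episode over */
		return false;
	}
	if (!s->found_ts)
		s->found_ts = window_start;
	return (window_start - s->found_ts) >= UNCAP_THRES;
}

int main(void)
{
	struct uncap_state s = { 0 };

	printf("%d\n", sustained_high_load(&s, 0, 950, 1000));	/* 0: just started */
	printf("%d\n", sustained_high_load(&s, 400000000ULL, 950, 1000)); /* 1 */
	return 0;
}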
|
||||
unsigned int big_task_cnt = 6;
|
||||
#define WAKEUP_CNT 100
|
||||
/*
 * A reason is a two-part bitmap:
 *   bits 15 -  0: reason type
 *   bits 31 - 16: changed state of the reason
 * This allows multiple reasons to be passed at once and avoids
 * multiple calls.
 */
|
||||
/*
|
||||
* This will be called from irq work path only
|
||||
*/
|
||||
void smart_freq_update_reason_common(u64 wallclock, int nr_big, u32 wakeup_ctr_sum)
|
||||
{
|
||||
struct walt_sched_cluster *cluster;
|
||||
bool current_state;
|
||||
uint32_t cluster_reasons;
|
||||
int i;
|
||||
int cluster_active_reason;
|
||||
uint32_t cluster_participation_mask;
|
||||
bool sustained_load = false;
|
||||
|
||||
if (!smart_freq_init_done)
|
||||
return;
|
||||
|
||||
for_each_sched_cluster(cluster)
|
||||
sustained_load |= thres_based_uncap(wallclock, cluster);
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
cluster_reasons = 0;
|
||||
i = cluster->id;
|
||||
cluster_participation_mask =
|
||||
cluster->smart_freq_info->smart_freq_participation_mask;
|
||||
/*
|
||||
* NO_REASON
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(NO_REASON_SMART_FREQ))
|
||||
cluster_reasons |= BIT(NO_REASON_SMART_FREQ);
|
||||
|
||||
/*
|
||||
* BOOST
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(BOOST_SMART_FREQ)) {
|
||||
current_state = is_storage_boost() || is_full_throttle_boost();
|
||||
if (current_state)
|
||||
cluster_reasons |= BIT(BOOST_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* TRAILBLAZER
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(TRAILBLAZER_SMART_FREQ)) {
|
||||
current_state = trailblazer_state;
|
||||
if (current_state)
|
||||
cluster_reasons |= BIT(TRAILBLAZER_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* SBT
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(SBT_SMART_FREQ)) {
|
||||
current_state = prev_is_sbt;
|
||||
if (current_state)
|
||||
cluster_reasons |= BIT(SBT_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* BIG_TASKCNT
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(BIG_TASKCNT_SMART_FREQ)) {
|
||||
current_state = (nr_big >= big_task_cnt) &&
|
||||
(wakeup_ctr_sum < WAKEUP_CNT);
|
||||
if (current_state)
|
||||
cluster_reasons |= BIT(BIG_TASKCNT_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* SUSTAINED_HIGH_UTIL
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ)) {
|
||||
current_state = sustained_load;
|
||||
if (current_state)
|
||||
cluster_reasons |= BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* PIPELINE_60FPS_OR_LESSER
|
||||
*/
|
||||
if (cluster_participation_mask &
|
||||
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ)) {
|
||||
current_state = pipeline_in_progress() &&
|
||||
sched_ravg_window >= SCHED_RAVG_16MS_WINDOW;
|
||||
if (current_state)
|
||||
cluster_reasons |=
|
||||
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* PIPELINE_90FPS
|
||||
*/
|
||||
if (cluster_participation_mask &
|
||||
BIT(PIPELINE_90FPS_SMART_FREQ)) {
|
||||
current_state = pipeline_in_progress() &&
|
||||
sched_ravg_window == SCHED_RAVG_12MS_WINDOW;
|
||||
if (current_state)
|
||||
cluster_reasons |=
|
||||
BIT(PIPELINE_90FPS_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* PIPELINE_120FPS_OR_GREATER
|
||||
*/
|
||||
if (cluster_participation_mask &
|
||||
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ)) {
|
||||
current_state = pipeline_in_progress() &&
|
||||
sched_ravg_window == SCHED_RAVG_8MS_WINDOW;
|
||||
if (current_state)
|
||||
cluster_reasons |=
|
||||
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* THERMAL_ROTATION
|
||||
*/
|
||||
if (cluster_participation_mask & BIT(THERMAL_ROTATION_SMART_FREQ)) {
|
||||
current_state = (oscillate_cpu != -1);
|
||||
if (current_state)
|
||||
cluster_reasons |= BIT(THERMAL_ROTATION_SMART_FREQ);
|
||||
}
|
||||
|
||||
cluster_active_reason = cluster->smart_freq_info->cluster_active_reason;
|
||||
/* update the reasons for all the clusters */
|
||||
if (cluster_reasons || cluster_active_reason)
|
||||
smart_freq_update_one_cluster(cluster, cluster_reasons, wallclock,
|
||||
nr_big, wakeup_ctr_sum);
|
||||
}
|
||||
}
|
||||
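smart_freq_update_reason_common() above builds a per-cluster bitmap of the reasons that are currently true and hands it to smart_freq_update_one_cluster(), which sets new bits immediately but only clears a bit after its deactivation hysteresis has expired. A reduced sketch of that activate-now / deactivate-late state machine, with hypothetical reason IDs and timings:

#include <stdint.h>
#include <stdio.h>

#define BIT(n)		(1U << (n))
#define NREASONS	4

struct reason_state {
	uint32_t active;			/* currently active reason bits */
	uint64_t deactivate_ns[NREASONS];	/* when the reason stopped being true */
	uint64_t hyst_ns[NREASONS];		/* per-reason deactivation hysteresis */
};

/* cur: bitmap of reasons that are true right now, sampled at time "now" */
static void update_reasons(struct reason_state *s, uint32_t cur, uint64_t now)
{
	for (int i = 0; i < NREASONS; i++) {
		if (cur & BIT(i)) {
			s->active |= BIT(i);		/* activate immediately */
			s->deactivate_ns[i] = 0;
		} else if (s->active & BIT(i)) {
			if (!s->deactivate_ns[i])
				s->deactivate_ns[i] = now;	/* start hysteresis */
			else if (now - s->deactivate_ns[i] >= s->hyst_ns[i]) {
				s->active &= ~BIT(i);		/* expired: clear */
				s->deactivate_ns[i] = 0;
			}
		}
	}
}

int main(void)
{
	struct reason_state s = { .hyst_ns = { 0, 100, 100, 100 } };

	update_reasons(&s, BIT(1), 0);		/* reason 1 turns on */
	update_reasons(&s, 0, 50);		/* off, but inside hysteresis */
	printf("%x\n", s.active);		/* still 0x2 */
	update_reasons(&s, 0, 200);		/* hysteresis expired */
	printf("%x\n", s.active);		/* 0x0 */
	return 0;
}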
|
||||
/* Common config for 4 cluster system */
|
||||
struct smart_freq_cluster_info default_freq_config[MAX_CLUSTERS];
|
||||
|
||||
void smart_freq_init(const char *name)
|
||||
{
|
||||
struct walt_sched_cluster *cluster;
|
||||
int i = 0, j;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
cluster->smart_freq_info = &default_freq_config[i];
|
||||
cluster->smart_freq_info->smart_freq_participation_mask = BIT(NO_REASON_SMART_FREQ);
|
||||
cluster->smart_freq_info->cluster_active_reason = 0;
|
||||
cluster->smart_freq_info->min_cycles = 100;
|
||||
cluster->smart_freq_info->smart_freq_ipc_participation_mask = 0;
|
||||
freq_cap[SMART_FREQ][cluster->id] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
|
||||
memset(cluster->smart_freq_info->legacy_reason_status, 0,
|
||||
sizeof(struct smart_freq_legacy_reason_status) *
|
||||
LEGACY_SMART_FREQ);
|
||||
memset(cluster->smart_freq_info->legacy_reason_config, 0,
|
||||
sizeof(struct smart_freq_legacy_reason_config) *
|
||||
LEGACY_SMART_FREQ);
|
||||
memset(cluster->smart_freq_info->ipc_reason_config, 0,
|
||||
sizeof(struct smart_freq_ipc_reason_config) *
|
||||
SMART_FMAX_IPC_MAX);
|
||||
|
||||
for (j = 0; j < LEGACY_SMART_FREQ; j++) {
|
||||
cluster->smart_freq_info->legacy_reason_config[j].freq_allowed =
|
||||
FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
}
|
||||
for (j = 0; j < SMART_FMAX_IPC_MAX; j++) {
|
||||
cluster->smart_freq_info->ipc_reason_config[j].freq_allowed =
|
||||
FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
sysctl_ipc_freq_levels_cluster0[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
sysctl_ipc_freq_levels_cluster1[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
sysctl_ipc_freq_levels_cluster2[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
sysctl_ipc_freq_levels_cluster3[j] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (!strcmp(name, "SUN")) {
|
||||
for_each_sched_cluster(cluster) {
|
||||
if (cluster->id == 0) {
|
||||
/* Legacy */
|
||||
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
|
||||
2400000;
|
||||
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
|
||||
1000000000;
|
||||
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
|
||||
1000000000;
|
||||
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
|
||||
300000000;
|
||||
cluster->smart_freq_info->smart_freq_participation_mask |=
|
||||
BIT(BOOST_SMART_FREQ) |
|
||||
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
|
||||
BIT(BIG_TASKCNT_SMART_FREQ) |
|
||||
BIT(TRAILBLAZER_SMART_FREQ) |
|
||||
BIT(SBT_SMART_FREQ) |
|
||||
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
|
||||
BIT(PIPELINE_90FPS_SMART_FREQ) |
|
||||
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
|
||||
BIT(THERMAL_ROTATION_SMART_FREQ);
|
||||
|
||||
/* IPC */
|
||||
cluster->smart_freq_info->ipc_reason_config[0].ipc = 120;
|
||||
cluster->smart_freq_info->ipc_reason_config[1].ipc = 180;
|
||||
cluster->smart_freq_info->ipc_reason_config[2].ipc = 220;
|
||||
cluster->smart_freq_info->ipc_reason_config[3].ipc = 260;
|
||||
cluster->smart_freq_info->ipc_reason_config[4].ipc = 300;
|
||||
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
|
||||
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
|
||||
BIT(IPC_E);
|
||||
cluster->smart_freq_info->min_cycles = 5806080;
|
||||
} else if (cluster->id == 1) {
|
||||
/* Legacy */
|
||||
cluster->smart_freq_info->legacy_reason_config[0].freq_allowed =
|
||||
3513600;
|
||||
cluster->smart_freq_info->legacy_reason_config[2].hyst_ns =
|
||||
1000000000;
|
||||
cluster->smart_freq_info->legacy_reason_config[3].hyst_ns =
|
||||
1000000000;
|
||||
cluster->smart_freq_info->legacy_reason_config[4].hyst_ns =
|
||||
300000000;
|
||||
cluster->smart_freq_info->smart_freq_participation_mask |=
|
||||
BIT(BOOST_SMART_FREQ) |
|
||||
BIT(SUSTAINED_HIGH_UTIL_SMART_FREQ) |
|
||||
BIT(BIG_TASKCNT_SMART_FREQ) |
|
||||
BIT(TRAILBLAZER_SMART_FREQ) |
|
||||
BIT(SBT_SMART_FREQ) |
|
||||
BIT(PIPELINE_60FPS_OR_LESSER_SMART_FREQ) |
|
||||
BIT(PIPELINE_90FPS_SMART_FREQ) |
|
||||
BIT(PIPELINE_120FPS_OR_GREATER_SMART_FREQ) |
|
||||
BIT(THERMAL_ROTATION_SMART_FREQ);
|
||||
|
||||
/* IPC */
|
||||
cluster->smart_freq_info->ipc_reason_config[0].ipc = 220;
|
||||
cluster->smart_freq_info->ipc_reason_config[1].ipc = 260;
|
||||
cluster->smart_freq_info->ipc_reason_config[2].ipc = 280;
|
||||
cluster->smart_freq_info->ipc_reason_config[3].ipc = 320;
|
||||
cluster->smart_freq_info->ipc_reason_config[4].ipc = 400;
|
||||
cluster->smart_freq_info->smart_freq_ipc_participation_mask =
|
||||
BIT(IPC_A) | BIT(IPC_B) | BIT(IPC_C) | BIT(IPC_D) |
|
||||
BIT(IPC_E);
|
||||
cluster->smart_freq_info->min_cycles = 7004160;
|
||||
}
|
||||
}
|
||||
}
|
||||
smart_freq_init_done = true;
|
||||
update_smart_freq_capacities();
|
||||
|
||||
}
|
1964  kernel/sched/walt/sysctl.c  Normal file (file diff suppressed because it is too large)
84  kernel/sched/walt/trace.c  Normal file
@@ -0,0 +1,84 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
static inline void __window_data(u32 *dst, u32 *src)
|
||||
{
|
||||
if (src)
|
||||
memcpy(dst, src, nr_cpu_ids * sizeof(u32));
|
||||
else
|
||||
memset(dst, 0, nr_cpu_ids * sizeof(u32));
|
||||
}
|
||||
|
||||
struct trace_seq;
|
||||
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
|
||||
{
|
||||
int i;
|
||||
const char *ret = p->buffer + seq_buf_used(&p->seq);
|
||||
|
||||
for (i = 0; i < buf_len; i++)
|
||||
trace_seq_printf(p, "%u ", buf[i]);
|
||||
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new)
|
||||
{
|
||||
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
||||
|
||||
if (curr)
|
||||
if (new)
|
||||
return wrq->nt_curr_runnable_sum;
|
||||
else
|
||||
return wrq->curr_runnable_sum;
|
||||
else
|
||||
if (new)
|
||||
return wrq->nt_prev_runnable_sum;
|
||||
else
|
||||
return wrq->prev_runnable_sum;
|
||||
}
|
||||
|
||||
static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new)
|
||||
{
|
||||
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
||||
|
||||
if (curr)
|
||||
if (new)
|
||||
return wrq->grp_time.nt_curr_runnable_sum;
|
||||
else
|
||||
return wrq->grp_time.curr_runnable_sum;
|
||||
else
|
||||
if (new)
|
||||
return wrq->grp_time.nt_prev_runnable_sum;
|
||||
else
|
||||
return wrq->grp_time.prev_runnable_sum;
|
||||
}
|
||||
|
||||
static inline s64
|
||||
__get_update_sum(struct rq *rq, enum migrate_types migrate_type,
|
||||
bool src, bool new, bool curr)
|
||||
{
|
||||
switch (migrate_type) {
|
||||
case RQ_TO_GROUP:
|
||||
if (src)
|
||||
return __rq_update_sum(rq, curr, new);
|
||||
else
|
||||
return __grp_update_sum(rq, curr, new);
|
||||
case GROUP_TO_RQ:
|
||||
if (src)
|
||||
return __grp_update_sum(rq, curr, new);
|
||||
else
|
||||
return __rq_update_sum(rq, curr, new);
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
2060  kernel/sched/walt/trace.h  Normal file (file diff suppressed because it is too large)
5683  kernel/sched/walt/walt.c  Normal file (file diff suppressed because it is too large)
1545  kernel/sched/walt/walt.h  Normal file (file diff suppressed because it is too large)
1532  kernel/sched/walt/walt_cfs.c  Normal file (file diff suppressed because it is too large)
207  kernel/sched/walt/walt_config.c  Normal file
@@ -0,0 +1,207 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
#include <soc/qcom/socinfo.h>
|
||||
|
||||
unsigned long __read_mostly soc_flags;
|
||||
unsigned int trailblazer_floor_freq[MAX_CLUSTERS];
|
||||
cpumask_t asym_cap_sibling_cpus;
|
||||
cpumask_t pipeline_sync_cpus;
|
||||
int oscillate_period_ns;
|
||||
int soc_sched_lib_name_capacity;
|
||||
#define PIPELINE_BUSY_THRESH_8MS_WINDOW 7
|
||||
#define PIPELINE_BUSY_THRESH_12MS_WINDOW 11
|
||||
#define PIPELINE_BUSY_THRESH_16MS_WINDOW 15
|
||||
|
||||
void walt_config(void)
|
||||
{
|
||||
int i, j, cpu;
|
||||
const char *name = socinfo_get_id_string();
|
||||
|
||||
sysctl_sched_group_upmigrate_pct = 100;
|
||||
sysctl_sched_group_downmigrate_pct = 95;
|
||||
sysctl_sched_task_unfilter_period = 100000000;
|
||||
sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG;
|
||||
sysctl_sched_ravg_window_nr_ticks = (HZ / NR_WINDOWS_PER_SEC);
|
||||
sched_load_granule = DEFAULT_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;
|
||||
sysctl_sched_coloc_busy_hyst_enable_cpus = 112;
|
||||
sysctl_sched_util_busy_hyst_enable_cpus = 255;
|
||||
sysctl_sched_coloc_busy_hyst_max_ms = 5000;
|
||||
sched_ravg_window = DEFAULT_SCHED_RAVG_WINDOW;
|
||||
sysctl_input_boost_ms = 40;
|
||||
sysctl_sched_min_task_util_for_boost = 51;
|
||||
sysctl_sched_min_task_util_for_uclamp = 51;
|
||||
sysctl_sched_min_task_util_for_colocation = 35;
|
||||
sysctl_sched_many_wakeup_threshold = WALT_MANY_WAKEUP_DEFAULT;
|
||||
sysctl_walt_rtg_cfs_boost_prio = 99; /* disabled by default */
|
||||
sysctl_sched_sync_hint_enable = 1;
|
||||
sysctl_sched_skip_sp_newly_idle_lb = 1;
|
||||
sysctl_sched_hyst_min_coloc_ns = 80000000;
|
||||
sysctl_sched_idle_enough = SCHED_IDLE_ENOUGH_DEFAULT;
|
||||
sysctl_sched_cluster_util_thres_pct = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
|
||||
sysctl_em_inflate_pct = 100;
|
||||
sysctl_em_inflate_thres = 1024;
|
||||
sysctl_max_freq_partial_halt = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
asym_cap_sibling_cpus = CPU_MASK_NONE;
|
||||
pipeline_sync_cpus = CPU_MASK_NONE;
|
||||
for_each_possible_cpu(cpu) {
|
||||
for (i = 0; i < LEGACY_SMART_FREQ; i++) {
|
||||
if (i)
|
||||
smart_freq_legacy_reason_hyst_ms[i][cpu] = 4;
|
||||
else
|
||||
smart_freq_legacy_reason_hyst_ms[i][cpu] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_MARGIN_LEVELS; i++) {
|
||||
sysctl_sched_capacity_margin_up_pct[i] = 95; /* ~5% margin */
|
||||
sysctl_sched_capacity_margin_dn_pct[i] = 85; /* ~15% margin */
|
||||
sysctl_sched_early_up[i] = 1077;
|
||||
sysctl_sched_early_down[i] = 1204;
|
||||
}
|
||||
|
||||
for (i = 0; i < WALT_NR_CPUS; i++) {
|
||||
sysctl_sched_coloc_busy_hyst_cpu[i] = 39000000;
|
||||
sysctl_sched_coloc_busy_hyst_cpu_busy_pct[i] = 10;
|
||||
sysctl_sched_util_busy_hyst_cpu[i] = 5000000;
|
||||
sysctl_sched_util_busy_hyst_cpu_util[i] = 15;
|
||||
sysctl_input_boost_freq[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_CLUSTERS; i++) {
|
||||
sysctl_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
high_perf_cluster_freq_cap[i] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
sysctl_sched_idle_enough_clust[i] = SCHED_IDLE_ENOUGH_DEFAULT;
|
||||
sysctl_sched_cluster_util_thres_pct_clust[i] = SCHED_CLUSTER_UTIL_THRES_PCT_DEFAULT;
|
||||
trailblazer_floor_freq[i] = 0;
|
||||
for (j = 0; j < MAX_CLUSTERS; j++) {
|
||||
load_sync_util_thres[i][j] = 0;
|
||||
load_sync_low_pct[i][j] = 0;
|
||||
load_sync_high_pct[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_FREQ_CAP; i++) {
|
||||
for (j = 0; j < MAX_CLUSTERS; j++)
|
||||
freq_cap[i][j] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
sysctl_sched_lrpb_active_ms[0] = PIPELINE_BUSY_THRESH_8MS_WINDOW;
|
||||
sysctl_sched_lrpb_active_ms[1] = PIPELINE_BUSY_THRESH_12MS_WINDOW;
|
||||
sysctl_sched_lrpb_active_ms[2] = PIPELINE_BUSY_THRESH_16MS_WINDOW;
|
||||
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
|
||||
soc_feat_set(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
|
||||
soc_feat_set(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
|
||||
soc_feat_set(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
|
||||
soc_feat_set(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
|
||||
soc_feat_set(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
|
||||
soc_feat_set(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
|
||||
|
||||
sysctl_pipeline_special_task_util_thres = 100;
|
||||
sysctl_pipeline_non_special_task_util_thres = 200;
|
||||
sysctl_pipeline_pin_thres_low_pct = 50;
|
||||
sysctl_pipeline_pin_thres_high_pct = 60;
|
||||
|
||||
/* return if socinfo is not available */
|
||||
if (!name)
|
||||
return;
|
||||
|
||||
if (!strcmp(name, "SUN")) {
|
||||
sysctl_sched_suppress_region2 = 1;
|
||||
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_TOPAPP_BIT);
|
||||
soc_feat_unset(SOC_ENABLE_CONSERVATIVE_BOOST_FG_BIT);
|
||||
soc_feat_unset(SOC_ENABLE_UCLAMP_BOOSTED_BIT);
|
||||
soc_feat_unset(SOC_ENABLE_PER_TASK_BOOST_ON_MID_BIT);
|
||||
trailblazer_floor_freq[0] = 1000000;
|
||||
debugfs_walt_features |= WALT_FEAT_TRAILBLAZER_BIT;
|
||||
debugfs_walt_features |= WALT_FEAT_UCLAMP_FREQ_BIT;
|
||||
soc_feat_unset(SOC_ENABLE_COLOCATION_PLACEMENT_BOOST_BIT);
|
||||
soc_feat_set(SOC_ENABLE_FT_BOOST_TO_ALL);
|
||||
oscillate_period_ns = 8000000;
|
||||
soc_feat_set(SOC_ENABLE_EXPERIMENT3);
|
||||
/*G + P*/
|
||||
cpumask_copy(&pipeline_sync_cpus, cpu_possible_mask);
|
||||
soc_sched_lib_name_capacity = 2;
|
||||
soc_feat_unset(SOC_ENABLE_PIPELINE_SWAPPING_BIT);
|
||||
|
||||
sysctl_cluster01_load_sync[0] = 350;
|
||||
sysctl_cluster01_load_sync[1] = 100;
|
||||
sysctl_cluster01_load_sync[2] = 100;
|
||||
sysctl_cluster10_load_sync[0] = 512;
|
||||
sysctl_cluster10_load_sync[1] = 90;
|
||||
sysctl_cluster10_load_sync[2] = 90;
|
||||
load_sync_util_thres[0][1] = sysctl_cluster01_load_sync[0];
|
||||
load_sync_low_pct[0][1] = sysctl_cluster01_load_sync[1];
|
||||
load_sync_high_pct[0][1] = sysctl_cluster01_load_sync[2];
|
||||
load_sync_util_thres[1][0] = sysctl_cluster10_load_sync[0];
|
||||
load_sync_low_pct[1][0] = sysctl_cluster10_load_sync[1];
|
||||
load_sync_high_pct[1][0] = sysctl_cluster10_load_sync[2];
|
||||
|
||||
sysctl_cluster01_load_sync_60fps[0] = 400;
|
||||
sysctl_cluster01_load_sync_60fps[1] = 60;
|
||||
sysctl_cluster01_load_sync_60fps[2] = 100;
|
||||
sysctl_cluster10_load_sync_60fps[0] = 500;
|
||||
sysctl_cluster10_load_sync_60fps[1] = 70;
|
||||
sysctl_cluster10_load_sync_60fps[2] = 90;
|
||||
load_sync_util_thres_60fps[0][1] = sysctl_cluster01_load_sync_60fps[0];
|
||||
load_sync_low_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[1];
|
||||
load_sync_high_pct_60fps[0][1] = sysctl_cluster01_load_sync_60fps[2];
|
||||
load_sync_util_thres_60fps[1][0] = sysctl_cluster10_load_sync_60fps[0];
|
||||
load_sync_low_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[1];
|
||||
load_sync_high_pct_60fps[1][0] = sysctl_cluster10_load_sync_60fps[2];
|
||||
|
||||
/* CPU0 needs a 9 ms bias for all legacy smart freq reasons */
|
||||
for (i = 1; i < LEGACY_SMART_FREQ; i++)
|
||||
smart_freq_legacy_reason_hyst_ms[i][0] = 9;
|
||||
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
|
||||
for (i = 1; i < LEGACY_SMART_FREQ; i++)
|
||||
smart_freq_legacy_reason_hyst_ms[i][cpu] = 2;
|
||||
}
|
||||
for_each_possible_cpu(cpu) {
|
||||
smart_freq_legacy_reason_hyst_ms[PIPELINE_60FPS_OR_LESSER_SMART_FREQ][cpu] =
|
||||
1;
|
||||
}
|
||||
soc_feat_unset(SOC_ENABLE_THERMAL_HALT_LOW_FREQ_BIT);
|
||||
} else if (!strcmp(name, "PINEAPPLE")) {
|
||||
soc_feat_set(SOC_ENABLE_SILVER_RT_SPREAD_BIT);
|
||||
soc_feat_set(SOC_ENABLE_BOOST_TO_NEXT_CLUSTER_BIT);
|
||||
|
||||
/* T + G */
|
||||
cpumask_or(&asym_cap_sibling_cpus,
|
||||
&asym_cap_sibling_cpus, &cpu_array[0][1]);
|
||||
cpumask_or(&asym_cap_sibling_cpus,
|
||||
&asym_cap_sibling_cpus, &cpu_array[0][2]);
|
||||
|
||||
/*
 * Treat Golds and Primes as candidates for load sync under the pipeline
 * usecase. However, the single-CPU cluster may not be present. As prime
 * is the only cluster with a single CPU, guard this setting by ensuring
 * 4 clusters are present.
 */
|
||||
if (num_sched_clusters == 4) {
|
||||
cpumask_or(&pipeline_sync_cpus,
|
||||
&pipeline_sync_cpus, &cpu_array[0][2]);
|
||||
cpumask_or(&pipeline_sync_cpus,
|
||||
&pipeline_sync_cpus, &cpu_array[0][3]);
|
||||
}
|
||||
|
||||
sysctl_cluster23_load_sync[0] = 350;
|
||||
sysctl_cluster23_load_sync[1] = 100;
|
||||
sysctl_cluster23_load_sync[2] = 100;
|
||||
sysctl_cluster32_load_sync[0] = 512;
|
||||
sysctl_cluster32_load_sync[1] = 90;
|
||||
sysctl_cluster32_load_sync[2] = 90;
|
||||
load_sync_util_thres[2][3] = sysctl_cluster23_load_sync[0];
|
||||
load_sync_low_pct[2][3] = sysctl_cluster23_load_sync[1];
|
||||
load_sync_high_pct[2][3] = sysctl_cluster23_load_sync[2];
|
||||
load_sync_util_thres[3][2] = sysctl_cluster32_load_sync[0];
|
||||
load_sync_low_pct[3][2] = sysctl_cluster32_load_sync[1];
|
||||
load_sync_high_pct[3][2] = sysctl_cluster32_load_sync[2];
|
||||
}
|
||||
|
||||
smart_freq_init(name);
|
||||
}
|
172  kernel/sched/walt/walt_cpufreq_cycle_cntr_driver.c  Normal file
@@ -0,0 +1,172 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of_address.h>
|
||||
#include <linux/of_platform.h>
|
||||
#include <linux/topology.h>
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
#define CYCLE_CNTR_OFFSET(core_id, acc_count) \
|
||||
(acc_count ? ((core_id + 1) * 4) : 0)
|
||||
|
||||
struct cpufreq_counter {
|
||||
u64 total_cycle_counter;
|
||||
u32 prev_cycle_counter;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
static struct cpufreq_counter walt_cpufreq_counter[NR_CPUS];
|
||||
|
||||
struct walt_cpufreq_soc_data {
|
||||
u32 reg_enable;
|
||||
u32 reg_cycle_cntr;
|
||||
bool accumulative_counter;
|
||||
};
|
||||
|
||||
struct walt_cpufreq_data {
|
||||
void __iomem *base;
|
||||
const struct walt_cpufreq_soc_data *soc_data;
|
||||
};
|
||||
|
||||
static struct walt_cpufreq_data cpufreq_data[MAX_CLUSTERS];
|
||||
|
||||
u64 walt_cpufreq_get_cpu_cycle_counter(int cpu, u64 wc)
|
||||
{
|
||||
const struct walt_cpufreq_soc_data *soc_data;
|
||||
struct cpufreq_counter *cpu_counter;
|
||||
struct walt_cpufreq_data *data;
|
||||
u64 cycle_counter_ret;
|
||||
unsigned long flags;
|
||||
u16 offset;
|
||||
u32 val;
|
||||
|
||||
data = &cpufreq_data[cpu_cluster(cpu)->id];
|
||||
soc_data = data->soc_data;
|
||||
|
||||
cpu_counter = &walt_cpufreq_counter[cpu];
|
||||
spin_lock_irqsave(&cpu_counter->lock, flags);
|
||||
|
||||
offset = CYCLE_CNTR_OFFSET(topology_core_id(cpu),
|
||||
soc_data->accumulative_counter);
|
||||
val = readl_relaxed(data->base +
|
||||
soc_data->reg_cycle_cntr + offset);
|
||||
|
||||
if (val < cpu_counter->prev_cycle_counter) {
|
||||
/* Handle counter overflow */
|
||||
cpu_counter->total_cycle_counter += UINT_MAX -
|
||||
cpu_counter->prev_cycle_counter + val;
|
||||
cpu_counter->prev_cycle_counter = val;
|
||||
} else {
|
||||
cpu_counter->total_cycle_counter += val -
|
||||
cpu_counter->prev_cycle_counter;
|
||||
cpu_counter->prev_cycle_counter = val;
|
||||
}
|
||||
cycle_counter_ret = cpu_counter->total_cycle_counter;
|
||||
spin_unlock_irqrestore(&cpu_counter->lock, flags);
|
||||
|
||||
pr_debug("CPU %u, core-id 0x%x, offset %u cycle_counts=%llu\n",
|
||||
cpu, topology_core_id(cpu), offset, cycle_counter_ret);
|
||||
|
||||
return cycle_counter_ret;
|
||||
}
|
||||
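walt_cpufreq_get_cpu_cycle_counter() above extends a 32-bit hardware cycle register into a monotonic 64-bit count: when the raw value goes backwards it assumes a single wrap and adds UINT_MAX - prev + val. The accumulation logic on its own, as a small sketch that mirrors the driver's arithmetic:

#include <stdint.h>
#include <stdio.h>

struct cycle_acc {
	uint64_t total;		/* monotonically growing 64-bit count */
	uint32_t prev;		/* last raw 32-bit register value */
};

/* Fold a new raw register reading into the 64-bit total, assuming at
 * most one wraparound between readings, as the driver above does. */
static uint64_t cycle_acc_update(struct cycle_acc *a, uint32_t raw)
{
	if (raw < a->prev)
		a->total += (uint64_t)UINT32_MAX - a->prev + raw;
	else
		a->total += raw - a->prev;
	a->prev = raw;
	return a->total;
}

int main(void)
{
	struct cycle_acc a = { 0, 0 };

	cycle_acc_update(&a, 4000000000u);	/* counter near the top */
	printf("%llu\n", (unsigned long long)cycle_acc_update(&a, 100u));
	return 0;
}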
|
||||
static int walt_cpufreq_cycle_cntr_driver_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct resource *res;
|
||||
struct of_phandle_args args;
|
||||
struct device_node *cpu_np;
|
||||
void __iomem *base;
|
||||
int ret = -ENODEV, index, cpu;
|
||||
struct walt_sched_cluster *cluster;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
cpu = cluster_first_cpu(cluster);
|
||||
cpu_np = of_cpu_device_node_get(cpu);
|
||||
if (!cpu_np)
|
||||
return -EINVAL;
|
||||
|
||||
ret = of_parse_phandle_with_args(cpu_np, "qcom,freq-domain",
|
||||
"#freq-domain-cells", 0, &args);
|
||||
of_node_put(cpu_np);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
index = args.args[0];
|
||||
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
|
||||
if (!res) {
|
||||
dev_err(dev, "failed to get mem resource %d\n", index);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
base = devm_ioremap(dev, res->start, resource_size(res));
|
||||
if (!base) {
|
||||
dev_err(dev, "failed to map resource %pR\n", res);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
cpufreq_data[cluster->id].soc_data = of_device_get_match_data(&pdev->dev);
|
||||
cpufreq_data[cluster->id].base = base;
|
||||
|
||||
/* HW should be in enabled state to proceed */
|
||||
if (!(readl_relaxed(base + cpufreq_data[cluster->id].soc_data->reg_enable) & 0x1)) {
|
||||
dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index);
|
||||
return -ENODEV;
|
||||
}
|
||||
}
|
||||
|
||||
if (!walt_get_cycle_counts_cb) {
|
||||
for_each_possible_cpu(cpu)
|
||||
spin_lock_init(&walt_cpufreq_counter[cpu].lock);
|
||||
walt_get_cycle_counts_cb = walt_cpufreq_get_cpu_cycle_counter;
|
||||
use_cycle_counter = true;
|
||||
complete(&walt_get_cycle_counts_cb_completion);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int walt_cpufreq_cycle_cntr_driver_remove(struct platform_device *pdev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct walt_cpufreq_soc_data hw_soc_data = {
|
||||
.reg_enable = 0x0,
|
||||
.reg_cycle_cntr = 0x9c0,
|
||||
.accumulative_counter = false,
|
||||
};
|
||||
|
||||
static const struct walt_cpufreq_soc_data epss_soc_data = {
|
||||
.reg_enable = 0x0,
|
||||
.reg_cycle_cntr = 0x3c4,
|
||||
.accumulative_counter = true,
|
||||
};
|
||||
|
||||
static const struct of_device_id walt_cpufreq_cycle_cntr_match[] = {
|
||||
{ .compatible = "qcom,cycle-cntr-hw", .data = &hw_soc_data },
|
||||
{ .compatible = "qcom,epss", .data = &epss_soc_data },
|
||||
{}
|
||||
};
|
||||
|
||||
static struct platform_driver walt_cpufreq_cycle_cntr_driver = {
|
||||
.driver = {
|
||||
.name = "walt-cpufreq-cycle-cntr",
|
||||
.of_match_table = walt_cpufreq_cycle_cntr_match
|
||||
},
|
||||
.probe = walt_cpufreq_cycle_cntr_driver_probe,
|
||||
.remove = walt_cpufreq_cycle_cntr_driver_remove,
|
||||
};
|
||||
|
||||
int walt_cpufreq_cycle_cntr_driver_register(void)
|
||||
{
|
||||
return platform_driver_register(&walt_cpufreq_cycle_cntr_driver);
|
||||
}
|
115  kernel/sched/walt/walt_cycles.c  Normal file
@@ -0,0 +1,115 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <trace/hooks/cpufreq.h>
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
struct walt_cpu_cycle {
|
||||
rwlock_t lock;
|
||||
u64 cycles;
|
||||
u64 last_time_ns;
|
||||
unsigned int cur_freq_khz;
|
||||
unsigned int mult_fact;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct walt_cpu_cycle, walt_cc);
|
||||
|
||||
static u64 walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
|
||||
{
|
||||
unsigned long flags;
|
||||
u64 delta;
|
||||
u64 ret;
|
||||
|
||||
/*
|
||||
* freq is in KHz. so multiply by 1000.
|
||||
* time in nsec. so divide by NSEC_PER_SEC.
|
||||
*
|
||||
* cycles = (freq * 1000) * (t/10^9)
|
||||
* = (freq * t)/10^6
|
||||
*
|
||||
*/
|
||||
read_lock_irqsave(&wcc->lock, flags);
|
||||
delta = wc - wcc->last_time_ns;
|
||||
ret = wcc->cycles + ((delta * wcc->mult_fact) >> 20);
|
||||
read_unlock_irqrestore(&wcc->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
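walt_compute_cpu_cycles() above avoids a divide on every update by pre-computing mult_fact (frequency scaled twice by SCHED_CAPACITY_SHIFT and divided by 1000 each time), so that (delta_ns * mult_fact) >> 20 approximates freq_khz * delta_ns / 10^6 from the comment. A stand-alone sketch comparing the shift-based approximation with the exact division, using illustrative numbers:

#include <stdint.h>
#include <stdio.h>

#define SHIFT	10	/* SCHED_CAPACITY_SHIFT in the kernel */

int main(void)
{
	uint64_t freq_khz = 1804800;	/* illustrative CPU frequency */
	uint64_t delta_ns = 4000000;	/* 4 ms between updates */

	/* Pre-computed once per frequency change, as in walt_cpufreq_transition() */
	uint64_t mult_fact = (freq_khz << SHIFT) / 1000;
	mult_fact = (mult_fact << SHIFT) / 1000;

	/* Fast path: one multiply and one shift per cycle-count update */
	uint64_t approx = (delta_ns * mult_fact) >> (2 * SHIFT);
	/* Reference: cycles = (freq_khz * 1000) * (delta_ns / 1e9) = freq * t / 1e6 */
	uint64_t exact = freq_khz * delta_ns / 1000000;

	printf("approx=%llu exact=%llu\n",
	       (unsigned long long)approx, (unsigned long long)exact);
	return 0;
}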
|
||||
static void update_walt_compute_cpu_cycles(struct walt_cpu_cycle *wcc, u64 wc)
|
||||
{
|
||||
unsigned long flags;
|
||||
u64 delta;
|
||||
|
||||
/*
|
||||
* freq is in KHz. so multiply by 1000.
|
||||
* time in nsec. so divide by NSEC_PER_SEC.
|
||||
*
|
||||
* cycles = (freq * 1000) * (t/10^9)
|
||||
* = (freq * t)/10^6
|
||||
*
|
||||
*/
|
||||
write_lock_irqsave(&wcc->lock, flags);
|
||||
delta = wc - wcc->last_time_ns;
|
||||
wcc->cycles += (delta * wcc->mult_fact) >> 20;
|
||||
wcc->last_time_ns = wc;
|
||||
write_unlock_irqrestore(&wcc->lock, flags);
|
||||
}
|
||||
|
||||
u64 walt_cpu_cycle_counter(int cpu, u64 wc)
|
||||
{
|
||||
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, cpu);
|
||||
u64 cycles;
|
||||
|
||||
cycles = walt_compute_cpu_cycles(wcc, wc);
|
||||
|
||||
return cycles;
|
||||
}
|
||||
|
||||
static void walt_cpufreq_transition(void *unused, struct cpufreq_policy *policy)
|
||||
{
|
||||
int i;
|
||||
struct walt_cpu_cycle *wcc;
|
||||
u64 wc;
|
||||
unsigned int mult_fact;
|
||||
|
||||
wc = sched_clock();
|
||||
for_each_cpu(i, policy->related_cpus) {
|
||||
wcc = &per_cpu(walt_cc, i);
|
||||
update_walt_compute_cpu_cycles(wcc, wc);
|
||||
wcc->cur_freq_khz = policy->cur;
|
||||
}
|
||||
|
||||
mult_fact = (policy->cur << SCHED_CAPACITY_SHIFT)/1000;
|
||||
mult_fact = (mult_fact << SCHED_CAPACITY_SHIFT)/1000;
|
||||
for_each_cpu(i, policy->related_cpus) {
|
||||
wcc = &per_cpu(walt_cc, i);
|
||||
wcc->mult_fact = mult_fact;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void walt_cycle_counter_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct walt_cpu_cycle *wcc = &per_cpu(walt_cc, i);
|
||||
|
||||
rwlock_init(&wcc->lock);
|
||||
wcc->cur_freq_khz = cpufreq_quick_get(i);
|
||||
wcc->last_time_ns = 0;
|
||||
wcc->cycles = 0;
|
||||
wcc->mult_fact = (wcc->cur_freq_khz << SCHED_CAPACITY_SHIFT)/1000;
|
||||
wcc->mult_fact = (wcc->mult_fact << SCHED_CAPACITY_SHIFT)/1000;
|
||||
}
|
||||
|
||||
walt_get_cycle_counts_cb = walt_cpu_cycle_counter;
|
||||
use_cycle_counter = true;
|
||||
complete(&walt_get_cycle_counts_cb_completion);
|
||||
|
||||
register_trace_android_rvh_cpufreq_transition(walt_cpufreq_transition, NULL);
|
||||
}
|
34  kernel/sched/walt/walt_debug.c  Normal file
@@ -0,0 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <trace/hooks/sched.h>

#include "walt.h"
#include "walt_debug.h"

static void android_rvh_schedule_bug(void *unused, void *unused2)
{
	BUG();
}

static int __init walt_debug_init(void)
{
	int ret;

	ret = preemptirq_long_init();
	if (ret)
		return ret;

	register_trace_android_rvh_schedule_bug(android_rvh_schedule_bug, NULL);

	return 0;
}
module_init(walt_debug_init);

MODULE_DESCRIPTION("QTI WALT Debug Module");
MODULE_LICENSE("GPL v2");
5  kernel/sched/walt/walt_debug.h  Normal file
@@ -0,0 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 */
int preemptirq_long_init(void);
130  kernel/sched/walt/walt_gclk_cycle_counter_driver.c  Normal file
@@ -0,0 +1,130 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of_address.h>
|
||||
#include <linux/of_platform.h>
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
struct gclk_counter {
|
||||
u64 total_cycle_counter;
|
||||
u64 prev_cycle_counter;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
static struct gclk_counter walt_gclk_counter[MAX_CLUSTERS];
|
||||
|
||||
struct walt_ncc_data {
|
||||
void __iomem *base;
|
||||
};
|
||||
|
||||
static struct walt_ncc_data ncc_data[MAX_CLUSTERS];
|
||||
|
||||
u64 walt_get_ncc_gclk_cycle_counter(int cpu, u64 wc)
|
||||
{
|
||||
struct gclk_counter *ncc_counter;
|
||||
struct walt_ncc_data *data;
|
||||
u64 cycle_counter_ret;
|
||||
unsigned long flags;
|
||||
int index;
|
||||
u64 val;
|
||||
|
||||
index = topology_cluster_id(cpu);
|
||||
|
||||
data = &ncc_data[index];
|
||||
|
||||
ncc_counter = &walt_gclk_counter[index];
|
||||
spin_lock_irqsave(&ncc_counter->lock, flags);
|
||||
|
||||
val = readq_relaxed(data->base);
|
||||
|
||||
if (val < ncc_counter->prev_cycle_counter) {
|
||||
/* Handle counter overflow.
|
||||
* Most likely will not occur
|
||||
* for 64 bit counter, but
|
||||
* handling for completeness.
|
||||
*/
|
||||
ncc_counter->total_cycle_counter += U64_MAX -
|
||||
ncc_counter->prev_cycle_counter + val;
|
||||
ncc_counter->prev_cycle_counter = val;
|
||||
} else {
|
||||
ncc_counter->total_cycle_counter += val -
|
||||
ncc_counter->prev_cycle_counter;
|
||||
ncc_counter->prev_cycle_counter = val;
|
||||
}
|
||||
cycle_counter_ret = ncc_counter->total_cycle_counter;
|
||||
spin_unlock_irqrestore(&ncc_counter->lock, flags);
|
||||
|
||||
pr_debug("CPU %u, cluster-id %d\n", cpu, index);
|
||||
|
||||
return cycle_counter_ret;
|
||||
}
|
||||
|
||||
static int walt_gclk_cycle_counter_driver_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct resource *res;
|
||||
void __iomem *base;
|
||||
int ret = -ENODEV, index;
|
||||
struct walt_sched_cluster *cluster;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
index = topology_cluster_id(cpumask_first(&cluster->cpus));
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
|
||||
if (!res) {
|
||||
dev_err(dev, "failed to get mem resource %d\n", index);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!devm_request_mem_region(dev, res->start, resource_size(res), res->name)) {
|
||||
dev_err(dev, "failed to request resource %pR\n", res);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
base = devm_ioremap(dev, res->start, resource_size(res));
|
||||
if (!base) {
|
||||
dev_err(dev, "failed to map resource %pR\n", res);
|
||||
return -ENOMEM;
|
||||
}
|
||||
ncc_data[index].base = base;
|
||||
}
|
||||
|
||||
if (!walt_get_cycle_counts_cb) {
|
||||
for (int i = 0; i < MAX_CLUSTERS; i++)
|
||||
spin_lock_init(&walt_gclk_counter[i].lock);
|
||||
walt_get_cycle_counts_cb = walt_get_ncc_gclk_cycle_counter;
|
||||
use_cycle_counter = true;
|
||||
complete(&walt_get_cycle_counts_cb_completion);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int walt_gclk_cycle_counter_driver_remove(struct platform_device *pdev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct of_device_id walt_gclk_cycle_counter_match[] = {
|
||||
{ .compatible = "qcom,gclk" },
|
||||
{}
|
||||
};
|
||||
|
||||
static struct platform_driver walt_gclk_cycle_counter_driver = {
|
||||
.driver = {
|
||||
.name = "walt-gclk-cycle-counter",
|
||||
.of_match_table = walt_gclk_cycle_counter_match
|
||||
},
|
||||
.probe = walt_gclk_cycle_counter_driver_probe,
|
||||
.remove = walt_gclk_cycle_counter_driver_remove,
|
||||
};
|
||||
|
||||
int walt_gclk_cycle_counter_driver_register(void)
|
||||
{
|
||||
return platform_driver_register(&walt_gclk_cycle_counter_driver);
|
||||
}
|
713
kernel/sched/walt/walt_halt.c
Normal file
713
kernel/sched/walt/walt_halt.c
Normal file
@@ -0,0 +1,713 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <trace/hooks/sched.h>
|
||||
#include <walt.h>
|
||||
#include "trace.h"
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
enum pause_type {
|
||||
HALT,
|
||||
PARTIAL_HALT,
|
||||
|
||||
MAX_PAUSE_TYPE
|
||||
};
|
||||
|
||||
/* if a cpu is halting */
|
||||
struct cpumask __cpu_halt_mask;
|
||||
struct cpumask __cpu_partial_halt_mask;
|
||||
|
||||
/* spin lock to allow calling from non-preemptible context */
|
||||
static DEFINE_RAW_SPINLOCK(halt_lock);
|
||||
|
||||
struct halt_cpu_state {
|
||||
u8 client_vote_mask[MAX_PAUSE_TYPE];
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct halt_cpu_state, halt_state);
|
||||
static DEFINE_RAW_SPINLOCK(walt_drain_pending_lock);
|
||||
|
||||
/* the amount of time allowed for enqueue operations that happen
|
||||
* just after a halt operation.
|
||||
*/
|
||||
#define WALT_HALT_CHECK_THRESHOLD_NS 400000
|
||||
|
||||
/*
|
||||
* Remove a task from the runqueue and pretend that it's migrating. This
|
||||
* should prevent migrations for the detached task and disallow further
|
||||
* changes to tsk_cpus_allowed.
|
||||
*/
|
||||
void
|
||||
detach_one_task_core(struct task_struct *p, struct rq *rq,
|
||||
struct list_head *tasks)
|
||||
{
|
||||
walt_lockdep_assert_rq(rq, p);
|
||||
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
deactivate_task(rq, p, 0);
|
||||
list_add(&p->se.group_node, tasks);
|
||||
}
|
||||
|
||||
void attach_tasks_core(struct list_head *tasks, struct rq *rq)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
walt_lockdep_assert_rq(rq, NULL);
|
||||
|
||||
while (!list_empty(tasks)) {
|
||||
p = list_first_entry(tasks, struct task_struct, se.group_node);
|
||||
list_del_init(&p->se.group_node);
|
||||
|
||||
BUG_ON(task_rq(p) != rq);
|
||||
activate_task(rq, p, 0);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Migrate all tasks from the rq, sleeping tasks will be migrated by
|
||||
* try_to_wake_up()->select_task_rq().
|
||||
*
|
||||
* Called with rq->__lock held even though we're in stop_machine() and
|
||||
* there's no concurrency possible, we hold the required locks anyway
|
||||
* because of lock validation efforts.
|
||||
*
|
||||
* The function will skip CPU pinned kthreads.
|
||||
*/
|
||||
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
|
||||
{
|
||||
struct rq *rq = dead_rq;
|
||||
struct task_struct *next, *stop = rq->stop;
|
||||
LIST_HEAD(percpu_kthreads);
|
||||
unsigned int num_pinned_kthreads = 1;
|
||||
struct rq_flags orf = *rf;
|
||||
int dest_cpu;
|
||||
|
||||
/*
|
||||
* Fudge the rq selection such that the below task selection loop
|
||||
* doesn't get stuck on the currently eligible stop task.
|
||||
*
|
||||
* We're currently inside stop_machine() and the rq is either stuck
|
||||
* in the stop_machine_cpu_stop() loop, or we're executing this code,
|
||||
* either way we should never end up calling schedule() until we're
|
||||
* done here.
|
||||
*/
|
||||
rq->stop = NULL;
|
||||
|
||||
/*
|
||||
* put_prev_task() and pick_next_task() sched
|
||||
* class method both need to have an up-to-date
|
||||
* value of rq->clock[_task]
|
||||
*/
|
||||
update_rq_clock(rq);
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
/* note the clock update in orf */
|
||||
orf.clock_update_flags |= RQCF_UPDATED;
|
||||
#endif
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* There's this thread running, bail when that's the only
|
||||
* remaining thread:
|
||||
*/
|
||||
if (rq->nr_running == 1)
|
||||
break;
|
||||
|
||||
next = pick_migrate_task(rq);
|
||||
|
||||
/*
|
||||
* Argh ... no iterator for tasks, we need to remove the
|
||||
* kthread from the run-queue to continue.
|
||||
*/
|
||||
|
||||
if (is_per_cpu_kthread(next)) {
|
||||
detach_one_task_core(next, rq, &percpu_kthreads);
|
||||
num_pinned_kthreads += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Rules for changing task_struct::cpus_mask are holding
|
||||
* both pi_lock and rq->__lock, such that holding either
|
||||
* stabilizes the mask.
|
||||
*
|
||||
* Dropping rq->__lock is not quite as disastrous as it usually is
|
||||
* because !cpu_active at this point, which means load-balance
|
||||
* will not interfere. Also, stop-machine.
|
||||
*/
|
||||
rq_unlock(rq, rf);
|
||||
raw_spin_lock(&next->pi_lock);
|
||||
raw_spin_rq_lock(rq);
|
||||
rq_repin_lock(rq, rf);
|
||||
|
||||
/*
|
||||
* Since we're inside stop-machine, _nothing_ should have
|
||||
* changed the task, WARN if weird stuff happened, because in
|
||||
* that case the above rq->__lock drop is a fail too.
|
||||
*/
|
||||
if (task_rq(next) != rq || !task_on_rq_queued(next)) {
|
||||
raw_spin_unlock(&next->pi_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Find suitable destination for @next */
|
||||
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
|
||||
|
||||
if (cpu_of(rq) != dest_cpu && !is_migration_disabled(next)) {
|
||||
/* only perform a required migration */
|
||||
rq = __migrate_task(rq, rf, next, dest_cpu);
|
||||
|
||||
if (rq != dead_rq) {
|
||||
rq_unlock(rq, rf);
|
||||
rq = dead_rq;
|
||||
*rf = orf;
|
||||
raw_spin_rq_lock(rq);
|
||||
rq_repin_lock(rq, rf);
|
||||
}
|
||||
} else {
|
||||
detach_one_task_core(next, rq, &percpu_kthreads);
|
||||
num_pinned_kthreads += 1;
|
||||
}
|
||||
|
||||
raw_spin_unlock(&next->pi_lock);
|
||||
}
|
||||
|
||||
if (num_pinned_kthreads > 1)
|
||||
attach_tasks_core(&percpu_kthreads, rq);
|
||||
|
||||
rq->stop = stop;
|
||||
}
|
||||
|
||||
void __balance_callbacks(struct rq *rq);
|
||||
|
||||
static int drain_rq_cpu_stop(void *data)
|
||||
{
|
||||
struct rq *rq = this_rq();
|
||||
struct rq_flags rf;
|
||||
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
||||
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
/* rq lock is pinned */
|
||||
|
||||
/* migrate tasks assumes that the lock is pinned, and will unlock/repin */
|
||||
migrate_tasks(rq, &rf);
|
||||
|
||||
/* __balance_callbacks can unlock and relock the rq lock. unpin */
|
||||
rq_unpin_lock(rq, &rf);
|
||||
|
||||
/*
|
||||
* service any callbacks that were accumulated prior to unlocking, such that
* any subsequent calls to rq_lock... will see rq->balance_callback set to
* the default (0 or balance_push_callback).
|
||||
*/
|
||||
wrq->enqueue_counter = 0;
|
||||
__balance_callbacks(rq);
|
||||
if (wrq->enqueue_counter)
|
||||
WALT_BUG(WALT_BUG_WALT, NULL, "cpu: %d task was re-enqueued", cpu_of(rq));
|
||||
|
||||
/* lock is no longer pinned, raw unlock using same flags as locking */
|
||||
raw_spin_rq_unlock_irqrestore(rq, rf.flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpu_drain_rq(unsigned int cpu)
|
||||
{
|
||||
if (!cpu_online(cpu))
|
||||
return 0;
|
||||
|
||||
if (available_idle_cpu(cpu))
|
||||
return 0;
|
||||
|
||||
/* this will schedule, must not be in atomic context */
|
||||
return stop_one_cpu(cpu, drain_rq_cpu_stop, NULL);
|
||||
}
|
||||
|
||||
struct drain_thread_data {
|
||||
cpumask_t cpus_to_drain;
|
||||
};
|
||||
|
||||
static struct drain_thread_data drain_data = {
|
||||
.cpus_to_drain = { CPU_BITS_NONE }
|
||||
};
|
||||
|
||||
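/*
 * Drain kthread: migrate tasks off any cpus flagged in cpus_to_drain,
 * then sleep until the next halt request wakes it up.
 */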
static int __ref try_drain_rqs(void *data)
|
||||
{
|
||||
cpumask_t *cpus_ptr = &((struct drain_thread_data *)data)->cpus_to_drain;
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
|
||||
if (cpumask_weight(cpus_ptr)) {
|
||||
cpumask_t local_cpus;
|
||||
|
||||
cpumask_copy(&local_cpus, cpus_ptr);
|
||||
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
|
||||
|
||||
for_each_cpu(cpu, &local_cpus)
|
||||
cpu_drain_rq(cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
|
||||
cpumask_andnot(cpus_ptr, cpus_ptr, &local_cpus);
|
||||
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
set_current_state(TASK_RUNNING);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
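/*
 * Apply (or lift) the partial-halt frequency cap for the clusters covered
 * by @cpus, then refresh the smart freq capacities.
 */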
void restrict_cpus_and_freq(struct cpumask *cpus)
|
||||
{
|
||||
struct cpumask restrict_cpus;
|
||||
int cpu = 0;
|
||||
|
||||
cpumask_copy(&restrict_cpus, cpus);
|
||||
|
||||
if (cpumask_intersects(cpus, cpu_partial_halt_mask) &&
|
||||
!cpumask_intersects(cpus, cpu_halt_mask) &&
|
||||
is_state1()) {
|
||||
for_each_cpu(cpu, cpus)
|
||||
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
|
||||
sysctl_max_freq_partial_halt;
|
||||
} else {
|
||||
for_each_cpu(cpu, cpus) {
|
||||
cpumask_or(&restrict_cpus, &restrict_cpus, &(cpu_cluster(cpu)->cpus));
|
||||
freq_cap[PARTIAL_HALT_CAP][cpu_cluster(cpu)->id] =
|
||||
FREQ_QOS_MAX_DEFAULT_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
update_smart_freq_capacities();
|
||||
}
|
||||
|
||||
struct task_struct *walt_drain_thread;
|
||||
|
||||
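/*
 * Mark the requested cpus as halted (or partially halted), apply the
 * frequency restriction, and for full halts hand the cpus to the drain
 * kthread so their runqueues get emptied. The first cpu able to run
 * 32-bit el0 tasks cannot be halted.
 */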
static int halt_cpus(struct cpumask *cpus, enum pause_type type)
|
||||
{
|
||||
int cpu;
|
||||
int ret = 0;
|
||||
u64 start_time = 0;
|
||||
struct halt_cpu_state *halt_cpu_state;
|
||||
unsigned long flags;
|
||||
|
||||
if (trace_halt_cpus_enabled())
|
||||
start_time = sched_clock();
|
||||
|
||||
trace_halt_cpus_start(cpus, 1);
|
||||
|
||||
/* add the cpus to the halt mask */
|
||||
for_each_cpu(cpu, cpus) {
|
||||
if (cpu == cpumask_first(system_32bit_el0_cpumask())) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
|
||||
|
||||
if (type == HALT)
|
||||
cpumask_set_cpu(cpu, cpu_halt_mask);
|
||||
else
|
||||
cpumask_set_cpu(cpu, cpu_partial_halt_mask);
|
||||
|
||||
/* guarantee mask written at this time */
|
||||
wmb();
|
||||
}
|
||||
|
||||
restrict_cpus_and_freq(cpus);
|
||||
|
||||
/* migrate tasks off the cpu */
|
||||
if (type == HALT) {
|
||||
/* signal and wakeup the drain kthread */
|
||||
raw_spin_lock_irqsave(&walt_drain_pending_lock, flags);
|
||||
cpumask_or(&drain_data.cpus_to_drain, &drain_data.cpus_to_drain, cpus);
|
||||
raw_spin_unlock_irqrestore(&walt_drain_pending_lock, flags);
|
||||
|
||||
wake_up_process(walt_drain_thread);
|
||||
}
|
||||
out:
|
||||
trace_halt_cpus(cpus, start_time, 1, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* start the cpus again, and kick them to balance */
|
||||
static int start_cpus(struct cpumask *cpus, enum pause_type type)
|
||||
{
|
||||
u64 start_time = sched_clock();
|
||||
struct halt_cpu_state *halt_cpu_state;
|
||||
int cpu;
|
||||
|
||||
trace_halt_cpus_start(cpus, 0);
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
|
||||
|
||||
/* guarantee the halt state is updated */
|
||||
wmb();
|
||||
|
||||
if (type == HALT)
|
||||
cpumask_clear_cpu(cpu, cpu_halt_mask);
|
||||
else
|
||||
cpumask_clear_cpu(cpu, cpu_partial_halt_mask);
|
||||
|
||||
/* kick the cpu so it can pull tasks
|
||||
* after the mask has been cleared.
|
||||
*/
|
||||
walt_smp_call_newidle_balance(cpu);
|
||||
}
|
||||
|
||||
restrict_cpus_and_freq(cpus);
|
||||
|
||||
trace_halt_cpus(cpus, start_time, 0, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* update client for cpus in yield/halt mask */
|
||||
static void update_clients(struct cpumask *cpus, bool halt, enum pause_client client,
|
||||
enum pause_type type)
|
||||
{
|
||||
int cpu;
|
||||
struct halt_cpu_state *halt_cpu_state;
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
|
||||
if (halt)
|
||||
halt_cpu_state->client_vote_mask[type] |= client;
|
||||
else
|
||||
halt_cpu_state->client_vote_mask[type] &= ~client;
|
||||
}
|
||||
}
|
||||
|
||||
/* remove cpus that are already halted */
|
||||
static void update_halt_cpus(struct cpumask *cpus, enum pause_type type)
|
||||
{
|
||||
int cpu;
|
||||
struct halt_cpu_state *halt_cpu_state;
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
|
||||
if (halt_cpu_state->client_vote_mask[type])
|
||||
cpumask_clear_cpu(cpu, cpus);
|
||||
}
|
||||
}
|
||||
|
||||
/* cpus will be modified */
|
||||
static int walt_halt_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
|
||||
{
|
||||
int ret = 0;
|
||||
cpumask_t requested_cpus;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&halt_lock, flags);
|
||||
|
||||
cpumask_copy(&requested_cpus, cpus);
|
||||
|
||||
/* remove cpus that are already halted */
|
||||
update_halt_cpus(cpus, type);
|
||||
|
||||
if (cpumask_empty(cpus)) {
|
||||
update_clients(&requested_cpus, true, client, type);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = halt_cpus(cpus, type);
|
||||
|
||||
if (ret < 0)
|
||||
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
|
||||
cpumask_pr_args(&requested_cpus));
|
||||
else
|
||||
update_clients(&requested_cpus, true, client, type);
|
||||
unlock:
|
||||
raw_spin_unlock_irqrestore(&halt_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int walt_pause_cpus(struct cpumask *cpus, enum pause_client client)
|
||||
{
|
||||
if (walt_disabled)
|
||||
return -EAGAIN;
|
||||
return walt_halt_cpus(cpus, client, HALT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(walt_pause_cpus);
|
||||
|
||||
int walt_partial_pause_cpus(struct cpumask *cpus, enum pause_client client)
|
||||
{
|
||||
if (walt_disabled)
|
||||
return -EAGAIN;
|
||||
return walt_halt_cpus(cpus, client, PARTIAL_HALT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(walt_partial_pause_cpus);
|
||||
|
||||
/* cpus will be modified */
|
||||
static int walt_start_cpus(struct cpumask *cpus, enum pause_client client, enum pause_type type)
|
||||
{
|
||||
int ret = 0;
|
||||
cpumask_t requested_cpus;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&halt_lock, flags);
|
||||
cpumask_copy(&requested_cpus, cpus);
|
||||
update_clients(&requested_cpus, false, client, type);
|
||||
|
||||
/* remove cpus that should still be halted */
|
||||
update_halt_cpus(cpus, type);
|
||||
|
||||
ret = start_cpus(cpus, type);
|
||||
|
||||
if (ret < 0) {
|
||||
pr_debug("halt_cpus failure ret=%d cpus=%*pbl\n", ret,
|
||||
cpumask_pr_args(&requested_cpus));
|
||||
/* restore/increment ref counts in case of error */
|
||||
update_clients(&requested_cpus, true, client, type);
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&halt_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int walt_resume_cpus(struct cpumask *cpus, enum pause_client client)
|
||||
{
|
||||
if (walt_disabled)
|
||||
return -EAGAIN;
|
||||
return walt_start_cpus(cpus, client, HALT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(walt_resume_cpus);
|
||||
|
||||
int walt_partial_resume_cpus(struct cpumask *cpus, enum pause_client client)
|
||||
{
|
||||
if (walt_disabled)
|
||||
return -EAGAIN;
|
||||
return walt_start_cpus(cpus, client, PARTIAL_HALT);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(walt_partial_resume_cpus);
|
||||
|
||||
/* return true if the requested client has fully halted one of the cpus */
|
||||
bool cpus_halted_by_client(struct cpumask *cpus, enum pause_client client)
|
||||
{
|
||||
struct halt_cpu_state *halt_cpu_state;
|
||||
int cpu;
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
halt_cpu_state = per_cpu_ptr(&halt_state, cpu);
|
||||
if ((bool)(halt_cpu_state->client_vote_mask[HALT] & client))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
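/*
 * Pick a target cpu for the nohz timer that avoids halted cpus, falling
 * back to cpus we paused ourselves only when no active unhalted cpu exists.
 */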
static void android_rvh_get_nohz_timer_target(void *unused, int *cpu, bool *done)
|
||||
{
|
||||
int i, default_cpu = -1;
|
||||
struct sched_domain *sd;
|
||||
cpumask_t active_unhalted;
|
||||
|
||||
*done = true;
|
||||
cpumask_andnot(&active_unhalted, cpu_active_mask, cpu_halt_mask);
|
||||
|
||||
if (housekeeping_cpu(*cpu, HK_TYPE_TIMER) && !cpu_halted(*cpu)) {
|
||||
if (!available_idle_cpu(*cpu))
|
||||
return;
|
||||
default_cpu = *cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* find first cpu halted by core control and try to avoid
|
||||
* affecting externally halted cpus.
|
||||
*/
|
||||
if (!cpumask_weight(&active_unhalted)) {
|
||||
cpumask_t tmp_pause, tmp_part_pause, tmp_halt, *tmp;
|
||||
|
||||
cpumask_and(&tmp_part_pause, cpu_active_mask, &cpus_part_paused_by_us);
|
||||
cpumask_and(&tmp_pause, cpu_active_mask, &cpus_paused_by_us);
|
||||
cpumask_and(&tmp_halt, cpu_active_mask, cpu_halt_mask);
|
||||
tmp = cpumask_weight(&tmp_part_pause) ? &tmp_part_pause :
|
||||
cpumask_weight(&tmp_pause) ? &tmp_pause : &tmp_halt;
|
||||
|
||||
for_each_cpu(i, tmp) {
|
||||
if ((*cpu == i) && cpumask_weight(tmp) > 1)
|
||||
continue;
|
||||
|
||||
*cpu = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(*cpu, sd) {
|
||||
for_each_cpu_and(i, sched_domain_span(sd),
|
||||
housekeeping_cpumask(HK_TYPE_TIMER)) {
|
||||
if (*cpu == i)
|
||||
continue;
|
||||
|
||||
if (!available_idle_cpu(i) && !cpu_halted(i)) {
|
||||
*cpu = i;
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (default_cpu == -1) {
|
||||
for_each_cpu_and(i, &active_unhalted,
|
||||
housekeeping_cpumask(HK_TYPE_TIMER)) {
|
||||
if (*cpu == i)
|
||||
continue;
|
||||
|
||||
if (!available_idle_cpu(i)) {
|
||||
*cpu = i;
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/* choose any active unhalted cpu */
|
||||
default_cpu = cpumask_any(&active_unhalted);
|
||||
if (unlikely(default_cpu >= nr_cpu_ids))
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
*cpu = default_cpu;
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* android_rvh_set_cpus_allowed_by_task: disallow cpus that are halted
|
||||
*
|
||||
* NOTES: may be called if migration is disabled for the task
|
||||
* if per-cpu-kthread, must not deliberately return an invalid cpu
|
||||
* if !per-cpu-kthread, may return an invalid cpu (reject dest_cpu)
|
||||
* must not change cpu in the in_execve 32bit task case
|
||||
*/
|
||||
static void android_rvh_set_cpus_allowed_by_task(void *unused,
|
||||
const struct cpumask *cpu_valid_mask,
|
||||
const struct cpumask *new_mask,
|
||||
struct task_struct *p,
|
||||
unsigned int *dest_cpu)
|
||||
{
|
||||
if (unlikely(walt_disabled))
|
||||
return;
|
||||
|
||||
/* allow kthreads to change affinity regardless of halt status of dest_cpu */
|
||||
if (p->flags & PF_KTHREAD)
|
||||
return;
|
||||
|
||||
if (cpu_halted(*dest_cpu) && !p->migration_disabled) {
|
||||
cpumask_t allowed_cpus;
|
||||
|
||||
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve))
|
||||
return;
|
||||
|
||||
/* remove halted cpus from the valid mask, and store locally */
|
||||
cpumask_andnot(&allowed_cpus, cpu_valid_mask, cpu_halt_mask);
|
||||
cpumask_and(&allowed_cpus, &allowed_cpus, new_mask);
|
||||
|
||||
/* do not modify dest_cpu if there are no cpus to choose from */
|
||||
if (!cpumask_empty(&allowed_cpus))
|
||||
*dest_cpu = cpumask_any_and_distribute(&allowed_cpus, new_mask);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* android_rvh_rto_next_cpu: disallow halted cpus for irq work functions
|
||||
*/
|
||||
static void android_rvh_rto_next_cpu(void *unused, int rto_cpu, struct cpumask *rto_mask, int *cpu)
|
||||
{
|
||||
cpumask_t allowed_cpus;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return;
|
||||
|
||||
if (cpu_halted(*cpu)) {
|
||||
/* remove halted cpus from the valid mask, and store locally */
|
||||
cpumask_andnot(&allowed_cpus, rto_mask, cpu_halt_mask);
|
||||
*cpu = cpumask_next(rto_cpu, &allowed_cpus);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* android_rvh_is_cpu_allowed: disallow cpus that are halted
|
||||
*
|
||||
* NOTE: this function will not be called if migration is disabled for the task.
|
||||
*/
|
||||
static void android_rvh_is_cpu_allowed(void *unused, struct task_struct *p, int cpu, bool *allowed)
|
||||
{
|
||||
if (unlikely(walt_disabled))
|
||||
return;
|
||||
|
||||
if (cpumask_test_cpu(cpu, cpu_halt_mask)) {
|
||||
cpumask_t cpus_allowed;
|
||||
|
||||
/* default reject for any halted cpu */
|
||||
*allowed = false;
|
||||
|
||||
if (unlikely(is_compat_thread(task_thread_info(p)) && p->in_execve)) {
|
||||
/* 32bit task in execve. allow this cpu. */
|
||||
*allowed = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* for cfs threads, active cpus in the affinity are allowed
|
||||
* but halted cpus are not allowed
|
||||
*/
|
||||
cpumask_and(&cpus_allowed, cpu_active_mask, p->cpus_ptr);
|
||||
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_halt_mask);
|
||||
|
||||
if (!(p->flags & PF_KTHREAD)) {
|
||||
if (cpumask_empty(&cpus_allowed)) {
|
||||
/*
|
||||
* All affined cpus are inactive or halted.
|
||||
* Allow this cpu for user threads
|
||||
*/
|
||||
*allowed = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* for kthreads, dying cpus are not allowed */
|
||||
cpumask_andnot(&cpus_allowed, &cpus_allowed, cpu_dying_mask);
|
||||
if (cpumask_empty(&cpus_allowed)) {
|
||||
/*
|
||||
* All affined cpus inactive or halted or dying.
|
||||
* Allow this cpu for kthreads
|
||||
*/
|
||||
*allowed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void walt_halt_init(void)
|
||||
{
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
|
||||
|
||||
walt_drain_thread = kthread_run(try_drain_rqs, &drain_data, "halt_drain_rqs");
|
||||
if (IS_ERR(walt_drain_thread)) {
|
||||
pr_err("Error creating walt drain thread\n");
|
||||
return;
|
||||
}
|
||||
|
||||
sched_setscheduler_nocheck(walt_drain_thread, SCHED_FIFO, ¶m);
|
||||
|
||||
register_trace_android_rvh_get_nohz_timer_target(android_rvh_get_nohz_timer_target, NULL);
|
||||
register_trace_android_rvh_set_cpus_allowed_by_task(
|
||||
android_rvh_set_cpus_allowed_by_task, NULL);
|
||||
register_trace_android_rvh_rto_next_cpu(android_rvh_rto_next_cpu, NULL);
|
||||
register_trace_android_rvh_is_cpu_allowed(android_rvh_is_cpu_allowed, NULL);
|
||||
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
kernel/sched/walt/walt_lb.c (new file, 1193 lines; diff suppressed because it is too large)
kernel/sched/walt/walt_rt.c (new file, 431 lines)
@@ -0,0 +1,431 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <trace/hooks/sched.h>
|
||||
|
||||
#include "walt.h"
|
||||
#include "trace.h"
|
||||
|
||||
static DEFINE_PER_CPU(cpumask_var_t, walt_local_cpu_mask);
|
||||
DEFINE_PER_CPU(u64, rt_task_arrival_time) = 0;
|
||||
static bool long_running_rt_task_trace_rgstrd;
|
||||
|
||||
static void rt_task_arrival_marker(void *unused, bool preempt,
|
||||
struct task_struct *prev, struct task_struct *next,
|
||||
unsigned int prev_state)
|
||||
{
|
||||
unsigned int cpu = raw_smp_processor_id();
|
||||
|
||||
if (next->policy == SCHED_FIFO && next != cpu_rq(cpu)->stop)
|
||||
per_cpu(rt_task_arrival_time, cpu) = rq_clock_task(this_rq());
|
||||
else
|
||||
per_cpu(rt_task_arrival_time, cpu) = 0;
|
||||
}
|
||||
|
||||
static void long_running_rt_task_notifier(void *unused, struct rq *rq)
|
||||
{
|
||||
struct task_struct *curr = rq->curr;
|
||||
unsigned int cpu = raw_smp_processor_id();
|
||||
|
||||
if (!sysctl_sched_long_running_rt_task_ms)
|
||||
return;
|
||||
|
||||
if (!per_cpu(rt_task_arrival_time, cpu))
|
||||
return;
|
||||
|
||||
if (per_cpu(rt_task_arrival_time, cpu) && curr->policy != SCHED_FIFO) {
|
||||
/*
|
||||
* It is possible that the scheduling policy for the current
|
||||
* task might get changed after task arrival time stamp is
|
||||
* noted during sched_switch of RT task. To avoid such false
|
||||
* positives, reset arrival time stamp.
|
||||
*/
|
||||
per_cpu(rt_task_arrival_time, cpu) = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since we are called from the main tick, rq clock task must have
|
||||
* been updated very recently. Use it directly, instead of
|
||||
* update_rq_clock_task() to avoid warnings.
|
||||
*/
|
||||
if (rq->clock_task -
|
||||
per_cpu(rt_task_arrival_time, cpu)
|
||||
> sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC) {
|
||||
printk_deferred("RT task %s (%d) runtime > %u now=%llu task arrival time=%llu runtime=%llu\n",
|
||||
curr->comm, curr->pid,
|
||||
sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC,
|
||||
rq->clock_task,
|
||||
per_cpu(rt_task_arrival_time, cpu),
|
||||
rq->clock_task -
|
||||
per_cpu(rt_task_arrival_time, cpu));
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
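/*
 * sysctl handler: clamp any nonzero sched_long_running_rt_task_ms value
 * below 800 up to 800ms, and register the detection tracepoints on the
 * first write.
 */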
int sched_long_running_rt_task_ms_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
static DEFINE_MUTEX(mutex);
|
||||
|
||||
mutex_lock(&mutex);
|
||||
|
||||
ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
|
||||
|
||||
if (sysctl_sched_long_running_rt_task_ms > 0 &&
|
||||
sysctl_sched_long_running_rt_task_ms < 800)
|
||||
sysctl_sched_long_running_rt_task_ms = 800;
|
||||
|
||||
if (write && !long_running_rt_task_trace_rgstrd) {
|
||||
register_trace_sched_switch(rt_task_arrival_marker, NULL);
|
||||
register_trace_android_vh_scheduler_tick(long_running_rt_task_notifier, NULL);
|
||||
long_running_rt_task_trace_rgstrd = true;
|
||||
}
|
||||
|
||||
mutex_unlock(&mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
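/*
 * Scan the clusters in order and pick the lowest-utilization candidate from
 * @lowest_mask, breaking ties by previous cpu, idle exit latency and
 * cumulative window demand. *best_cpu is left at -1 if nothing qualifies.
 */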
static void walt_rt_energy_aware_wake_cpu(struct task_struct *task, struct cpumask *lowest_mask,
|
||||
int ret, int *best_cpu)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long util, best_cpu_util = ULONG_MAX;
|
||||
unsigned long best_cpu_util_cum = ULONG_MAX;
|
||||
unsigned long util_cum;
|
||||
unsigned long tutil = task_util(task);
|
||||
unsigned int best_idle_exit_latency = UINT_MAX;
|
||||
unsigned int cpu_idle_exit_latency = UINT_MAX;
|
||||
bool boost_on_big = rt_boost_on_big();
|
||||
int cluster;
|
||||
int order_index = (boost_on_big && num_sched_clusters > 1) ? 1 : 0;
|
||||
int end_index = 0;
|
||||
bool best_cpu_lt = true;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return;
|
||||
|
||||
if (!ret)
|
||||
return; /* No targets found */
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (soc_feat(SOC_ENABLE_SILVER_RT_SPREAD_BIT) && order_index == 0)
|
||||
end_index = 1;
|
||||
|
||||
for (cluster = 0; cluster < num_sched_clusters; cluster++) {
|
||||
for_each_cpu_and(cpu, lowest_mask, &cpu_array[order_index][cluster]) {
|
||||
bool lt;
|
||||
|
||||
trace_sched_cpu_util(cpu, lowest_mask);
|
||||
|
||||
if (!cpu_active(cpu))
|
||||
continue;
|
||||
|
||||
if (cpu_halted(cpu))
|
||||
continue;
|
||||
|
||||
if (sched_cpu_high_irqload(cpu))
|
||||
continue;
|
||||
|
||||
if (__cpu_overutilized(cpu, tutil))
|
||||
continue;
|
||||
|
||||
util = cpu_util(cpu);
|
||||
|
||||
lt = (walt_low_latency_task(cpu_rq(cpu)->curr) ||
|
||||
walt_nr_rtg_high_prio(cpu));
|
||||
|
||||
/*
|
||||
* When the best is suitable and the current is not,
|
||||
* skip it
|
||||
*/
|
||||
if (lt && !best_cpu_lt)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Either both are suitable or unsuitable, load takes
|
||||
* precedence.
|
||||
*/
|
||||
if (!(best_cpu_lt ^ lt) && (util > best_cpu_util))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the previous CPU has same load, keep it as
|
||||
* best_cpu.
|
||||
*/
|
||||
if (best_cpu_util == util && *best_cpu == task_cpu(task))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If candidate CPU is the previous CPU, select it.
|
||||
* Otherwise, if its load is same with best_cpu and in
|
||||
* a shallower C-state, select it. If all above
|
||||
* conditions are same, select the least cumulative
|
||||
* window demand CPU.
|
||||
*/
|
||||
cpu_idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(cpu));
|
||||
|
||||
util_cum = cpu_util_cum(cpu);
|
||||
if (cpu != task_cpu(task) && best_cpu_util == util) {
|
||||
if (best_idle_exit_latency < cpu_idle_exit_latency)
|
||||
continue;
|
||||
|
||||
if (best_idle_exit_latency == cpu_idle_exit_latency &&
|
||||
best_cpu_util_cum < util_cum)
|
||||
continue;
|
||||
}
|
||||
|
||||
best_idle_exit_latency = cpu_idle_exit_latency;
|
||||
best_cpu_util_cum = util_cum;
|
||||
best_cpu_util = util;
|
||||
*best_cpu = cpu;
|
||||
best_cpu_lt = lt;
|
||||
}
|
||||
if (cluster < end_index) {
|
||||
if (*best_cpu == -1 || !available_idle_cpu(*best_cpu))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*best_cpu != -1)
|
||||
break;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
|
||||
{
|
||||
unsigned int min_cap;
|
||||
unsigned int max_cap;
|
||||
unsigned int cpu_cap;
|
||||
|
||||
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
|
||||
max_cap = uclamp_eff_value(p, UCLAMP_MAX);
|
||||
|
||||
cpu_cap = capacity_orig_of(cpu);
|
||||
|
||||
return cpu_cap >= min(min_cap, max_cap);
|
||||
}
|
||||
#else
|
||||
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* WALT-specific should_honor_rt_sync (see rt.c). This will honor
|
||||
* the sync flag regardless of whether the current waker is cfs or rt
|
||||
*/
|
||||
static inline bool walt_should_honor_rt_sync(struct rq *rq, struct task_struct *p,
|
||||
bool sync)
|
||||
{
|
||||
return sync &&
|
||||
p->prio <= rq->rt.highest_prio.next &&
|
||||
rq->rt.rt_nr_running <= 2;
|
||||
}
|
||||
|
||||
enum rt_fastpaths {
|
||||
NONE = 0,
|
||||
NON_WAKEUP,
|
||||
SYNC_WAKEUP,
|
||||
CLUSTER_PACKING_FASTPATH,
|
||||
};
|
||||
|
||||
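/*
 * rvh hook for select_task_rq_rt(): try the sync wakeup fastpath, then
 * cluster packing, then the energy aware search, and finally steer the
 * chosen cpu away from halted cpus before handing the choice back.
 */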
static void walt_select_task_rq_rt(void *unused, struct task_struct *task, int cpu,
|
||||
int sd_flag, int wake_flags, int *new_cpu)
|
||||
{
|
||||
struct task_struct *curr;
|
||||
struct rq *rq, *this_cpu_rq;
|
||||
bool may_not_preempt;
|
||||
bool sync = !!(wake_flags & WF_SYNC);
|
||||
int ret, target = -1, this_cpu;
|
||||
struct cpumask *lowest_mask = NULL;
|
||||
int packing_cpu = -1;
|
||||
int fastpath = NONE;
|
||||
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
|
||||
struct walt_task_struct *wts;
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return;
|
||||
|
||||
/* For anything but wake ups, just return the task_cpu */
|
||||
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) {
|
||||
fastpath = NON_WAKEUP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
this_cpu = raw_smp_processor_id();
|
||||
this_cpu_rq = cpu_rq(this_cpu);
|
||||
wts = (struct walt_task_struct *) task->android_vendor_data1;
|
||||
|
||||
/*
|
||||
* Respect the sync flag as long as the task can run on this CPU.
|
||||
*/
|
||||
if (sysctl_sched_sync_hint_enable && cpu_active(this_cpu) && !cpu_halted(this_cpu) &&
|
||||
cpumask_test_cpu(this_cpu, task->cpus_ptr) &&
|
||||
cpumask_test_cpu(this_cpu, &wts->reduce_mask) &&
|
||||
walt_should_honor_rt_sync(this_cpu_rq, task, sync)) {
|
||||
fastpath = SYNC_WAKEUP;
|
||||
*new_cpu = this_cpu;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*new_cpu = cpu; /* previous CPU as back up */
|
||||
rq = cpu_rq(cpu);
|
||||
|
||||
rcu_read_lock();
|
||||
curr = READ_ONCE(rq->curr); /* unlocked access */
|
||||
|
||||
/*
|
||||
* If the current task on @p's runqueue is a softirq task,
|
||||
* it may run without preemption for a time that is
|
||||
* ill-suited for a waiting RT task. Therefore, try to
|
||||
* wake this RT task on another runqueue.
|
||||
*
|
||||
* Otherwise, just let it ride on the affined RQ and the
|
||||
* post-schedule router will push the preempted task away
|
||||
*
|
||||
* This test is optimistic, if we get it wrong the load-balancer
|
||||
* will have to sort it out.
|
||||
*
|
||||
* We take into account the capacity of the CPU to ensure it fits the
|
||||
* requirement of the task - which is only important on heterogeneous
|
||||
* systems like big.LITTLE.
|
||||
*/
|
||||
may_not_preempt = cpu_busy_with_softirqs(cpu);
|
||||
|
||||
lowest_mask = this_cpu_cpumask_var_ptr(walt_local_cpu_mask);
|
||||
|
||||
/*
|
||||
* If we're on asym system ensure we consider the different capacities
|
||||
* of the CPUs when searching for the lowest_mask.
|
||||
*/
|
||||
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri, task,
|
||||
lowest_mask, walt_rt_task_fits_capacity);
|
||||
|
||||
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
|
||||
if (packing_cpu >= 0) {
|
||||
while (packing_cpu < WALT_NR_CPUS) {
|
||||
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
|
||||
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
|
||||
cpu_active(packing_cpu) &&
|
||||
!cpu_halted(packing_cpu) &&
|
||||
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 1))
|
||||
break;
|
||||
packing_cpu++;
|
||||
}
|
||||
|
||||
if (packing_cpu < WALT_NR_CPUS) {
|
||||
fastpath = CLUSTER_PACKING_FASTPATH;
|
||||
*new_cpu = packing_cpu;
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
|
||||
if (!cpumask_empty(&lowest_mask_reduced))
|
||||
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, &target);
|
||||
if (target == -1)
|
||||
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, &target);
|
||||
|
||||
/*
|
||||
* If cpu is non-preemptible, prefer remote cpu
|
||||
* even if it's running a higher-prio task.
|
||||
* Otherwise: Don't bother moving it if the destination CPU is
|
||||
* not running a lower priority task.
|
||||
*/
|
||||
if (target != -1 &&
|
||||
(may_not_preempt || task->prio < cpu_rq(target)->rt.highest_prio.curr))
|
||||
*new_cpu = target;
|
||||
|
||||
/* if backup or chosen cpu is halted, pick something else */
|
||||
if (cpu_halted(*new_cpu)) {
|
||||
cpumask_t non_halted;
|
||||
|
||||
/* choose the lowest-order, unhalted, allowed CPU */
|
||||
cpumask_andnot(&non_halted, task->cpus_ptr, cpu_halt_mask);
|
||||
target = cpumask_first(&non_halted);
|
||||
if (target < nr_cpu_ids)
|
||||
*new_cpu = target;
|
||||
}
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
trace_sched_select_task_rt(task, fastpath, *new_cpu, lowest_mask);
|
||||
}
|
||||
|
||||
|
||||
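/*
 * rvh hook for find_lowest_rq(): apply cluster packing and the energy
 * aware search; if no best cpu is found, drop halted cpus from
 * lowest_mask so the generic code cannot pick one.
 */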
static void walt_rt_find_lowest_rq(void *unused, struct task_struct *task,
|
||||
struct cpumask *lowest_mask, int ret, int *best_cpu)
|
||||
|
||||
{
|
||||
int packing_cpu = -1;
|
||||
int fastpath = 0;
|
||||
struct walt_task_struct *wts;
|
||||
struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
|
||||
|
||||
if (unlikely(walt_disabled))
|
||||
return;
|
||||
|
||||
wts = (struct walt_task_struct *) task->android_vendor_data1;
|
||||
|
||||
packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);
|
||||
if (packing_cpu >= 0) {
|
||||
while (packing_cpu < WALT_NR_CPUS) {
|
||||
if (cpumask_test_cpu(packing_cpu, &wts->reduce_mask) &&
|
||||
cpumask_test_cpu(packing_cpu, task->cpus_ptr) &&
|
||||
cpu_active(packing_cpu) &&
|
||||
!cpu_halted(packing_cpu) &&
|
||||
(cpu_rq(packing_cpu)->rt.rt_nr_running <= 2))
|
||||
break;
|
||||
packing_cpu++;
|
||||
}
|
||||
|
||||
if (packing_cpu < WALT_NR_CPUS) {
|
||||
fastpath = CLUSTER_PACKING_FASTPATH;
|
||||
*best_cpu = packing_cpu;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
|
||||
if (!cpumask_empty(&lowest_mask_reduced))
|
||||
walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, best_cpu);
|
||||
if (*best_cpu == -1)
|
||||
walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, best_cpu);
|
||||
|
||||
/*
|
||||
* Walt was not able to find a non-halted best cpu. Ensure that
|
||||
* find_lowest_rq doesn't use a halted cpu going forward, but
|
||||
* does a best effort itself to find a good CPU.
|
||||
*/
|
||||
if (*best_cpu == -1)
|
||||
cpumask_andnot(lowest_mask, lowest_mask, cpu_halt_mask);
|
||||
out:
|
||||
trace_sched_rt_find_lowest_rq(task, fastpath, *best_cpu, lowest_mask);
|
||||
}
|
||||
|
||||
void walt_rt_init(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
if (!(zalloc_cpumask_var_node(&per_cpu(walt_local_cpu_mask, i),
|
||||
GFP_KERNEL, cpu_to_node(i)))) {
|
||||
pr_err("walt_local_cpu_mask alloc failed for cpu%d\n", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
register_trace_android_rvh_select_task_rq_rt(walt_select_task_rq_rt, NULL);
|
||||
register_trace_android_rvh_find_lowest_rq(walt_rt_find_lowest_rq, NULL);
|
||||
}
|
kernel/sched/walt/walt_tp.c (new file, 161 lines)
@@ -0,0 +1,161 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/tracepoint.h>
|
||||
#include <trace/hooks/sched.h>
|
||||
#include "trace.h"
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "perf_trace_counters.h"
|
||||
|
||||
unsigned int sysctl_sched_dynamic_tp_enable;
|
||||
|
||||
#define USE_CPUHP_STATE CPUHP_AP_ONLINE_DYN
|
||||
|
||||
DEFINE_PER_CPU(u32, cntenset_val);
|
||||
DEFINE_PER_CPU(unsigned long, previous_ccnt);
|
||||
DEFINE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
|
||||
DEFINE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
|
||||
DEFINE_PER_CPU(u32, old_pid);
|
||||
DEFINE_PER_CPU(u32, hotplug_flag);
|
||||
DEFINE_PER_CPU(u64, prev_time);
|
||||
|
||||
static int tracectr_cpu_hotplug_coming_up(unsigned int cpu)
|
||||
{
|
||||
per_cpu(hotplug_flag, cpu) = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void setup_prev_cnts(u32 cpu, u32 cnten_val)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (cnten_val & CC)
|
||||
per_cpu(previous_ccnt, cpu) =
|
||||
read_sysreg(pmccntr_el0);
|
||||
|
||||
for (i = 0; i < NUM_L1_CTRS; i++) {
|
||||
if (cnten_val & (1 << i)) {
|
||||
/* Select */
|
||||
write_sysreg(i, pmselr_el0);
|
||||
isb();
|
||||
/* Read value */
|
||||
per_cpu(previous_l1_cnts[i], cpu) =
|
||||
read_sysreg(pmxevcntr_el0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
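/*
 * sched_switch probe: briefly disable the enabled PMU counters, emit the
 * per-task counter trace events (or re-prime the saved values right after
 * cpu hotplug), then re-enable the counters.
 */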
void tracectr_notifier(void *ignore, bool preempt,
|
||||
struct task_struct *prev, struct task_struct *next,
|
||||
unsigned int prev_state)
|
||||
{
|
||||
u32 cnten_val;
|
||||
int current_pid;
|
||||
u32 cpu = task_cpu(next);
|
||||
u64 now;
|
||||
|
||||
if (!trace_sched_switch_with_ctrs_enabled())
|
||||
return;
|
||||
|
||||
current_pid = next->pid;
|
||||
if (per_cpu(old_pid, cpu) != -1) {
|
||||
cnten_val = read_sysreg(pmcntenset_el0);
|
||||
per_cpu(cntenset_val, cpu) = cnten_val;
|
||||
/* Disable all the counters that were enabled */
|
||||
write_sysreg(cnten_val, pmcntenclr_el0);
|
||||
|
||||
if (per_cpu(hotplug_flag, cpu) == 1) {
|
||||
per_cpu(hotplug_flag, cpu) = 0;
|
||||
setup_prev_cnts(cpu, cnten_val);
|
||||
} else {
|
||||
trace_sched_switch_with_ctrs(preempt, prev, next);
|
||||
now = sched_clock();
|
||||
if ((now - per_cpu(prev_time, cpu)) > NSEC_PER_SEC) {
|
||||
trace_sched_switch_ctrs_cfg(cpu);
|
||||
per_cpu(prev_time, cpu) = now;
|
||||
}
|
||||
}
|
||||
|
||||
/* Enable all the counters that were disabled */
|
||||
write_sysreg(cnten_val, pmcntenset_el0);
|
||||
}
|
||||
per_cpu(old_pid, cpu) = current_pid;
|
||||
}
|
||||
|
||||
static void register_sched_switch_ctrs(void)
|
||||
{
|
||||
int cpu, rc;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu(old_pid, cpu) = -1;
|
||||
|
||||
rc = cpuhp_setup_state_nocalls(USE_CPUHP_STATE, "tracectr_cpu_hotplug",
|
||||
tracectr_cpu_hotplug_coming_up, NULL);
|
||||
if (rc >= 0)
|
||||
register_trace_sched_switch(tracectr_notifier, NULL);
|
||||
}
|
||||
|
||||
static void unregister_sched_switch_ctrs(void)
|
||||
{
|
||||
unregister_trace_sched_switch(tracectr_notifier, NULL);
|
||||
cpuhp_remove_state_nocalls(USE_CPUHP_STATE);
|
||||
}
|
||||
|
||||
const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
return rd ? rd->span : NULL;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void sched_overutilized(void *data, struct root_domain *rd,
|
||||
bool overutilized)
|
||||
{
|
||||
if (trace_sched_overutilized_enabled()) {
|
||||
char span[SPAN_SIZE];
|
||||
|
||||
cpumap_print_to_pagebuf(false, span, sched_trace_rd_span(rd));
|
||||
trace_sched_overutilized(overutilized, span);
|
||||
}
|
||||
}
|
||||
|
||||
static void walt_register_dynamic_tp_events(void)
|
||||
{
|
||||
register_trace_sched_overutilized_tp(sched_overutilized, NULL);
|
||||
register_sched_switch_ctrs();
|
||||
}
|
||||
|
||||
static void walt_unregister_dynamic_tp_events(void)
|
||||
{
|
||||
unregister_trace_sched_overutilized_tp(sched_overutilized, NULL);
|
||||
unregister_sched_switch_ctrs();
|
||||
}
|
||||
|
||||
int sched_dynamic_tp_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
static DEFINE_MUTEX(mutex);
|
||||
int ret = 0, *val = (unsigned int *)table->data;
|
||||
unsigned int old_val;
|
||||
|
||||
mutex_lock(&mutex);
|
||||
old_val = sysctl_sched_dynamic_tp_enable;
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
if (ret || !write || (old_val == sysctl_sched_dynamic_tp_enable))
|
||||
goto done;
|
||||
|
||||
if (*val)
|
||||
walt_register_dynamic_tp_events();
|
||||
else
|
||||
walt_unregister_dynamic_tp_events();
|
||||
done:
|
||||
mutex_unlock(&mutex);
|
||||
return ret;
|
||||
}
|
kernel/seccomp.c (591 lines changed)
@@ -36,10 +36,6 @@
|
||||
#include <asm/syscall.h>
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SECURITY_DSMS) && defined(CONFIG_SECURITY_KUMIHO)
|
||||
#include <linux/dsms.h>
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SECCOMP_FILTER
|
||||
#include <linux/file.h>
|
||||
#include <linux/filter.h>
|
||||
@@ -964,583 +960,8 @@ static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
|
||||
SECCOMP_LOG_TRACE |
|
||||
SECCOMP_LOG_LOG;
|
||||
|
||||
/* SEC_PRODUCT_FEATURE_SECURITY_SUPPORT_DSMS { */
|
||||
#if defined(CONFIG_SECURITY_DSMS) && defined(CONFIG_SECURITY_KUMIHO)
|
||||
|
||||
/* append_string_s: append simple string to a buffer
|
||||
* @target: pointer to a string, which is updated on success to
|
||||
* point to the next available space
|
||||
* @available_size: pointer to count of available bytes in target, including
|
||||
* terminator; updated on success
|
||||
* @source: nonnull pointer to text (zero-terminated, unless @source_len > 0)
|
||||
* to be appended to *target
|
||||
* @source_len: if > 0, exactly the number of bytes in @source which will be
|
||||
* appended
|
||||
* Returns 0 if *source was completely copied, 1 otherwise (null source,
|
||||
* or not enough space in *target)
|
||||
*/
|
||||
static int append_string_s(char **target, int *available_size,
|
||||
const char *source, int source_len)
|
||||
{
|
||||
if (!source) // sanity check
|
||||
return 1;
|
||||
while (*available_size > 1 && (source_len > 0 || *source)) {
|
||||
*((*target)++) = *source++;
|
||||
--(*available_size);
|
||||
if (source_len > 0)
|
||||
--source_len;
|
||||
}
|
||||
if (*available_size > 0)
|
||||
**target = 0;
|
||||
return *source != 0; // copy terminated prematurely
|
||||
}
|
||||
|
||||
/* append_string: append to a buffer message, optionally quoting/escaping
|
||||
* @target: pointer to a string address, which is updated on success to
|
||||
* point to the next available space
|
||||
* @available_size: pointer to count of available bytes in target, including
|
||||
* terminator; updated on success
|
||||
* @source: string to be appended to *target; if @source_length is zero,
|
||||
* must be zero-terminated
|
||||
* @source_len: if > 0, exactly the number of bytes in @source which will be
|
||||
* appended
|
||||
* @quote_escape: if true, add open/closing quotes and escapes nongraphic
|
||||
* characters
|
||||
* Returns 0 if *source was completely copied, 1 otherwise
|
||||
*/
|
||||
static int append_string(char **target, int *available_size,
|
||||
char *source, int source_length,
|
||||
int quote_escape)
|
||||
{
|
||||
if (source_length > 0)
|
||||
source[--source_length] = 0;
|
||||
if (quote_escape) {
|
||||
const char *p;
|
||||
|
||||
if (*available_size < 2)
|
||||
return 1;
|
||||
*((*target)++) = '"';
|
||||
--(*available_size);
|
||||
|
||||
for (p = source; source_length > 0 || *p; ++p) {
|
||||
char ss[5];
|
||||
|
||||
ss[2] = 0;
|
||||
switch (*p) {
|
||||
case '\t':
|
||||
*ss = '\\'; ss[1] = 't'; break;
|
||||
case '\n':
|
||||
*ss = '\\'; ss[1] = 'n'; break;
|
||||
case '\r':
|
||||
*ss = '\\'; ss[1] = 'r'; break;
|
||||
case '\\':
|
||||
*ss = '\\'; ss[1] = '\\'; break;
|
||||
case '"':
|
||||
*ss = '\\'; ss[1] = '"'; break;
|
||||
default:
|
||||
if (*(unsigned char *)p < ' ' ||
|
||||
*(unsigned char *)p > 127) {
|
||||
sprintf(ss, "\\%03o",
|
||||
*(unsigned char *)p);
|
||||
} else { // ordinary character
|
||||
*ss = *p;
|
||||
ss[1] = 0;
|
||||
}
|
||||
}
|
||||
if (append_string_s(target, available_size, ss, 0))
|
||||
return 1;
|
||||
if (source_length > 0)
|
||||
--source_length;
|
||||
}
|
||||
return append_string_s(target, available_size, "\"", 0);
|
||||
}
|
||||
return append_string_s(target, available_size, source, source_length);
|
||||
}
|
||||
|
||||
/* append_string_f: append formatted data to a buffer message, optionally
|
||||
* quoting/escaping
|
||||
* @target: pointer to a string address, which is updated on success to
|
||||
* point to the next available space
|
||||
* @available_size: pointer to count of available bytes in target, including
|
||||
* terminator; updated on success
|
||||
* @aux_buffer: pointer to an auxiliary buffer, which should be enough for
|
||||
* holding all formatted arguments
|
||||
* @aux_buffer_size: size of @aux_buffer
|
||||
* @quote_escape: if true, add open/closing quotes and escapes nongraphic
|
||||
* characters
|
||||
* @format: formatting string, printf-style
|
||||
* All additional arguments are formatted into @aux_buffer
|
||||
* Returns 0 if all arguments were formatted and completely copied, 1 otherwise
|
||||
*/
|
||||
static int append_string_f(char **target, int *available_size,
|
||||
char *aux_buffer, size_t aux_buffer_size,
|
||||
int quote_escape, const char *format, ...)
|
||||
{
|
||||
size_t vsnp_ret;
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format);
|
||||
vsnp_ret = vsnprintf(aux_buffer, aux_buffer_size, format, ap);
|
||||
va_end(ap);
|
||||
return append_string(target, available_size, aux_buffer, 0,
|
||||
quote_escape) || vsnp_ret >= aux_buffer_size;
|
||||
}
|
||||
|
||||
/* clone_from_user: returns copy of userspace region, if possible
|
||||
* @dst: copy destination; if 0, allocate space
|
||||
* @src: userspace address
|
||||
* @size: address of size of region to be copied; will be updated with
|
||||
* count of effectively copied bytes
|
||||
* @buffer: pointer to a string address, used to record any diagnostic
|
||||
* messages; will be updated to point to the next available space
|
||||
* @buffer_size: pointer to count of available bytes in @buffer, including
|
||||
* terminator; updated after use
|
||||
* @task_name: short nonnull tag to identify caller
|
||||
* @ne: pointer to flag, which will be nonzero if @buffer_size was not
|
||||
* enough to hold all diagnostic messages
|
||||
* Returns effective destination, 0 if @src was invalid or allocation failed
|
||||
*/
|
||||
static void *clone_from_user(void *dst, const void *src, size_t *size,
|
||||
char **buffer, int *buffer_size, const char *task_name, int *ne)
|
||||
{
|
||||
void *eff_dst;
|
||||
char aux_buffer[100];
|
||||
size_t uncopied_size;
|
||||
|
||||
if (!src) {
|
||||
*ne |= append_string_f(buffer, buffer_size,
|
||||
aux_buffer, sizeof(aux_buffer), 0,
|
||||
" (%s: null src)", task_name);
|
||||
*size = 0;
|
||||
return 0;
|
||||
}
|
||||
if (!dst) {
|
||||
eff_dst = kcalloc(1, *size, GFP_KERNEL);
|
||||
if (!eff_dst) {
|
||||
*ne |= append_string_f(buffer, buffer_size,
|
||||
aux_buffer, sizeof(aux_buffer), 0,
|
||||
" (%s: failed alloc)", task_name);
|
||||
*size = 0;
|
||||
return 0;
|
||||
}
|
||||
} else
|
||||
eff_dst = dst;
|
||||
uncopied_size = copy_from_user(eff_dst, src, *size);
|
||||
if (uncopied_size)
|
||||
*ne |= append_string_f(buffer, buffer_size,
|
||||
aux_buffer, sizeof(aux_buffer), 0,
|
||||
" (%s: copied only %zu of %zu bytes)",
|
||||
task_name, *size - uncopied_size, *size);
|
||||
*size -= uncopied_size;
|
||||
return eff_dst;
|
||||
}
|
||||
|
||||
/* Descriptor of syscalls for a more user-friendly display */
|
||||
struct syscall_api {
|
||||
int nr; // key: syscall number
|
||||
const char *name; // user-readable name
|
||||
unsigned char nargs; // argument count
|
||||
unsigned char arg_str; // bitmap marking which arguments are text strings
|
||||
int (*dump)(char **buffer, // optional custom formatter
|
||||
int *available_size, const struct seccomp_data *sd);
|
||||
// Constants for struct syscall_api.arg_str
|
||||
#define AS0 1 // first argument is a string
|
||||
#define AS1 (1 << 1)
|
||||
#define AS2 (1 << 2)
|
||||
#define AS3 (1 << 3)
|
||||
#define AS4 (1 << 4)
|
||||
#define AS5 (1 << 5)
|
||||
};
|
||||
|
||||
#include <uapi/linux/un.h> // sockaddr_un
|
||||
|
||||
/* Specialized formatter for some kinds of socket address */
|
||||
static int dump_sockaddr(char **buffer, int *buffer_size,
|
||||
char *aux_buffer, size_t aux_buffer_size,
|
||||
const struct sockaddr *s_addr, int addr_len)
|
||||
{
|
||||
int ne = append_string_f(buffer, buffer_size,
|
||||
aux_buffer, aux_buffer_size, 0,
|
||||
" fam %d", s_addr->sa_family);
|
||||
|
||||
if (!ne)
|
||||
switch (s_addr->sa_family) {
|
||||
case AF_UNIX:
|
||||
if (addr_len >= sizeof(struct sockaddr_un)) {
|
||||
struct sockaddr_un *s_un =
|
||||
(struct sockaddr_un *)s_addr;
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
aux_buffer, aux_buffer_size, 0,
|
||||
" UN \"%s\"", s_un->sun_path);
|
||||
}
|
||||
break;
|
||||
case AF_INET:
|
||||
if (addr_len >= sizeof(struct sockaddr_in)) {
|
||||
struct sockaddr_in *s_in =
|
||||
(struct sockaddr_in *)s_addr;
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
aux_buffer, aux_buffer_size, 0,
|
||||
" IP P%u A%pI4",
|
||||
s_in->sin_port, &s_in->sin_addr);
|
||||
}
|
||||
break;
|
||||
case AF_INET6:
|
||||
if (addr_len >= sizeof(struct sockaddr_in6)) {
|
||||
struct sockaddr_in6 *s_in =
|
||||
(struct sockaddr_in6 *)s_addr;
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
aux_buffer, aux_buffer_size, 0,
|
||||
" IP6 P%uFI%u A%pI6 S%u",
|
||||
s_in->sin6_port, s_in->sin6_flowinfo,
|
||||
&s_in->sin6_addr, s_in->sin6_scope_id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
return ne;
|
||||
}
|
||||
|
||||
/* Specialized formatter for struct msghdr */
|
||||
static int dump_msghdr(char **buffer, int *buffer_size,
|
||||
char *aux_buffer, size_t aux_buffer_size,
|
||||
const struct user_msghdr *msg, int user_flags)
|
||||
{
|
||||
int ne = append_string_f(buffer, buffer_size, aux_buffer,
|
||||
aux_buffer_size, 0,
|
||||
" namelen %d iovlen %lu controllen %zu flags %u uflags %d",
|
||||
msg->msg_namelen, msg->msg_iovlen,
|
||||
msg->msg_controllen, msg->msg_flags, user_flags);
|
||||
|
||||
if (ne)
|
||||
return 1;
|
||||
if (msg->msg_iovlen > 0) { /* Process message part contents */
|
||||
struct iovec *iovec_p;
|
||||
size_t eff_iovec_size = sizeof(struct iovec) * msg->msg_iovlen;
|
||||
|
||||
iovec_p = clone_from_user(0, (void *)msg->msg_iov,
|
||||
&eff_iovec_size, buffer, buffer_size,
|
||||
"iovec", &ne);
|
||||
|
||||
if (eff_iovec_size) {
|
||||
/* For each message part dump its index,
|
||||
* length and contents (up to DUMP_MAX bytes)
|
||||
*/
|
||||
int i;
|
||||
#define DUMP_MAX 20 // arbitrary
|
||||
|
||||
for (i = 0;
|
||||
!ne &&
|
||||
i < eff_iovec_size / sizeof(struct iovec);
|
||||
++i) {
|
||||
size_t part_len = iovec_p[i].iov_len;
|
||||
char bbuffer[20];
|
||||
unsigned char *part;
|
||||
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
bbuffer, sizeof(bbuffer), 0,
|
||||
" M%d(%zu):", i, part_len);
|
||||
if (ne)
|
||||
break;
|
||||
if (part_len > DUMP_MAX)
|
||||
part_len = DUMP_MAX;
|
||||
part = clone_from_user(0,
|
||||
(void *)iovec_p[i].iov_base,
|
||||
&part_len, buffer, buffer_size,
|
||||
"iovec part", &ne);
|
||||
if (part_len) {
|
||||
ne |= append_string(buffer,
|
||||
buffer_size,
|
||||
part,
|
||||
part_len, 1);
|
||||
}
|
||||
kfree(part);
|
||||
}
|
||||
#undef DUMP_MAX
|
||||
}
|
||||
kfree(iovec_p);
|
||||
}
|
||||
if (msg->msg_namelen > 1 && msg->msg_name) {
|
||||
/* process message destination, if any; probably nonessential
|
||||
* if dump_sockaddr is called too
|
||||
*/
|
||||
char *name_copy; // copy of msg->msg_name from userspace
|
||||
size_t namelen = msg->msg_namelen; // effective length after copying from userspace
|
||||
|
||||
ne |= append_string_s(buffer, buffer_size, " {", 0);
|
||||
name_copy = clone_from_user(0, msg->msg_name,
|
||||
&namelen, buffer, buffer_size, "name", &ne);
|
||||
if (!name_copy)
|
||||
return ne;
|
||||
if (namelen >= sizeof(struct sockaddr_in)) {
|
||||
/* Maybe IPv4? */
|
||||
struct sockaddr_in *sin =
|
||||
(struct sockaddr_in *)name_copy;
|
||||
char sin_buf[3 + 6 + 6 + 4 * 4 + 10];
|
||||
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
sin_buf, sizeof(sin_buf), 0,
|
||||
"IP F%uP%u A%pI4",
|
||||
sin->sin_family, sin->sin_port,
|
||||
&sin->sin_addr);
|
||||
}
|
||||
if (namelen >= sizeof(struct sockaddr_in6)) {
|
||||
/* Maybe IPv6? */
|
||||
struct sockaddr_in6 *sin =
|
||||
(struct sockaddr_in6 *)name_copy;
|
||||
char sin_buf[4 + 6 + 6 + 12 + 8 * 5 + 12 + 10];
|
||||
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
sin_buf, sizeof(sin_buf), 0,
|
||||
" IP6 F%uP%uFI%u A%pI6 S%u",
|
||||
sin->sin6_family, sin->sin6_port,
|
||||
sin->sin6_flowinfo, &sin->sin6_addr,
|
||||
sin->sin6_scope_id);
|
||||
}
|
||||
ne |= append_string_s(buffer, buffer_size, "}", 0);
|
||||
kfree(name_copy);
|
||||
}
|
||||
return ne;
|
||||
}
|
||||
|
||||
/* Specialized formatter for the sendmsg syscall */
|
||||
static int dump_sendmsg(char **buffer, int *buffer_size,
|
||||
const struct seccomp_data *sd)
|
||||
{
|
||||
int ne; // *buffer_size was not enough, something was truncated
|
||||
#define BUFFER_SZ 500 /* size of auxiliary buffer for assorted data */
|
||||
char *sbuffer = kcalloc(1, BUFFER_SZ, GFP_KERNEL);
|
||||
|
||||
if (!sbuffer)
|
||||
return 1;
|
||||
ne = append_string_f(buffer, buffer_size, sbuffer, BUFFER_SZ, 0,
|
||||
" sock {fd %lld", sd->args[0]);
|
||||
if (ne)
|
||||
goto end;
|
||||
{ /* Dump information on socket's peer */
|
||||
int err;
|
||||
struct socket *s_socket = sockfd_lookup(sd->args[0], &err);
|
||||
|
||||
if (s_socket) {
|
||||
struct sockaddr s_addr;
|
||||
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
sbuffer, BUFFER_SZ, 0,
|
||||
" type %d", s_socket->type);
|
||||
if (ne)
|
||||
goto end;
|
||||
err = kernel_getpeername(s_socket, &s_addr);
|
||||
if (err > 0)
|
||||
ne |= dump_sockaddr(buffer, buffer_size,
|
||||
sbuffer, BUFFER_SZ, &s_addr, err);
|
||||
} else {
|
||||
ne |= append_string_f(buffer, buffer_size,
|
||||
sbuffer, BUFFER_SZ, 0,
|
||||
" (socket lookup failed %d)", err);
|
||||
}
|
||||
if (ne)
|
||||
goto end;
|
||||
}
|
||||
ne = append_string_s(buffer, buffer_size, "}", 0);
|
||||
if (!ne && sd->args[1]) {
|
||||
struct user_msghdr msg;
|
||||
|
||||
ne = copy_from_user((void *)&msg,
|
||||
(void *)sd->args[1], sizeof(msg))
|
||||
? append_string_s(buffer, buffer_size,
|
||||
"(failed to copy)", 0)
|
||||
: dump_msghdr(buffer, buffer_size, sbuffer, BUFFER_SZ,
|
||||
&msg, sd->args[2]);
|
||||
}
|
||||
end:
|
||||
kfree(sbuffer);
|
||||
return ne;
|
||||
#undef BUFFER_SZ
|
||||
}
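For illustration, a specialized formatter for another syscall would follow the same calling convention as dump_sendmsg() above and could be plugged into the apis[] table further down (for example by extending the existing sendto entry). The sketch below is hypothetical, not part of this patch, and only reuses append_string_f() exactly as it is used elsewhere in this file.

/* Hypothetical sketch, not part of this patch: a minimal specialized
 * formatter for sendto(2).  It could be registered in apis[] as
 * {__NR_sendto, "sendto", 6, 0, dump_sendto}.
 */
static int dump_sendto(char **buffer, int *buffer_size,
		       const struct seccomp_data *sd)
{
	char sbuffer[80];

	/* sendto args: fd, buf, len, flags, dest_addr, addrlen */
	return append_string_f(buffer, buffer_size, sbuffer, sizeof(sbuffer), 0,
			       " sock {fd %lld len %lld flags %lld addrlen %lld}",
			       sd->args[0], sd->args[2], sd->args[3],
			       sd->args[5]);
}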
|
||||
|
||||
/* Default formatter for syscalls. Dumps parameters as numbers and strings. */
|
||||
static int dump_syscall_default(char **buffer, int *buffer_size,
|
||||
const struct syscall_api *api,
|
||||
const struct seccomp_data *sd)
|
||||
{
|
||||
int ne = 0;
|
||||
size_t j;
|
||||
|
||||
#define DUMP_MAX 1000 // size should be at most MSG_SZ
|
||||
for (j = 0; j < ARRAY_SIZE(sd->args) && j < api->nargs; ++j) {
|
||||
if (api->arg_str & (1 << j)) { // parameter is a string
|
||||
char quote = 1;
|
||||
const char *txt = (const char *)sd->args[j];
|
||||
char *u_bufferp;
|
||||
size_t u_buffersz = DUMP_MAX;
|
||||
|
||||
if (!txt)
|
||||
quote = 0;
|
||||
u_bufferp = clone_from_user(0, txt, &u_buffersz,
|
||||
buffer, buffer_size, "args", &ne);
|
||||
if (u_buffersz) {
|
||||
if (append_string_s(buffer, buffer_size, " ", 0) ||
|
||||
append_string(buffer, buffer_size, u_bufferp,
|
||||
u_buffersz, quote))
|
||||
ne = 1;
|
||||
}
|
||||
kfree(u_bufferp);
|
||||
if (ne)
|
||||
break;
|
||||
} else {
|
||||
char sbuffer[20];
|
||||
|
||||
ne |= append_string_f(buffer, buffer_size, sbuffer,
|
||||
sizeof(sbuffer), 0, " %lld", sd->args[j]);
|
||||
if (ne)
|
||||
break;
|
||||
}
|
||||
}
|
||||
#undef DUMP_MAX
|
||||
return ne;
|
||||
}
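The AS0/AS1/AS3 flags used in the apis[] table below are defined earlier in this file and are not part of this hunk; judging from the api->arg_str test in dump_syscall_default() above, they are per-argument bits marking which syscall arguments are user-space strings. An assumed sketch of those definitions, for illustration only:

/* Assumed definitions, illustrative only -- the real ones live earlier in
 * this file.  Bit j of arg_str marks syscall argument j as a user-space
 * string, which dump_syscall_default() copies with clone_from_user().
 */
#define AS0 (1 << 0)	/* arg 0 is a string, e.g. open(pathname, ...)    */
#define AS1 (1 << 1)	/* arg 1 is a string, e.g. openat(dfd, pathname)  */
#define AS3 (1 << 3)	/* arg 3 is a string, e.g. renameat(..., newname) */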
|
||||
|
||||
/* dump_syscall_base: generate string summarizing call arguments
|
||||
* @buffer: target string
|
||||
* @buffer_size: target size, including terminator
|
||||
* @sd: seccomp invocation descriptor
|
||||
* Returns 0 if successful, 1 if text was clipped
|
||||
*/
|
||||
static int dump_syscall_base(char *buffer, int buffer_size,
|
||||
const struct seccomp_data *sd)
|
||||
{
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
||||
|
||||
static struct syscall_api apis[] = {
|
||||
{__NR_read, "read", 3},
|
||||
{__NR_write, "write", 3},
|
||||
#ifdef __NR_open
|
||||
{__NR_open, "open", 3, AS0},
|
||||
#endif
|
||||
{__NR_close, "close", 1},
|
||||
#ifdef __NR_stat
|
||||
{__NR_stat, "stat", 2, AS0},
|
||||
#endif
|
||||
{__NR_fstat, "fstat", 2},
|
||||
#ifdef __NR_lstat
|
||||
{__NR_lstat, "lstat", 2, AS0},
|
||||
#endif
|
||||
{__NR_sendto, "sendto", 6},
|
||||
{__NR_sendmsg, "sendmsg", 3, 0, dump_sendmsg},
|
||||
#ifdef __NR_unlinkat
|
||||
{__NR_unlinkat, "unlinkat", 3, AS1},
|
||||
#endif
|
||||
#ifdef __NR_renameat
|
||||
{__NR_renameat, "renameat", 4, AS1 | AS3},
|
||||
#endif
|
||||
#ifdef __NR_statfs
|
||||
{__NR_statfs, "statfs", 2, AS0},
|
||||
#endif
|
||||
#ifdef __NR_faccessat
|
||||
{__NR_faccessat, "faccessat", 3, AS1},
|
||||
#endif
|
||||
#ifdef __NR_fchmodat
|
||||
{__NR_fchmodat, "fchmodat", 3, AS1},
|
||||
#endif
|
||||
#ifdef __NR_openat
|
||||
{__NR_openat, "openat", 4, AS1},
|
||||
#endif
|
||||
#ifdef __NR_readlinkat
|
||||
{__NR_readlinkat, "readlinkat", 4, AS1},
|
||||
#endif
|
||||
};
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
char sbuffer[100];
|
||||
size_t i;
|
||||
char ne = 0; /* buffer_size was not enough */
|
||||
char syscall_found = 0;
|
||||
|
||||
if (buffer_size < 1)
|
||||
return 1;
|
||||
*buffer = 0;
|
||||
for (i = 0; i < ARRAY_SIZE(apis); ++i)
|
||||
if (apis[i].nr == sd->nr) {
|
||||
syscall_found = 1;
|
||||
ne = append_string_f(&buffer, &buffer_size,
|
||||
sbuffer, sizeof(sbuffer), 0,
|
||||
"SC %d/%s", sd->nr, apis[i].name)
|
||||
|| (apis[i].dump
|
||||
? apis[i].dump(&buffer, &buffer_size, sd)
|
||||
: dump_syscall_default(&buffer, &buffer_size,
|
||||
apis + i, sd));
|
||||
break;
|
||||
}
|
||||
if (!syscall_found) {
|
||||
ne |= append_string_f(&buffer, &buffer_size,
|
||||
sbuffer, sizeof(sbuffer), 0, "SC %d", sd->nr);
|
||||
if (!ne)
|
||||
for (i = 0; i < ARRAY_SIZE(sd->args); ++i) {
|
||||
ne |= append_string_f(&buffer, &buffer_size,
|
||||
sbuffer, sizeof(sbuffer), 0,
|
||||
" %lld", sd->args[i]);
|
||||
if (ne)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ne;
|
||||
}
|
||||
|
||||
/* dump_syscall: format string describing syscall caller and arguments
|
||||
* @buffer: target string, at least 4 chars long
|
||||
* @buffer_size: available target size, including terminator
|
||||
* @command: command of process invoking syscall
|
||||
* @signr: signal number
|
||||
* @sd: nonnull pointer to seccomp descriptor
|
||||
*/
|
||||
static void dump_syscall(char *buffer, int buffer_size, const char *command,
|
||||
long signr, const struct seccomp_data *sd)
|
||||
{
|
||||
int n_copied = snprintf(buffer, buffer_size,
|
||||
"seccomp '%s' signum %ld pid %d uid %d ",
|
||||
command, signr, current->pid, current_uid().val);
|
||||
n_copied = n_copied < buffer_size
|
||||
? dump_syscall_base(buffer + n_copied, buffer_size - n_copied,
|
||||
sd)
|
||||
: 1;
|
||||
if (n_copied) // something was truncated
|
||||
strscpy(buffer + buffer_size - 4, "...", sizeof("..."));
|
||||
}
|
||||
|
||||
#define MSG_SZ 1024 // Limit actually set by DSMS
|
||||
|
||||
noinline void seccomp_notify_dsms(unsigned long syscall, long signr, u32 action,
|
||||
const struct seccomp_data *sd)
|
||||
{
|
||||
/* The current thread command may be different from the main thread */
|
||||
struct task_struct *main_thread = current->group_leader;
|
||||
char comm_buf[sizeof(main_thread->comm)];
|
||||
|
||||
get_task_comm(comm_buf, main_thread);
|
||||
if (unlikely(strncmp("kumihodecoder", comm_buf, sizeof(main_thread->comm)) == 0)) {
|
||||
char *msg = kcalloc(1, MSG_SZ, GFP_KERNEL);
|
||||
int i;
|
||||
|
||||
if (msg) {
|
||||
dump_syscall(msg, MSG_SZ, comm_buf, signr, sd);
|
||||
i = dsms_send_message("KMH0", msg, action);
|
||||
if (unlikely(i != DSMS_SUCCESS))
|
||||
pr_warn("%s::dsms_send_message failed: error %d msg <%s>\n",
|
||||
__func__, i, msg);
|
||||
kfree(msg);
|
||||
} else
|
||||
pr_warn("%s: out of memory", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
#undef MSG_SZ
|
||||
|
||||
#else
|
||||
#define seccomp_notify_dsms(syscall, signumber, action, sd) /* nothing */
|
||||
#endif
|
||||
/* SEC_PRODUCT_FEATURE_SECURITY_SUPPORT_DSMS } */
|
||||
|
||||
static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
|
||||
bool requested, const struct seccomp_data *sd)
|
||||
bool requested)
|
||||
{
|
||||
bool log = false;
|
||||
|
||||
@@ -1569,8 +990,6 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
|
||||
default:
|
||||
log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
|
||||
}
|
||||
if (action != SECCOMP_RET_ALLOW)
|
||||
seccomp_notify_dsms(syscall, signr, action, sd);
|
||||
|
||||
/*
|
||||
* Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
|
||||
@@ -1610,7 +1029,7 @@ static void __secure_computing_strict(int this_syscall)
|
||||
dump_stack();
|
||||
#endif
|
||||
current->seccomp.mode = SECCOMP_MODE_DEAD;
|
||||
seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true, 0);
|
||||
seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
|
||||
do_exit(SIGKILL);
|
||||
}
|
||||
|
||||
@@ -1870,7 +1289,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
return 0;
|
||||
|
||||
case SECCOMP_RET_LOG:
|
||||
seccomp_log(this_syscall, 0, action, true, sd);
|
||||
seccomp_log(this_syscall, 0, action, true);
|
||||
return 0;
|
||||
|
||||
case SECCOMP_RET_ALLOW:
|
||||
@@ -1885,7 +1304,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
case SECCOMP_RET_KILL_PROCESS:
|
||||
default:
|
||||
current->seccomp.mode = SECCOMP_MODE_DEAD;
|
||||
seccomp_log(this_syscall, SIGSYS, action, true, sd);
|
||||
seccomp_log(this_syscall, SIGSYS, action, true);
|
||||
/* Dump core only if this is the last remaining thread. */
|
||||
if (action != SECCOMP_RET_KILL_THREAD ||
|
||||
(atomic_read(¤t->signal->live) == 1)) {
|
||||
@@ -1902,7 +1321,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
unreachable();
|
||||
|
||||
skip:
|
||||
seccomp_log(this_syscall, 0, action, match ? match->log : false, sd);
|
||||
seccomp_log(this_syscall, 0, action, match ? match->log : false);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
|
@@ -58,10 +58,6 @@
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/syscall.h> /* for syscall_get_* */
|
||||
|
||||
#ifdef CONFIG_SAMSUNG_FREECESS
|
||||
#include <linux/freecess.h>
|
||||
#endif
|
||||
|
||||
#undef CREATE_TRACE_POINTS
|
||||
#include <trace/hooks/signal.h>
|
||||
#include <trace/hooks/dtask.h>
|
||||
@@ -1319,18 +1315,6 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
|
||||
unsigned long flags;
|
||||
int ret = -ESRCH;
|
||||
trace_android_vh_do_send_sig_info(sig, current, p);
|
||||
|
||||
#ifdef CONFIG_SAMSUNG_FREECESS
|
||||
/*
|
||||
* System will send SIGIO to the app that locked the file when other apps access the file.
|
||||
* Report SIGIO to prevent other apps from getting stuck
|
||||
*/
|
||||
if ((sig == SIGKILL || sig == SIGTERM || sig == SIGABRT || sig == SIGQUIT || sig == SIGIO)) {
|
||||
/* Report pid if signal is fatal */
|
||||
sig_report(p, sig != SIGIO);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (lock_task_sighand(p, &flags)) {
|
||||
ret = send_signal_locked(sig, info, p, type);
|
||||
unlock_task_sighand(p, &flags);
|
||||
|
kernel/sys.c (12 changed lines)
@@ -75,10 +75,6 @@
|
||||
#include <asm/io.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
#include <linux/defex.h>
|
||||
#endif
|
||||
|
||||
#include "uid16.h"
|
||||
|
||||
#include <trace/hooks/sys.h>
|
||||
@@ -880,10 +876,6 @@ long __sys_setfsuid(uid_t uid)
|
||||
if (!uid_valid(kuid))
|
||||
return old_fsuid;
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
if (task_defex_enforce(current, NULL, -__NR_setfsuid))
|
||||
return old_fsuid;
|
||||
#endif
|
||||
new = prepare_creds();
|
||||
if (!new)
|
||||
return old_fsuid;
|
||||
@@ -928,10 +920,6 @@ long __sys_setfsgid(gid_t gid)
|
||||
if (!gid_valid(kgid))
|
||||
return old_fsgid;
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
if (task_defex_enforce(current, NULL, -__NR_setfsgid))
|
||||
return old_fsgid;
|
||||
#endif
|
||||
new = prepare_creds();
|
||||
if (!new)
|
||||
return old_fsgid;
|
||||
|
@@ -63,8 +63,6 @@ static struct rtc_timer rtctimer;
|
||||
static struct rtc_device *rtcdev;
|
||||
static DEFINE_SPINLOCK(rtcdev_lock);
|
||||
|
||||
extern void log_suspend_abort_reason(const char *fmt, ...);
|
||||
|
||||
/**
|
||||
* alarmtimer_get_rtcdev - Return selected rtcdevice
|
||||
*
|
||||
@@ -195,7 +193,6 @@ static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
|
||||
* timers queued for the future, we set the hrtimer to fire when
|
||||
* the next future alarm timer expires.
|
||||
*/
|
||||
#define MAX_FUNC_NAME 20
|
||||
static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
|
||||
{
|
||||
struct alarm *alarm = container_of(timer, struct alarm, timer);
|
||||
@@ -203,18 +200,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
|
||||
unsigned long flags;
|
||||
int ret = HRTIMER_NORESTART;
|
||||
int restart = ALARMTIMER_NORESTART;
|
||||
char func_name[MAX_FUNC_NAME];
|
||||
|
||||
spin_lock_irqsave(&base->lock, flags);
|
||||
alarmtimer_dequeue(base, alarm);
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
|
||||
if (alarm->function) {
|
||||
if (alarm->function)
|
||||
restart = alarm->function(alarm, base->get_ktime());
|
||||
snprintf(func_name, MAX_FUNC_NAME, "%ps\n", alarm->function);
|
||||
if (strncmp(func_name, "timerfd_alarmproc", strlen("timerfd_alarmproc")))
|
||||
pr_info("PM: %ps is fired!\n", alarm->function);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&base->lock, flags);
|
||||
if (restart != ALARMTIMER_NORESTART) {
|
||||
@@ -253,7 +245,6 @@ static int alarmtimer_suspend(struct device *dev)
|
||||
struct rtc_device *rtc;
|
||||
unsigned long flags;
|
||||
struct rtc_time tm;
|
||||
struct alarm *min_alarm = NULL;
|
||||
|
||||
spin_lock_irqsave(&freezer_delta_lock, flags);
|
||||
min = freezer_delta;
|
||||
@@ -283,22 +274,12 @@ static int alarmtimer_suspend(struct device *dev)
|
||||
expires = next->expires;
|
||||
min = delta;
|
||||
type = i;
|
||||
min_alarm = container_of(next, struct alarm, node);
|
||||
}
|
||||
}
|
||||
if (min == 0)
|
||||
return 0;
|
||||
|
||||
if (min_alarm)
|
||||
pr_info("soonest alarm : %ps\n", min_alarm->function);
|
||||
|
||||
if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
|
||||
if (min_alarm) {
|
||||
pr_info("alarmtimer suspending blocked by %ps\n", min_alarm->function);
|
||||
log_suspend_abort_reason("alarmtimer suspending blocked by %ps\n",
|
||||
min_alarm->function);
|
||||
}
|
||||
|
||||
pm_wakeup_event(dev, 2 * MSEC_PER_SEC);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
@@ -147,6 +147,35 @@ config PREEMPTIRQ_TRACEPOINTS
|
||||
Create preempt/irq toggle tracepoints if needed, so that other parts
|
||||
of the kernel can use them to generate or add hooks to them.
|
||||
|
||||
config IPC_LOGGING
|
||||
tristate "Debug Logging for IPC Drivers"
|
||||
select GENERIC_TRACER
|
||||
depends on DEBUG_FS
|
||||
help
|
||||
IPC Logging driver provides a logging option for IPC Drivers.
|
||||
This provides a cyclic buffer based logging support in a driver
|
||||
specific context. This driver also provides a debugfs interface
|
||||
to dump the logs in a live fashion.
|
||||
|
||||
If in doubt, say no.
|
||||
|
||||
config IPC_LOGGING_CDEV
|
||||
tristate "Ipc Logging Character Device"
|
||||
depends on IPC_LOGGING
|
||||
help
|
||||
Character device for ipc logging. Reading it will extract ipc logs up to
|
||||
the specified size and increment the read index of the ipc log buffer.
|
||||
Read function will return EOF when there is no longer any data to read
|
||||
in the ipc log buffer.
|
||||
|
||||
config IPC_LOG_MINIDUMP_BUFFERS
|
||||
int "Ipc log buffers count that can be dumped with minidump"
|
||||
depends on IPC_LOGGING
|
||||
default 0
|
||||
help
|
||||
This option is used to configure maximum number of ipc log
|
||||
buffers that can be dumped by minidump.
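For context, a kernel client consumes this driver roughly as sketched below. This is illustrative only and assumes the ipc_log_context_create(), ipc_log_string() and ipc_log_context_destroy() helpers declared in include/linux/ipc_logging.h.

/* Illustrative client usage (assumed API from include/linux/ipc_logging.h). */
#include <linux/ipc_logging.h>

#define MY_LOG_PAGES 2

static void *my_ipc_log;

static int my_driver_log_init(void)
{
	/* Allocate a cyclic log of MY_LOG_PAGES pages named "my_driver";
	 * it becomes readable via debugfs and, with IPC_LOGGING_CDEV,
	 * via a character device as well.
	 */
	my_ipc_log = ipc_log_context_create(MY_LOG_PAGES, "my_driver", 0);
	if (!my_ipc_log)
		return -ENOMEM;

	ipc_log_string(my_ipc_log, "initialized, pages=%d", MY_LOG_PAGES);
	return 0;
}

static void my_driver_log_exit(void)
{
	ipc_log_context_destroy(my_ipc_log);
}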
|
||||
|
||||
# All tracer options should select GENERIC_TRACER. For those options that are
|
||||
# enabled by all tracers (context switch and event tracer) they select TRACING.
|
||||
# This allows those options to appear when no other tracer is selected. But the
|
||||
|
@@ -110,4 +110,8 @@ obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
|
||||
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
|
||||
obj-$(CONFIG_RV) += rv/
|
||||
|
||||
obj-$(CONFIG_IPC_LOGGING) += qcom_ipc_logging.o
|
||||
qcom_ipc_logging-y := ipc_logging.o ipc_logging_debug.o
|
||||
qcom_ipc_logging-$(CONFIG_IPC_LOGGING_CDEV) += ipc_logging_cdev.o
|
||||
|
||||
libftrace-y := ftrace.o
|
||||
|
kernel/trace/ipc_logging.c (new file, 1095 lines; diff suppressed because it is too large)
kernel/trace/ipc_logging_cdev.c (new file, 197 lines)
@@ -0,0 +1,197 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/ipc_logging.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "ipc_logging_private.h"
|
||||
|
||||
#define IPL_CDEV_MAX 255
|
||||
|
||||
static dev_t cdev_devt;
|
||||
static struct class *cdev_class;
|
||||
static DEFINE_IDA(ipl_minor_ida);
|
||||
|
||||
static void dfunc_string(struct encode_context *ectxt, struct decode_context *dctxt)
|
||||
{
|
||||
tsv_timestamp_read(ectxt, dctxt, "");
|
||||
tsv_qtimer_read(ectxt, dctxt, " ");
|
||||
tsv_byte_array_read(ectxt, dctxt, "");
|
||||
|
||||
/* add trailing \n if necessary */
|
||||
if (*(dctxt->buff - 1) != '\n') {
|
||||
if (dctxt->size) {
|
||||
++dctxt->buff;
|
||||
--dctxt->size;
|
||||
}
|
||||
*(dctxt->buff - 1) = '\n';
|
||||
}
|
||||
}
|
||||
|
||||
static int debug_log(struct ipc_log_context *ilctxt, char *buff, int size, int cont)
|
||||
{
|
||||
int i = 0;
|
||||
int ret;
|
||||
|
||||
if (size < MAX_MSG_DECODED_SIZE) {
|
||||
pr_err("%s: buffer size %d < %d\n", __func__, size, MAX_MSG_DECODED_SIZE);
|
||||
return -ENOMEM;
|
||||
}
|
||||
do {
|
||||
i = ipc_log_extract(ilctxt, buff, size - 1);
|
||||
if (cont && i == 0) {
|
||||
ret = wait_for_completion_interruptible(&ilctxt->read_avail);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
} while (cont && i == 0);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static char *ipc_log_cdev_devnode(const struct device *dev, umode_t *mode)
|
||||
{
|
||||
return kasprintf(GFP_KERNEL, "ipc_logging/%s", dev_name(dev));
|
||||
}
|
||||
|
||||
static int ipc_log_cdev_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct ipc_log_cdev *ipl_cdev;
|
||||
|
||||
ipl_cdev = container_of(inode->i_cdev, struct ipc_log_cdev, cdev);
|
||||
filp->private_data = container_of(ipl_cdev, struct ipc_log_context, cdev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* VFS Read operation which dispatches the call to the DevFS read command stored in
|
||||
* file->private_data.
|
||||
*
|
||||
* @filp File structure
|
||||
* @buff user buffer
|
||||
* @count size of user buffer
|
||||
* @offp file position to read from (only a value of 0 is accepted)
|
||||
*
|
||||
* @returns = 0 end of file
|
||||
* > 0 number of bytes read
|
||||
* < 0 error
|
||||
*/
|
||||
static ssize_t ipc_log_cdev_read(struct file *filp, char __user *buff, size_t count, loff_t *offp)
|
||||
{
|
||||
int ret, bsize;
|
||||
char *buffer;
|
||||
struct ipc_log_context *ilctxt;
|
||||
|
||||
ilctxt = filp->private_data;
|
||||
ret = kref_get_unless_zero(&ilctxt->refcount) ? 0 : -EIO;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
buffer = kmalloc(count, GFP_KERNEL);
|
||||
if (!buffer) {
|
||||
bsize = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* only support non-continuous mode */
|
||||
bsize = debug_log(ilctxt, buffer, count, 0);
|
||||
|
||||
if (bsize > 0) {
|
||||
if (copy_to_user(buff, buffer, bsize)) {
|
||||
bsize = -EFAULT;
|
||||
kfree(buffer);
|
||||
goto done;
|
||||
}
|
||||
*offp += bsize;
|
||||
}
|
||||
kfree(buffer);
|
||||
|
||||
done:
|
||||
ipc_log_context_put(ilctxt);
|
||||
return bsize;
|
||||
}
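From user space, the device nodes created below can be drained with a plain read loop. A minimal sketch, assuming devtmpfs exposes the class devnode as /dev/ipc_logging/<mod_name>, matching ipc_log_cdev_devnode() above:

/* User-space sketch, illustrative only: drain one ipc log via its cdev. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int dump_ipc_log(const char *mod_name)
{
	char path[256], buf[4096];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "/dev/ipc_logging/%s", mod_name);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	/* read() returns 0 (end of file) once no data is left in the log */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t)n, stdout);

	close(fd);
	return 0;
}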
|
||||
|
||||
static const struct file_operations cdev_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ipc_log_cdev_open,
|
||||
.read = ipc_log_cdev_read,
|
||||
};
|
||||
|
||||
void ipc_log_cdev_remove(struct ipc_log_context *ilctxt)
|
||||
{
|
||||
if (ilctxt->cdev.dev.class) {
|
||||
cdev_device_del(&ilctxt->cdev.cdev, &ilctxt->cdev.dev);
|
||||
ida_free(&ipl_minor_ida, (unsigned int)MINOR(ilctxt->cdev.dev.devt));
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(ipc_log_cdev_remove);
|
||||
|
||||
void ipc_log_cdev_create(struct ipc_log_context *ilctxt, const char *mod_name)
|
||||
{
|
||||
int ret;
|
||||
int minor;
|
||||
dev_t devno;
|
||||
|
||||
if (!cdev_class) {
|
||||
pr_err("%s: %s no device class created\n", __func__, mod_name);
|
||||
return;
|
||||
}
|
||||
|
||||
minor = ida_alloc_range(&ipl_minor_ida, 0, IPL_CDEV_MAX, GFP_KERNEL);
|
||||
if (minor < 0) {
|
||||
pr_err("%s: %s failed to alloc ipl minor number %d\n", __func__, mod_name, minor);
|
||||
return;
|
||||
}
|
||||
|
||||
devno = MKDEV(MAJOR(cdev_devt), minor);
|
||||
device_initialize(&ilctxt->cdev.dev);
|
||||
ilctxt->cdev.dev.devt = devno;
|
||||
ilctxt->cdev.dev.class = cdev_class;
|
||||
dev_set_name(&ilctxt->cdev.dev, "%s", mod_name);
|
||||
|
||||
cdev_init(&ilctxt->cdev.cdev, &cdev_fops);
|
||||
ret = cdev_device_add(&ilctxt->cdev.cdev, &ilctxt->cdev.dev);
|
||||
if (ret) {
|
||||
pr_err("%s: unable to add ipl cdev %s, %d\n", __func__, mod_name, ret);
|
||||
ilctxt->cdev.dev.class = NULL;
|
||||
ida_free(&ipl_minor_ida, (unsigned int)minor);
|
||||
put_device(&ilctxt->cdev.dev);
|
||||
return;
|
||||
}
|
||||
|
||||
add_deserialization_func((void *)ilctxt, TSV_TYPE_STRING, dfunc_string);
|
||||
}
|
||||
EXPORT_SYMBOL(ipc_log_cdev_create);
|
||||
|
||||
void ipc_log_cdev_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
cdev_class = NULL;
|
||||
|
||||
ret = alloc_chrdev_region(&cdev_devt, 0, IPL_CDEV_MAX, "ipc_logging");
|
||||
if (ret) {
|
||||
pr_err("%s: unable to create ipl cdev regoin %d\n", __func__, ret);
|
||||
return;
|
||||
}
|
||||
|
||||
cdev_class = class_create("ipc_logging");
|
||||
if (IS_ERR(cdev_class)) {
|
||||
pr_err("%s: unable to create ipl cdev class %ld\n", __func__, PTR_ERR(cdev_class));
|
||||
cdev_class = NULL;
|
||||
unregister_chrdev_region(cdev_devt, IPL_CDEV_MAX);
|
||||
return;
|
||||
}
|
||||
|
||||
cdev_class->devnode = ipc_log_cdev_devnode;
|
||||
}
|
||||
EXPORT_SYMBOL(ipc_log_cdev_init);
|
kernel/trace/ipc_logging_debug.c (new file, 191 lines)
@@ -0,0 +1,191 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2012-2022 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/ipc_logging.h>
|
||||
|
||||
#include "ipc_logging_private.h"
|
||||
|
||||
static DEFINE_MUTEX(ipc_log_debugfs_init_lock);
|
||||
static struct dentry *root_dent;
|
||||
|
||||
static int debug_log(struct ipc_log_context *ilctxt,
|
||||
char *buff, int size, int cont)
|
||||
{
|
||||
int i = 0;
|
||||
int ret;
|
||||
|
||||
if (size < MAX_MSG_DECODED_SIZE) {
|
||||
pr_err("%s: buffer size %d < %d\n", __func__, size,
|
||||
MAX_MSG_DECODED_SIZE);
|
||||
return -ENOMEM;
|
||||
}
|
||||
do {
|
||||
i = ipc_log_extract(ilctxt, buff, size - 1);
|
||||
if (cont && i == 0) {
|
||||
ret = wait_for_completion_interruptible(
|
||||
&ilctxt->read_avail);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
} while (cont && i == 0);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* VFS Read operation helper which dispatches the call to the debugfs
|
||||
* read command stored in file->private_data.
|
||||
*
|
||||
* @file File structure
|
||||
* @buff user buffer
|
||||
* @count size of user buffer
|
||||
* @ppos file position to read from (only a value of 0 is accepted)
|
||||
* @cont 1 = continuous mode (don't return 0 to signal end-of-file)
|
||||
*
|
||||
* @returns ==0 end of file
|
||||
* >0 number of bytes read
|
||||
* <0 error
|
||||
*/
|
||||
static ssize_t debug_read_helper(struct file *file, char __user *buff,
|
||||
size_t count, loff_t *ppos, int cont)
|
||||
{
|
||||
struct ipc_log_context *ilctxt;
|
||||
struct dentry *d = file->f_path.dentry;
|
||||
char *buffer;
|
||||
int bsize;
|
||||
int r;
|
||||
|
||||
r = debugfs_file_get(d);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ilctxt = file->private_data;
|
||||
r = kref_get_unless_zero(&ilctxt->refcount) ? 0 : -EIO;
|
||||
if (r) {
|
||||
debugfs_file_put(d);
|
||||
return r;
|
||||
}
|
||||
|
||||
buffer = kmalloc(count, GFP_KERNEL);
|
||||
if (!buffer) {
|
||||
bsize = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
bsize = debug_log(ilctxt, buffer, count, cont);
|
||||
|
||||
if (bsize > 0) {
|
||||
if (copy_to_user(buff, buffer, bsize)) {
|
||||
bsize = -EFAULT;
|
||||
kfree(buffer);
|
||||
goto done;
|
||||
}
|
||||
*ppos += bsize;
|
||||
}
|
||||
kfree(buffer);
|
||||
|
||||
done:
|
||||
ipc_log_context_put(ilctxt);
|
||||
debugfs_file_put(d);
|
||||
return bsize;
|
||||
}
|
||||
|
||||
static ssize_t debug_read(struct file *file, char __user *buff,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
return debug_read_helper(file, buff, count, ppos, 0);
|
||||
}
|
||||
|
||||
static ssize_t debug_read_cont(struct file *file, char __user *buff,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
return debug_read_helper(file, buff, count, ppos, 1);
|
||||
}
|
||||
|
||||
static const struct file_operations debug_ops = {
|
||||
.read = debug_read,
|
||||
.open = simple_open,
|
||||
};
|
||||
|
||||
static const struct file_operations debug_ops_cont = {
|
||||
.read = debug_read_cont,
|
||||
.open = simple_open,
|
||||
};
|
||||
|
||||
static void debug_create(const char *name, mode_t mode,
|
||||
struct dentry *dent,
|
||||
struct ipc_log_context *ilctxt,
|
||||
const struct file_operations *fops)
|
||||
{
|
||||
debugfs_create_file_unsafe(name, mode, dent, ilctxt, fops);
|
||||
}
|
||||
|
||||
static void dfunc_string(struct encode_context *ectxt,
|
||||
struct decode_context *dctxt)
|
||||
{
|
||||
tsv_timestamp_read(ectxt, dctxt, "");
|
||||
tsv_qtimer_read(ectxt, dctxt, " ");
|
||||
tsv_byte_array_read(ectxt, dctxt, "");
|
||||
|
||||
/* add trailing \n if necessary */
|
||||
if (*(dctxt->buff - 1) != '\n') {
|
||||
if (dctxt->size) {
|
||||
++dctxt->buff;
|
||||
--dctxt->size;
|
||||
}
|
||||
*(dctxt->buff - 1) = '\n';
|
||||
}
|
||||
}
|
||||
|
||||
void check_and_create_debugfs(void)
|
||||
{
|
||||
mutex_lock(&ipc_log_debugfs_init_lock);
|
||||
if (!root_dent) {
|
||||
root_dent = debugfs_create_dir("ipc_logging", 0);
|
||||
|
||||
if (IS_ERR(root_dent)) {
|
||||
pr_err("%s: unable to create debugfs %ld\n",
|
||||
__func__, PTR_ERR(root_dent));
|
||||
root_dent = NULL;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&ipc_log_debugfs_init_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(check_and_create_debugfs);
|
||||
|
||||
void create_ctx_debugfs(struct ipc_log_context *ctxt,
|
||||
const char *mod_name)
|
||||
{
|
||||
if (!root_dent)
|
||||
check_and_create_debugfs();
|
||||
|
||||
if (root_dent) {
|
||||
ctxt->dent = debugfs_create_dir(mod_name, root_dent);
|
||||
if (!IS_ERR(ctxt->dent)) {
|
||||
debug_create("log", 0444, ctxt->dent,
|
||||
ctxt, &debug_ops);
|
||||
debug_create("log_cont", 0444, ctxt->dent,
|
||||
ctxt, &debug_ops_cont);
|
||||
}
|
||||
}
|
||||
add_deserialization_func((void *)ctxt,
|
||||
TSV_TYPE_STRING, dfunc_string);
|
||||
}
|
||||
EXPORT_SYMBOL(create_ctx_debugfs);
|
kernel/trace/ipc_logging_private.h (new file, 193 lines)
@@ -0,0 +1,193 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2012-2023 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
#ifndef _IPC_LOGGING_PRIVATE_H
|
||||
#define _IPC_LOGGING_PRIVATE_H
|
||||
|
||||
#include <linux/device.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/ipc_logging.h>
|
||||
|
||||
#define IPC_LOG_VERSION 0x0003
|
||||
#define IPC_LOG_MAX_CONTEXT_NAME_LEN 32
|
||||
|
||||
/**
|
||||
* struct ipc_log_page_header - Individual log page header
|
||||
*
|
||||
* @magic: Magic number (used for log extraction)
|
||||
* @nmagic: Inverse of magic number (used for log extraction)
|
||||
* @page_num: Index of page (0.. N - 1) (note top bit is always set)
|
||||
* @read_offset: Read offset in page
|
||||
* @write_offset: Write offset in page (or 0xFFFF if full)
|
||||
* @log_id: ID of logging context that owns this page
|
||||
* @start_time: Scheduler clock for first write time in page
|
||||
* @end_time: Scheduler clock for last write time in page
|
||||
* @ctx_offset: Signed offset from page to the logging context. Used to
|
||||
* optimize ram-dump extraction.
|
||||
*
|
||||
* @list: Linked list of pages that make up a log
|
||||
* @nd_read_offset: Non-destructive read offset used for debugfs
|
||||
*
|
||||
* The first part of the structure defines data that is used to extract the
|
||||
* logs from a memory dump and elements in this section should not be changed
|
||||
* or re-ordered. New local data structures can be added to the end of the
|
||||
* structure since they will be ignored by the extraction tool.
|
||||
*/
|
||||
struct ipc_log_page_header {
|
||||
uint32_t magic;
|
||||
uint32_t nmagic;
|
||||
uint32_t page_num;
|
||||
uint16_t read_offset;
|
||||
uint16_t write_offset;
|
||||
uint64_t log_id;
|
||||
uint64_t start_time;
|
||||
uint64_t end_time;
|
||||
int64_t ctx_offset;
|
||||
|
||||
/* add local data structures after this point */
|
||||
struct list_head list;
|
||||
uint16_t nd_read_offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ipc_log_page - Individual log page
|
||||
*
|
||||
* @hdr: Log page header
|
||||
* @data: Log data
|
||||
*
|
||||
* Each log consists of 1 to N log pages. Data size is adjusted to always fit
|
||||
* the structure into a single kernel page.
|
||||
*/
|
||||
struct ipc_log_page {
|
||||
struct ipc_log_page_header hdr;
|
||||
char data[PAGE_SIZE - sizeof(struct ipc_log_page_header)];
|
||||
};
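The data[] sizing above is what keeps each log page to exactly one kernel page; a compile-time check along these lines (illustrative, not part of this header) would make the invariant explicit:

/* Illustrative only: header plus payload must fill exactly one page. */
static inline void ipc_log_page_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct ipc_log_page) != PAGE_SIZE);
}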
|
||||
|
||||
/**
|
||||
* struct ipc_log_cdev - Ipc logging character device
|
||||
*
|
||||
* @cdev: character device structure
|
||||
* @dev: device structure
|
||||
*
|
||||
* Character device structure for ipc logging. Used to create character device nodes in DevFS.
|
||||
*/
|
||||
struct ipc_log_cdev {
|
||||
struct cdev cdev;
|
||||
struct device dev;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ipc_log_context - main logging context
|
||||
*
|
||||
* @magic: Magic number (used for log extraction)
|
||||
* @nmagic: Inverse of magic number (used for log extraction)
|
||||
* @version: IPC Logging version of log format
|
||||
* @user_version: Version number for user-defined messages
|
||||
* @header_size: Size of the log header which is used to determine the offset
|
||||
* of ipc_log_page::data
|
||||
* @log_id: Log ID (assigned when log is created)
|
||||
* @name: Name of the log used to uniquely identify the log during extraction
|
||||
*
|
||||
* @list: List of log contexts (struct ipc_log_context)
|
||||
* @page_list: List of log pages (struct ipc_log_page)
|
||||
* @first_page: First page in list of logging pages
|
||||
* @last_page: Last page in list of logging pages
|
||||
* @write_page: Current write page
|
||||
* @read_page: Current read page (for internal reads)
|
||||
* @nd_read_page: Current debugfs extraction page (non-destructive)
|
||||
*
|
||||
* @write_avail: Number of bytes available to write in all pages
|
||||
* @dent: Debugfs node for run-time log extraction
|
||||
* @dfunc_info_list: List of deserialization functions
|
||||
* @context_lock_lhb1: Lock for entire structure
|
||||
* @read_avail: Completed when new data is added to the log
|
||||
* @cdev: Ipc logging character device
|
||||
*/
|
||||
struct ipc_log_context {
|
||||
uint32_t magic;
|
||||
uint32_t nmagic;
|
||||
uint32_t version;
|
||||
uint16_t user_version;
|
||||
uint16_t header_size;
|
||||
uint64_t log_id;
|
||||
char name[IPC_LOG_MAX_CONTEXT_NAME_LEN];
|
||||
|
||||
/* add local data structures after this point */
|
||||
struct list_head list;
|
||||
struct list_head page_list;
|
||||
struct ipc_log_page *first_page;
|
||||
struct ipc_log_page *last_page;
|
||||
struct ipc_log_page *write_page;
|
||||
struct ipc_log_page *read_page;
|
||||
struct ipc_log_page *nd_read_page;
|
||||
|
||||
uint32_t write_avail;
|
||||
struct dentry *dent;
|
||||
struct list_head dfunc_info_list;
|
||||
spinlock_t context_lock_lhb1;
|
||||
struct completion read_avail;
|
||||
struct kref refcount;
|
||||
bool destroyed;
|
||||
struct ipc_log_cdev cdev;
|
||||
};
|
||||
|
||||
struct dfunc_info {
|
||||
struct list_head list;
|
||||
int type;
|
||||
void (*dfunc)(struct encode_context *enc, struct decode_context *dec);
|
||||
};
|
||||
|
||||
enum {
|
||||
TSV_TYPE_INVALID,
|
||||
TSV_TYPE_TIMESTAMP,
|
||||
TSV_TYPE_POINTER,
|
||||
TSV_TYPE_INT32,
|
||||
TSV_TYPE_BYTE_ARRAY,
|
||||
TSV_TYPE_QTIMER,
|
||||
};
|
||||
|
||||
enum {
|
||||
OUTPUT_DEBUGFS,
|
||||
};
|
||||
|
||||
#define IPC_LOG_CONTEXT_MAGIC_NUM 0x25874452
|
||||
#define IPC_LOGGING_MAGIC_NUM 0x52784425
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#define IS_MSG_TYPE(x) (((x) > TSV_TYPE_MSG_START) && \
|
||||
((x) < TSV_TYPE_MSG_END))
|
||||
#define MAX_MSG_DECODED_SIZE (MAX_MSG_SIZE*4)
|
||||
|
||||
void ipc_log_context_free(struct kref *kref);
|
||||
|
||||
static inline void ipc_log_context_put(struct ipc_log_context *ilctxt)
|
||||
{
|
||||
kref_put(&ilctxt->refcount, ipc_log_context_free);
|
||||
}
|
||||
|
||||
#if (defined(CONFIG_DEBUG_FS))
|
||||
void check_and_create_debugfs(void);
|
||||
|
||||
void create_ctx_debugfs(struct ipc_log_context *ctxt,
|
||||
const char *mod_name);
|
||||
#else
|
||||
static inline void check_and_create_debugfs(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void create_ctx_debugfs(struct ipc_log_context *ctxt, const char *mod_name)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPC_LOGGING_CDEV)
|
||||
void ipc_log_cdev_init(void);
|
||||
void ipc_log_cdev_create(struct ipc_log_context *ilctxt, const char *mod_name);
|
||||
void ipc_log_cdev_remove(struct ipc_log_context *ilctxt);
|
||||
#else
|
||||
static inline void ipc_log_cdev_init(void) {}
|
||||
static inline void ipc_log_cdev_create(struct ipc_log_context *ilctxt, const char *mod_name) {}
|
||||
static inline void ipc_log_cdev_remove(struct ipc_log_context *ilctxt) {}
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -32,10 +32,6 @@
|
||||
|
||||
#include <trace/events/module.h>
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
#include <linux/defex.h>
|
||||
#endif
|
||||
|
||||
static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
|
||||
static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
|
||||
static DEFINE_SPINLOCK(umh_sysctl_lock);
|
||||
@@ -427,11 +423,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
|
||||
if (strlen(sub_info->path) == 0)
|
||||
goto out;
|
||||
|
||||
#ifdef CONFIG_SECURITY_DEFEX
|
||||
if (task_defex_user_exec(sub_info->path)) {
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Set the completion pointer only if there is a waiter.
|
||||
* This makes it possible to use umh_complete to free
|
||||
|