// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/syscore_ops.h>
#include <linux/cpufreq.h>
#include <linux/list_sort.h>
#include <linux/jiffies.h>
#include <linux/sched/stat.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/delay.h>
#include <linux/time64.h>

#include <trace/hooks/sched.h>
#include <trace/hooks/cpufreq.h>
#include <trace/events/power.h>
#include "walt.h"
#include "trace.h"
#include <linux/sec_debug.h>

bool enable_logging;
bool trail_active;
bool sustain_active;

const char *task_event_names[] = {
|
|
"PUT_PREV_TASK",
|
|
"PICK_NEXT_TASK",
|
|
"TASK_WAKE",
|
|
"TASK_MIGRATE",
|
|
"TASK_UPDATE",
|
|
"IRQ_UPDATE"
|
|
};
|
|
|
|
const char *migrate_type_names[] = {
|
|
"GROUP_TO_RQ",
|
|
"RQ_TO_GROUP",
|
|
"RQ_TO_RQ",
|
|
"GROUP_TO_GROUP"
|
|
};
|
|
|
|
#define SCHED_FREQ_ACCOUNT_WAIT_TIME 0
|
|
#define SCHED_ACCOUNT_WAIT_TIME 1
|
|
|
|
#define EARLY_DETECTION_DURATION 9500000
|
|
#define MAX_NUM_CGROUP_COLOC_ID 20
|
|
|
|
#define NEW_TASK_ACTIVE_TIME 100000000
|
|
|
|
cpumask_t walt_cpus_taken_mask = { CPU_BITS_NONE };
|
|
DEFINE_SPINLOCK(cpus_taken_lock);
|
|
DEFINE_PER_CPU(int, cpus_taken_refcount);
|
|
|
|
DEFINE_PER_CPU(struct walt_rq, walt_rq);
|
|
unsigned int sysctl_sched_user_hint;
|
|
static u64 sched_clock_last;
|
|
static bool walt_clock_suspended;
|
|
DECLARE_COMPLETION(walt_get_cycle_counts_cb_completion);
|
|
bool use_cycle_counter;
|
|
u64 (*walt_get_cycle_counts_cb)(int cpu, u64 wc);
|
|
|
|
static u64 walt_load_reported_window;
|
|
|
|
struct irq_work walt_cpufreq_irq_work;
|
|
struct irq_work walt_migration_irq_work;
|
|
unsigned int walt_rotation_enabled;
|
|
|
|
unsigned int __read_mostly sched_ravg_window = 20000000;
|
|
int min_possible_cluster_id;
|
|
int max_possible_cluster_id;
|
|
/* Initial task load. Newly created tasks are assigned this load. */
|
|
unsigned int __read_mostly sched_init_task_load_windows;
|
|
/*
|
|
* Task load is categorized into buckets for the purpose of top task tracking.
|
|
* The entire range of load from 0 to sched_ravg_window needs to be covered
|
|
* in NUM_LOAD_INDICES number of buckets. Therefore the size of each bucket
|
|
* is given by sched_ravg_window / NUM_LOAD_INDICES. Since the default value
|
|
* of sched_ravg_window is DEFAULT_SCHED_RAVG_WINDOW, use that to compute
|
|
* sched_load_granule.
|
|
*/
|
|
unsigned int __read_mostly sched_load_granule;
|
|
|
|
/* frequent yielder tracking */
|
|
static unsigned int total_yield_cnt;
|
|
static unsigned int total_sleep_cnt;
|
|
static u64 yield_counting_window_ts;
|
|
|
|
bool walt_is_idle_task(struct task_struct *p)
|
|
{
|
|
return walt_flag_test(p, WALT_IDLE_TASK_BIT);
|
|
}
|
|
|
|
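/*
 * walt_sched_clock - WALT's view of sched_clock().
 *
 * While the clock is marked suspended (between walt_suspend() and
 * walt_resume() below), return the value captured at suspend time
 * instead of calling sched_clock().
 */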
u64 walt_sched_clock(void)
|
|
{
|
|
if (unlikely(walt_clock_suspended))
|
|
return sched_clock_last;
|
|
return sched_clock();
|
|
}
|
|
|
|
static void walt_resume(void)
|
|
{
|
|
walt_clock_suspended = false;
|
|
}
|
|
|
|
static int walt_suspend(void)
|
|
{
|
|
sched_clock_last = sched_clock();
|
|
walt_clock_suspended = true;
|
|
return 0;
|
|
}
|
|
|
|
static struct syscore_ops walt_syscore_ops = {
|
|
.resume = walt_resume,
|
|
.suspend = walt_suspend
|
|
};
|
|
|
|
/*
 * @boost: boost level; should be 0, 1 or 2.
 * @period: boost duration in milliseconds.
 */
|
|
int set_task_boost(int boost, u64 period)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) current->android_vendor_data1;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return -EAGAIN;
|
|
|
|
if (boost < TASK_BOOST_NONE || boost >= TASK_BOOST_END)
|
|
return -EINVAL;
|
|
if (boost) {
|
|
wts->boost = boost;
|
|
wts->boost_period = (u64)period * 1000 * 1000;
|
|
wts->boost_expires = walt_sched_clock() + wts->boost_period;
|
|
} else {
|
|
wts->boost = 0;
|
|
wts->boost_expires = 0;
|
|
wts->boost_period = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(set_task_boost);
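/*
 * Example (hypothetical caller; the boost level and period below are only
 * illustrative): request boost level 1 on the calling task for 50ms and
 * check the result.
 *
 *	int ret = set_task_boost(1, 50);
 *
 *	if (ret)
 *		pr_debug("walt: task boost rejected: %d\n", ret);
 */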
|
|
|
|
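/*
 * Disable interrupts and take the runqueue lock of every CPU in @cpus in
 * ascending CPU order, using increasing lockdep subclasses for the nested
 * acquisitions. Paired with release_rq_locks_irqrestore() below.
 */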
static inline void acquire_rq_locks_irqsave(const cpumask_t *cpus,
|
|
unsigned long *flags)
|
|
{
|
|
int cpu;
|
|
int level;
|
|
|
|
local_irq_save(*flags);
|
|
|
|
level = 0;
|
|
for_each_cpu(cpu, cpus) {
|
|
if (level == 0)
|
|
raw_spin_lock(&cpu_rq(cpu)->__lock);
|
|
else
|
|
raw_spin_lock_nested(&cpu_rq(cpu)->__lock, level);
|
|
level++;
|
|
}
|
|
}
|
|
|
|
static inline void release_rq_locks_irqrestore(const cpumask_t *cpus,
|
|
unsigned long *flags)
|
|
{
|
|
int cpu;
|
|
|
|
for_each_cpu(cpu, cpus)
|
|
raw_spin_unlock(&cpu_rq(cpu)->__lock);
|
|
local_irq_restore(*flags);
|
|
}
|
|
|
|
static inline u64 walt_rq_clock(struct rq *rq)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
if (unlikely(walt_clock_suspended))
|
|
return sched_clock_last;
|
|
|
|
walt_lockdep_assert_rq(rq, NULL);
|
|
|
|
if (!(rq->clock_update_flags & RQCF_UPDATED))
|
|
update_rq_clock(rq);
|
|
|
|
return max(rq_clock(rq), wrq->latest_clock);
|
|
}
|
|
|
|
static unsigned int walt_cpu_high_irqload;
|
|
|
|
static __read_mostly unsigned int sched_io_is_busy = 1;
|
|
|
|
/* Window size (in ns) */
|
|
static __read_mostly unsigned int new_sched_ravg_window = DEFAULT_SCHED_RAVG_WINDOW;
|
|
|
|
static DEFINE_SPINLOCK(sched_ravg_window_lock);
|
|
static u64 sched_ravg_window_change_time;
|
|
|
|
static unsigned int __read_mostly sched_init_task_load_windows_scaled;
|
|
|
|
/* Size of bitmaps maintained to track top tasks */
|
|
static const unsigned int top_tasks_bitmap_size =
|
|
BITS_TO_LONGS(NUM_LOAD_INDICES + 1) * sizeof(unsigned long);
|
|
|
|
__read_mostly unsigned int walt_scale_demand_divisor;
|
|
|
|
#define SCHED_PRINT(arg) printk_deferred("%s=%llu", #arg, (unsigned long long)arg)
|
|
#define STRG(arg) #arg
|
|
|
|
void walt_task_dump(struct task_struct *p)
|
|
{
|
|
char buff[WALT_NR_CPUS * 16];
|
|
int i, j = 0;
|
|
int buffsz = WALT_NR_CPUS * 16;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
bool is_32bit_thread = is_compat_thread(task_thread_info(p));
|
|
|
|
printk_deferred("Task: %.16s-%d\n", p->comm, p->pid);
|
|
SCHED_PRINT(READ_ONCE(p->__state));
|
|
SCHED_PRINT(task_thread_info(p)->cpu);
|
|
SCHED_PRINT(p->policy);
|
|
SCHED_PRINT(p->prio);
|
|
SCHED_PRINT(wts->mark_start);
|
|
SCHED_PRINT(wts->demand);
|
|
SCHED_PRINT(wts->coloc_demand);
|
|
SCHED_PRINT(wts->enqueue_after_migration);
|
|
SCHED_PRINT(wts->prev_cpu);
|
|
SCHED_PRINT(wts->new_cpu);
|
|
SCHED_PRINT(wts->misfit);
|
|
SCHED_PRINT(wts->prev_on_rq);
|
|
SCHED_PRINT(wts->prev_on_rq_cpu);
|
|
SCHED_PRINT(wts->mvp_prio);
|
|
SCHED_PRINT(wts->iowaited);
|
|
SCHED_PRINT(sched_ravg_window);
|
|
SCHED_PRINT(new_sched_ravg_window);
|
|
|
|
for (i = 0 ; i < nr_cpu_ids; i++)
|
|
j += scnprintf(buff + j, buffsz - j, "%u ",
|
|
wts->curr_window_cpu[i]);
|
|
printk_deferred("%s=%u (%s)\n", STRG(wts->curr_window),
|
|
wts->curr_window, buff);
|
|
|
|
for (i = 0, j = 0 ; i < nr_cpu_ids; i++)
|
|
j += scnprintf(buff + j, buffsz - j, "%u ",
|
|
wts->prev_window_cpu[i]);
|
|
printk_deferred("%s=%u (%s)\n", STRG(wts->prev_window),
|
|
wts->prev_window, buff);
|
|
|
|
SCHED_PRINT(wts->last_sleep_ts);
|
|
SCHED_PRINT(wts->last_wake_ts);
|
|
SCHED_PRINT(wts->last_enqueued_ts);
|
|
SCHED_PRINT(wts->mark_start_birth_ts);
|
|
SCHED_PRINT(wts->misfit);
|
|
SCHED_PRINT(wts->unfilter);
|
|
SCHED_PRINT(is_32bit_thread);
|
|
SCHED_PRINT(wts->grp);
|
|
SCHED_PRINT(p->on_cpu);
|
|
SCHED_PRINT(p->on_rq);
|
|
}
|
|
|
|
void walt_rq_dump(int cpu)
|
|
{
|
|
struct rq *rq = cpu_rq(cpu);
|
|
struct task_struct *tsk = cpu_curr(cpu);
|
|
int i;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
/*
|
|
* Increment the task reference so that it can't be
|
|
* freed on a remote CPU. Since we are going to
|
|
* enter panic, there is no need to decrement the
|
|
* task reference. Decrementing the task reference
|
|
* can't be done in atomic context, especially with
|
|
* rq locks held.
|
|
*/
|
|
get_task_struct(tsk);
|
|
printk_deferred("CPU:%d nr_running:%u current: %d (%s)\n",
|
|
cpu, rq->nr_running, tsk->pid, tsk->comm);
|
|
|
|
printk_deferred("==========================================");
|
|
SCHED_PRINT(wrq->latest_clock);
|
|
SCHED_PRINT(wrq->window_start);
|
|
SCHED_PRINT(wrq->prev_window_size);
|
|
SCHED_PRINT(wrq->curr_runnable_sum);
|
|
SCHED_PRINT(wrq->prev_runnable_sum);
|
|
SCHED_PRINT(wrq->nt_curr_runnable_sum);
|
|
SCHED_PRINT(wrq->nt_prev_runnable_sum);
|
|
SCHED_PRINT(wrq->task_exec_scale);
|
|
SCHED_PRINT(wrq->grp_time.curr_runnable_sum);
|
|
SCHED_PRINT(wrq->grp_time.prev_runnable_sum);
|
|
SCHED_PRINT(wrq->grp_time.nt_curr_runnable_sum);
|
|
SCHED_PRINT(wrq->grp_time.nt_prev_runnable_sum);
|
|
for (i = 0 ; i < NUM_TRACKED_WINDOWS; i++) {
|
|
printk_deferred("wrq->load_subs[%d].window_start=%llu)\n", i,
|
|
wrq->load_subs[i].window_start);
|
|
printk_deferred("wrq->load_subs[%d].subs=%llu)\n", i,
|
|
wrq->load_subs[i].subs);
|
|
printk_deferred("wrq->load_subs[%d].new_subs=%llu)\n", i,
|
|
wrq->load_subs[i].new_subs);
|
|
}
|
|
walt_task_dump(tsk);
|
|
SCHED_PRINT(sched_capacity_margin_up[cpu]);
|
|
SCHED_PRINT(sched_capacity_margin_down[cpu]);
|
|
}
|
|
|
|
void walt_dump(void)
|
|
{
|
|
int cpu;
|
|
|
|
printk_deferred("============ WALT RQ DUMP START ==============\n");
|
|
printk_deferred("Sched clock: %llu\n", walt_sched_clock());
|
|
printk_deferred("Time last window changed=%llu\n",
|
|
sched_ravg_window_change_time);
|
|
printk_deferred("global_ws=%llu\n",
|
|
atomic64_read(&walt_irq_work_lastq_ws));
|
|
for_each_online_cpu(cpu)
|
|
walt_rq_dump(cpu);
|
|
SCHED_PRINT(max_possible_cluster_id);
|
|
printk_deferred("============ WALT RQ DUMP END ==============\n");
|
|
}
|
|
|
|
int in_sched_bug;
|
|
|
|
static inline void
|
|
fixup_cumulative_runnable_avg(struct rq *rq,
|
|
struct task_struct *p,
|
|
struct walt_sched_stats *stats,
|
|
s64 demand_scaled_delta,
|
|
s64 pred_demand_scaled_delta)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
s64 cumulative_runnable_avg_scaled =
|
|
stats->cumulative_runnable_avg_scaled + demand_scaled_delta;
|
|
s64 pred_demands_sum_scaled =
|
|
stats->pred_demands_sum_scaled + pred_demand_scaled_delta;
|
|
|
|
walt_lockdep_assert_rq(rq, p);
|
|
|
|
if (task_rq(p) != rq)
|
|
WALT_BUG(WALT_BUG_UPSTREAM, p, "on CPU %d task %s(%d) not on rq %d",
|
|
raw_smp_processor_id(), p->comm, p->pid, rq->cpu);
|
|
|
|
if (cumulative_runnable_avg_scaled < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, p, "on CPU %d task ds=%hu is higher than cra=%llu\n",
|
|
raw_smp_processor_id(), wts->demand_scaled,
|
|
stats->cumulative_runnable_avg_scaled);
|
|
cumulative_runnable_avg_scaled = 0;
|
|
}
|
|
stats->cumulative_runnable_avg_scaled = (u64)cumulative_runnable_avg_scaled;
|
|
|
|
if (pred_demands_sum_scaled < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, p, "on CPU %d task pds=%hu is higher than pds_sum=%llu\n",
|
|
raw_smp_processor_id(), wts->pred_demand_scaled,
|
|
stats->pred_demands_sum_scaled);
|
|
pred_demands_sum_scaled = 0;
|
|
}
|
|
stats->pred_demands_sum_scaled = (u64)pred_demands_sum_scaled;
|
|
}
|
|
|
|
static void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
|
|
u16 updated_demand_scaled,
|
|
u16 updated_pred_demand_scaled)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
s64 task_load_delta = (s64)updated_demand_scaled -
|
|
wts->demand_scaled;
|
|
s64 pred_demand_delta = (s64)updated_pred_demand_scaled -
|
|
wts->pred_demand_scaled;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
fixup_cumulative_runnable_avg(rq, p, &wrq->walt_stats, task_load_delta,
|
|
pred_demand_delta);
|
|
}
|
|
|
|
static void rollover_cpu_window(struct rq *rq, bool full_window);
|
|
static void rollover_top_tasks(struct rq *rq, bool full_window);
|
|
|
|
/*
|
|
* Demand aggregation for frequency purpose:
|
|
*
|
|
* CPU demand of tasks from various related groups is aggregated per-cluster and
|
|
* added to the "max_busy_cpu" in that cluster, where max_busy_cpu is determined
|
|
* by just wrq->prev_runnable_sum.
|
|
*
|
|
* Some examples follow, which assume:
|
|
* Cluster0 = CPU0-3, Cluster1 = CPU4-7
|
|
* One related thread group A that has tasks A0, A1, A2
|
|
*
|
|
* A->cpu_time[X].curr/prev_sum = counters in which cpu execution stats of
|
|
* tasks belonging to group A are accumulated when they run on cpu X.
|
|
*
|
|
* CX->curr/prev_sum = counters in which cpu execution stats of all tasks
|
|
* not belonging to group A are accumulated when they run on cpu X
|
|
*
|
|
 * Let's say the stats for window M were as below:
|
|
*
|
|
* C0->prev_sum = 1ms, A->cpu_time[0].prev_sum = 5ms
|
|
* Task A0 ran 5ms on CPU0
|
|
* Task B0 ran 1ms on CPU0
|
|
*
|
|
* C1->prev_sum = 5ms, A->cpu_time[1].prev_sum = 6ms
|
|
* Task A1 ran 4ms on CPU1
|
|
* Task A2 ran 2ms on CPU1
|
|
* Task B1 ran 5ms on CPU1
|
|
*
|
|
* C2->prev_sum = 0ms, A->cpu_time[2].prev_sum = 0
|
|
* CPU2 idle
|
|
*
|
|
* C3->prev_sum = 0ms, A->cpu_time[3].prev_sum = 0
|
|
* CPU3 idle
|
|
*
|
|
* In this case, CPU1 was most busy going by just its prev_sum counter. Demand
|
|
* from all group A tasks are added to CPU1. IOW, at end of window M, cpu busy
|
|
* time reported to governor will be:
|
|
 *
* C0 busy time = 1ms
|
|
* C1 busy time = 5 + 5 + 6 = 16ms
|
|
*
|
|
*/
|
|
__read_mostly bool sched_freq_aggr_en;
|
|
|
|
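/*
 * Advance wrq->window_start to the latest window boundary at or before
 * @wallclock. When at least one full window has elapsed, roll the CPU
 * busy sums and top-task tables over. On CPU0 this is also where the
 * global yield/sleep counting window is closed out. Returns the previous
 * window_start so the caller can detect whether a new window began.
 */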
static u64
|
|
update_window_start(struct rq *rq, u64 wallclock, int event)
|
|
{
|
|
s64 delta;
|
|
int nr_windows;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_sched_cluster *cluster = cpu_cluster(task_cpu(current));
|
|
struct smart_freq_cluster_info *smart_freq_info = cluster->smart_freq_info;
|
|
u64 old_window_start = wrq->window_start;
|
|
bool full_window;
|
|
|
|
if (wallclock < wrq->latest_clock) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL,
|
|
"on CPU%d; wallclock=%llu(0x%llx) is lesser than latest_clock=%llu(0x%llx)",
|
|
rq->cpu, wallclock, wallclock, wrq->latest_clock,
|
|
wrq->latest_clock);
|
|
wallclock = wrq->latest_clock;
|
|
}
|
|
delta = wallclock - wrq->window_start;
|
|
if (delta < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL,
|
|
" on CPU%d; wallclock=%llu(0x%llx) is lesser than window_start=%llu(0x%llx)",
|
|
rq->cpu, wallclock, wallclock,
|
|
wrq->window_start, wrq->window_start);
|
|
delta = 0;
|
|
wallclock = max(wallclock, wrq->window_start);
|
|
}
|
|
wrq->latest_clock = wallclock;
|
|
if (delta < sched_ravg_window)
|
|
return old_window_start;
|
|
|
|
nr_windows = div64_u64(delta, sched_ravg_window);
|
|
wrq->window_start += (u64)nr_windows * (u64)sched_ravg_window;
|
|
|
|
wrq->prev_window_size = sched_ravg_window;
|
|
|
|
full_window = nr_windows > 1;
|
|
rollover_cpu_window(rq, full_window);
|
|
rollover_top_tasks(rq, full_window);
|
|
|
|
/* Update yielder statistics */
|
|
if (cpu_of(rq) == 0) {
|
|
u64 delta = wallclock - yield_counting_window_ts;
|
|
|
|
/* window boundary crossed */
|
|
if (delta > YIELD_WINDOW_SIZE_NSEC) {
|
|
unsigned int target_threshold_wake = MAX_YIELD_CNT_GLOBAL_THR;
|
|
unsigned int target_threshold_sleep = MAX_YIELD_SLEEP_CNT_GLOBAL_THR;
|
|
|
|
/*
 * If update_window_start() arrives more than YIELD_GRACE_PERIOD_NSEC
 * after YIELD_WINDOW_SIZE_NSEC has elapsed, extrapolate the thresholds
 * based on the elapsed delta time.
 */
|
|
|
|
if (unlikely(delta > YIELD_WINDOW_SIZE_NSEC + YIELD_GRACE_PERIOD_NSEC)) {
|
|
target_threshold_wake =
|
|
div64_u64(delta * MAX_YIELD_CNT_GLOBAL_THR,
|
|
YIELD_WINDOW_SIZE_NSEC);
|
|
target_threshold_sleep =
|
|
div64_u64(delta * MAX_YIELD_SLEEP_CNT_GLOBAL_THR,
|
|
YIELD_WINDOW_SIZE_NSEC);
|
|
}
|
|
|
|
if ((total_yield_cnt >= target_threshold_wake) ||
|
|
(total_sleep_cnt >= target_threshold_sleep / 2)) {
|
|
if (contiguous_yielding_windows < MIN_CONTIGUOUS_YIELDING_WINDOW)
|
|
contiguous_yielding_windows++;
|
|
} else {
|
|
contiguous_yielding_windows = 0;
|
|
}
|
|
trace_sched_yielder(wallclock, yield_counting_window_ts,
|
|
contiguous_yielding_windows,
|
|
total_yield_cnt, target_threshold_wake,
|
|
total_sleep_cnt, target_threshold_sleep,
|
|
smart_freq_info->cluster_active_reason);
|
|
|
|
yield_counting_window_ts = wallclock;
|
|
total_yield_cnt = 0;
|
|
total_sleep_cnt = 0;
|
|
}
|
|
}
|
|
|
|
return old_window_start;
|
|
}
|
|
|
|
/*
|
|
* Assumes rq_lock is held and wallclock was recorded in the same critical
|
|
* section as this function's invocation.
|
|
*/
|
|
static inline u64 read_cycle_counter(int cpu, u64 wallclock)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
|
|
|
if (wrq->last_cc_update != wallclock) {
|
|
wrq->cycles = walt_get_cycle_counts_cb(cpu, wallclock);
|
|
wrq->last_cc_update = wallclock;
|
|
}
|
|
|
|
return wrq->cycles;
|
|
}
|
|
|
|
static void update_task_cpu_cycles(struct task_struct *p, int cpu,
|
|
u64 wallclock)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
if (use_cycle_counter)
|
|
wts->cpu_cycles = read_cycle_counter(cpu, wallclock);
|
|
}
|
|
|
|
static inline bool is_ed_enabled(void)
|
|
{
|
|
return (boost_policy != SCHED_BOOST_NONE);
|
|
}
|
|
|
|
static inline bool is_ed_task(struct task_struct *p, u64 wallclock)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
return (wallclock - wts->last_wake_ts >= EARLY_DETECTION_DURATION);
|
|
}
|
|
|
|
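/*
 * Clear wrq->ed_task and, when early detection is enabled, scan up to the
 * first 10 runnable CFS tasks on @rq (skipping @deq_task) for one whose
 * last wakeup was at least EARLY_DETECTION_DURATION ago. Cache it in
 * wrq->ed_task and return true if such a task is found.
 */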
static bool is_ed_task_present(struct rq *rq, u64 wallclock, struct task_struct *deq_task)
|
|
{
|
|
struct task_struct *p;
|
|
int loop_max = 10;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
wrq->ed_task = NULL;
|
|
|
|
if (!is_ed_enabled() || !rq->cfs.h_nr_running)
|
|
return false;
|
|
|
|
list_for_each_entry(p, &rq->cfs_tasks, se.group_node) {
|
|
if (!loop_max)
|
|
break;
|
|
|
|
if (p == deq_task)
|
|
continue;
|
|
|
|
if (is_ed_task(p, wallclock)) {
|
|
wrq->ed_task = p;
|
|
return true;
|
|
}
|
|
|
|
loop_max--;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
|
|
u64 wallclock, u64 irqtime);
|
|
/*
|
|
* Return total number of tasks "eligible" to run on higher capacity cpus
|
|
*/
|
|
unsigned int walt_big_tasks(int cpu)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
|
|
|
return wrq->walt_stats.nr_big_tasks;
|
|
}
|
|
|
|
int walt_trailblazer_tasks(int cpu)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
|
|
|
return wrq->walt_stats.nr_trailblazer_tasks;
|
|
}
|
|
|
|
bool trailblazer_on_prime(void)
|
|
{
|
|
int cpu;
|
|
|
|
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
|
|
if (walt_trailblazer_tasks(cpu))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void clear_walt_request(int cpu)
|
|
{
|
|
struct rq *rq = cpu_rq(cpu);
|
|
unsigned long flags;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
clear_reserved(cpu);
|
|
if (wrq->push_task) {
|
|
struct task_struct *push_task = NULL;
|
|
|
|
raw_spin_lock_irqsave(&rq->__lock, flags);
|
|
if (wrq->push_task) {
|
|
clear_reserved(rq->push_cpu);
|
|
push_task = wrq->push_task;
|
|
wrq->push_task = NULL;
|
|
}
|
|
rq->active_balance = 0;
|
|
raw_spin_unlock_irqrestore(&rq->__lock, flags);
|
|
if (push_task)
|
|
put_task_struct(push_task);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Special case the last index and provide a fast path for index = 0.
|
|
* Note that sched_load_granule can change underneath us if we are not
|
|
* holding any runqueue locks while calling the two functions below.
|
|
*/
|
|
static u32 top_task_load(struct rq *rq)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
int index = wrq->prev_top;
|
|
u8 prev = 1 - wrq->curr_table;
|
|
|
|
if (!index) {
|
|
int msb = NUM_LOAD_INDICES - 1;
|
|
|
|
if (!test_bit(msb, wrq->top_tasks_bitmap[prev]))
|
|
return 0;
|
|
else
|
|
return sched_load_granule;
|
|
} else if (index == NUM_LOAD_INDICES - 1) {
|
|
return sched_ravg_window;
|
|
} else {
|
|
return (index + 1) * sched_load_granule;
|
|
}
|
|
}
|
|
|
|
unsigned long sched_user_hint_reset_time;
|
|
static bool is_cluster_hosting_top_app(struct walt_sched_cluster *cluster);
|
|
|
|
static inline bool
|
|
should_apply_suh_freq_boost(struct walt_sched_cluster *cluster)
|
|
{
|
|
if (sched_freq_aggr_en || !sysctl_sched_user_hint ||
|
|
!cluster->aggr_grp_load)
|
|
return false;
|
|
|
|
return is_cluster_hosting_top_app(cluster);
|
|
}
|
|
|
|
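/*
 * Compute the busy time that will be reported to the cpufreq governor for
 * this CPU: the previous-window runnable sum plus either the cluster's
 * aggregated group load (when frequency aggregation is enabled) or this
 * CPU's group time, raised to the ksoftirqd or top-task load when those
 * are higher, boosted for the user hint, early-detection and pipeline-busy
 * cases, and forced to a full window under big-task rotation or when
 * trailblazer tasks are present. *reason records which term applied last.
 */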
static inline u64 freq_policy_load(struct rq *rq, unsigned int *reason, bool trace)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_sched_cluster *cluster = wrq->cluster;
|
|
u64 aggr_grp_load = cluster->aggr_grp_load;
|
|
u64 load, tt_load = 0, kload = 0;
|
|
struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu_of(rq));
|
|
|
|
if (sched_freq_aggr_en) {
|
|
load = wrq->prev_runnable_sum + aggr_grp_load;
|
|
*reason = CPUFREQ_REASON_FREQ_AGR_BIT;
|
|
} else {
|
|
load = wrq->prev_runnable_sum +
|
|
wrq->grp_time.prev_runnable_sum;
|
|
}
|
|
|
|
if (cpu_ksoftirqd && READ_ONCE(cpu_ksoftirqd->__state) == TASK_RUNNING) {
|
|
kload = task_load(cpu_ksoftirqd);
|
|
if (kload > load) {
|
|
load = kload;
|
|
*reason = CPUFREQ_REASON_KSOFTIRQD_BIT;
|
|
}
|
|
}
|
|
|
|
tt_load = top_task_load(rq);
|
|
if (tt_load > load) {
|
|
load = tt_load;
|
|
*reason = CPUFREQ_REASON_TT_LOAD_BIT;
|
|
}
|
|
|
|
if (should_apply_suh_freq_boost(cluster)) {
|
|
if (is_suh_max())
|
|
load = sched_ravg_window;
|
|
else
|
|
load = div64_u64(load * sysctl_sched_user_hint,
|
|
(u64)100);
|
|
*reason = CPUFREQ_REASON_SUH_BIT;
|
|
}
|
|
|
|
if (wrq->ed_task) {
|
|
load = mult_frac(load, 100 + sysctl_ed_boost_pct, 100);
|
|
*reason = CPUFREQ_REASON_EARLY_DET_BIT;
|
|
}
|
|
|
|
if (wrq->lrb_pipeline_start_time) {
|
|
load = mult_frac(load, 100 + sysctl_pipeline_busy_boost_pct, 100);
|
|
*reason = CPUFREQ_REASON_PIPELINE_BUSY_BIT;
|
|
}
|
|
|
|
if (walt_rotation_enabled) {
|
|
load = sched_ravg_window;
|
|
*reason = CPUFREQ_REASON_BTR_BIT;
|
|
}
|
|
|
|
if (walt_trailblazer_tasks(cpu_of(rq)) && walt_feat(WALT_FEAT_TRAILBLAZER_BIT)) {
|
|
load = sched_ravg_window;
|
|
*reason = CPUFREQ_REASON_TRAILBLAZER_CPU_BIT;
|
|
}
|
|
|
|
if (trace)
|
|
trace_sched_load_to_gov(rq, aggr_grp_load, tt_load, sched_freq_aggr_en,
|
|
load, 0, walt_rotation_enabled,
|
|
sysctl_sched_user_hint, wrq, *reason);
|
|
return load;
|
|
}
|
|
|
|
static bool rtgb_active;
|
|
|
|
static inline unsigned long
|
|
__cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason, bool trace)
|
|
{
|
|
u64 util;
|
|
struct rq *rq = cpu_rq(cpu);
|
|
unsigned long capacity = capacity_orig_of(cpu);
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
util = scale_time_to_util(freq_policy_load(rq, reason, trace));
|
|
|
|
/*
|
|
 * util is on a scale of 0 to 1024. This is the utilization
 * of the CPU in the last window.
|
|
*/
|
|
wrq->util = util;
|
|
|
|
if (walt_load) {
|
|
u64 nl = wrq->nt_prev_runnable_sum +
|
|
wrq->grp_time.nt_prev_runnable_sum;
|
|
u64 pl = wrq->walt_stats.pred_demands_sum_scaled;
|
|
|
|
wrq->old_busy_time = util;
|
|
wrq->old_estimated_time = pl;
|
|
|
|
nl = scale_time_to_util(nl);
|
|
walt_load->nl = nl;
|
|
walt_load->pl = pl;
|
|
walt_load->ws = walt_load_reported_window;
|
|
walt_load->rtgb_active = rtgb_active;
|
|
if (wrq->ed_task)
|
|
walt_load->ed_active = true;
|
|
else
|
|
walt_load->ed_active = false;
|
|
walt_load->trailblazer_state = trailblazer_state;
|
|
}
|
|
|
|
return (util >= capacity) ? capacity : util;
|
|
}
|
|
|
|
#define PIPELINE_SYNC_VAL(first, second, x) \
|
|
(max(first, mult_frac(second, x, 100)))
|
|
|
|
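/*
 * Percentage of the non-prime pipeline CPUs' utilization that the prime
 * cluster is made to track when pipeline load sync is active: below the
 * configured utilization threshold the low percentage applies, above it
 * the high one. The 60fps tables are used when sched_ravg_window is at
 * least SCHED_RAVG_16MS_WINDOW. prime_sync_pct() below is the counterpart
 * for how much of the prime cluster's utilization the others track.
 */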
int other_sync_pct(unsigned long util_other)
|
|
{
|
|
int pct;
|
|
|
|
if (sched_ravg_window >= SCHED_RAVG_16MS_WINDOW) {
|
|
if (util_other <=
|
|
load_sync_util_thres_60fps[num_sched_clusters - 2][num_sched_clusters - 1])
|
|
pct =
|
|
load_sync_low_pct_60fps[num_sched_clusters - 2][num_sched_clusters - 1];
|
|
else
|
|
pct =
|
|
load_sync_high_pct_60fps[num_sched_clusters - 2][num_sched_clusters - 1];
|
|
|
|
return pct;
|
|
}
|
|
|
|
if (util_other <= load_sync_util_thres[num_sched_clusters - 2][num_sched_clusters - 1])
|
|
pct = load_sync_low_pct[num_sched_clusters - 2][num_sched_clusters - 1];
|
|
else
|
|
pct = load_sync_high_pct[num_sched_clusters - 2][num_sched_clusters - 1];
|
|
|
|
return pct;
|
|
}
|
|
|
|
int prime_sync_pct(unsigned long util_prime)
|
|
{
|
|
int pct;
|
|
|
|
if (sched_ravg_window >= SCHED_RAVG_16MS_WINDOW) {
|
|
if (util_prime <=
|
|
load_sync_util_thres_60fps[num_sched_clusters - 1][num_sched_clusters - 2])
|
|
pct =
|
|
load_sync_low_pct_60fps[num_sched_clusters - 1][num_sched_clusters - 2];
|
|
else
|
|
pct =
|
|
load_sync_high_pct_60fps[num_sched_clusters - 1][num_sched_clusters - 2];
|
|
|
|
return pct;
|
|
}
|
|
|
|
if (util_prime <= load_sync_util_thres[num_sched_clusters - 1][num_sched_clusters - 2])
|
|
pct = load_sync_low_pct[num_sched_clusters - 1][num_sched_clusters - 2];
|
|
else
|
|
pct = load_sync_high_pct[num_sched_clusters - 1][num_sched_clusters - 2];
|
|
|
|
return pct;
|
|
}
|
|
|
|
unsigned long
|
|
cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load, unsigned int *reason)
|
|
{
|
|
struct walt_cpu_load wl_other = {0};
|
|
struct walt_cpu_load wl_prime = {0};
|
|
unsigned long util = 0, util_other = 0, util_prime = 0;
|
|
unsigned long capacity = capacity_orig_of(cpu);
|
|
int i, mpct_other, mpct_prime;
|
|
unsigned long max_nl_other = 0, max_pl_other = 0;
|
|
unsigned long max_nl_prime = 0, max_pl_prime = 0;
|
|
|
|
util = __cpu_util_freq_walt(cpu, walt_load, reason, true);
|
|
|
|
if (enable_load_sync(cpu)) {
|
|
for_each_cpu(i, &pipeline_sync_cpus) {
|
|
if (cpumask_test_cpu(i, &cpu_array[0][num_sched_clusters-1])) {
|
|
util_prime = max(util_prime,
|
|
__cpu_util_freq_walt(i, &wl_prime, reason, false));
|
|
max_nl_prime = max(max_nl_prime, wl_prime.nl);
|
|
max_pl_prime = max(max_pl_prime, wl_prime.pl);
|
|
} else {
|
|
util_other = max(util_other,
|
|
__cpu_util_freq_walt(i, &wl_other, reason, false));
|
|
max_nl_other = max(max_nl_other, wl_other.nl);
|
|
max_pl_other = max(max_pl_other, wl_other.pl);
|
|
}
|
|
}
|
|
|
|
mpct_other = other_sync_pct(util_other);
|
|
mpct_prime = prime_sync_pct(util_prime);
|
|
|
|
if (cpumask_test_cpu(cpu, &cpu_array[0][num_sched_clusters-1])) {
|
|
util = PIPELINE_SYNC_VAL(util_prime, util_other, mpct_other);
|
|
walt_load->nl = PIPELINE_SYNC_VAL(max_nl_prime, max_nl_other, mpct_other);
|
|
walt_load->pl = PIPELINE_SYNC_VAL(max_pl_prime, max_pl_other, mpct_other);
|
|
trace_sched_load_sync_settings(cpu, util_other, util_prime, mpct_other);
|
|
} else {
|
|
util = PIPELINE_SYNC_VAL(util_other, util_prime, mpct_prime);
|
|
walt_load->nl = PIPELINE_SYNC_VAL(max_nl_other, max_nl_prime, mpct_prime);
|
|
walt_load->pl = PIPELINE_SYNC_VAL(max_pl_other, max_pl_prime, mpct_prime);
|
|
trace_sched_load_sync_settings(cpu, util_other, util_prime, mpct_prime);
|
|
}
|
|
}
|
|
|
|
if (!cpumask_test_cpu(cpu, &asym_cap_sibling_cpus))
|
|
goto finish;
|
|
|
|
if (is_state1())
|
|
goto finish;
|
|
|
|
for_each_cpu(i, &asym_cap_sibling_cpus) {
|
|
if (i != cpu) {
|
|
util_other = max(util_other,
|
|
__cpu_util_freq_walt(i, &wl_other, reason, false));
|
|
max_nl_other = max(max_nl_other, wl_other.nl);
|
|
max_pl_other = max(max_pl_other, wl_other.pl);
|
|
}
|
|
}
|
|
|
|
util = max(util, util_other);
|
|
walt_load->nl = max(walt_load->nl, max_nl_other);
|
|
walt_load->pl = max(walt_load->pl, max_pl_other);
|
|
finish:
|
|
return (util >= capacity) ? capacity : util;
|
|
}
|
|
|
|
/*
|
|
* In this function we match the accumulated subtractions with the current
|
|
* and previous windows we are operating with. Ignore any entries where
|
|
* the window start in the load_subtraction struct does not match either
|
|
 * the current or the previous window. This could happen whenever CPUs
|
|
* become idle or busy with interrupts disabled for an extended period.
|
|
*/
|
|
static inline void account_load_subtractions(struct rq *rq)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u64 ws = wrq->window_start;
|
|
u64 prev_ws = ws - wrq->prev_window_size;
|
|
struct load_subtractions *ls = wrq->load_subs;
|
|
int i;
|
|
|
|
for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
|
|
if (ls[i].window_start == ws) {
|
|
wrq->curr_runnable_sum -= ls[i].subs;
|
|
wrq->nt_curr_runnable_sum -= ls[i].new_subs;
|
|
} else if (ls[i].window_start == prev_ws) {
|
|
wrq->prev_runnable_sum -= ls[i].subs;
|
|
wrq->nt_prev_runnable_sum -= ls[i].new_subs;
|
|
}
|
|
|
|
ls[i].subs = 0;
|
|
ls[i].new_subs = 0;
|
|
}
|
|
|
|
if ((s64)wrq->prev_runnable_sum < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL, "wrq->prev_runnable_sum=%llu < 0",
|
|
(s64)wrq->prev_runnable_sum);
|
|
wrq->prev_runnable_sum = 0;
|
|
}
|
|
if ((s64)wrq->curr_runnable_sum < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL, "wrq->curr_runnable_sum=%llu < 0",
|
|
(s64)wrq->curr_runnable_sum);
|
|
wrq->curr_runnable_sum = 0;
|
|
}
|
|
if ((s64)wrq->nt_prev_runnable_sum < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL, "wrq->nt_prev_runnable_sum=%llu < 0",
|
|
(s64)wrq->nt_prev_runnable_sum);
|
|
wrq->nt_prev_runnable_sum = 0;
|
|
}
|
|
if ((s64)wrq->nt_curr_runnable_sum < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL, "wrq->nt_curr_runnable_sum=%llu < 0",
|
|
(s64)wrq->nt_curr_runnable_sum);
|
|
wrq->nt_curr_runnable_sum = 0;
|
|
}
|
|
}
|
|
|
|
static inline void create_subtraction_entry(struct rq *rq, u64 ws, int index)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
wrq->load_subs[index].window_start = ws;
|
|
wrq->load_subs[index].subs = 0;
|
|
wrq->load_subs[index].new_subs = 0;
|
|
}
|
|
|
|
static int get_top_index(unsigned long *bitmap, unsigned long old_top)
|
|
{
|
|
int index = find_next_bit(bitmap, NUM_LOAD_INDICES, old_top);
|
|
|
|
if (index == NUM_LOAD_INDICES)
|
|
return 0;
|
|
|
|
return NUM_LOAD_INDICES - 1 - index;
|
|
}
|
|
|
|
static int get_subtraction_index(struct rq *rq, u64 ws)
|
|
{
|
|
int i;
|
|
u64 oldest = ULLONG_MAX;
|
|
int oldest_index = 0;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
|
|
u64 entry_ws = wrq->load_subs[i].window_start;
|
|
|
|
if (ws == entry_ws)
|
|
return i;
|
|
|
|
if (entry_ws < oldest) {
|
|
oldest = entry_ws;
|
|
oldest_index = i;
|
|
}
|
|
}
|
|
|
|
create_subtraction_entry(rq, ws, oldest_index);
|
|
return oldest_index;
|
|
}
|
|
|
|
static void update_rq_load_subtractions(int index, struct rq *rq,
|
|
u32 sub_load, bool new_task)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
wrq->load_subs[index].subs += sub_load;
|
|
if (new_task)
|
|
wrq->load_subs[index].new_subs += sub_load;
|
|
}
|
|
|
|
static void update_cluster_load_subtractions(struct task_struct *p,
|
|
int cpu, u64 ws, bool new_task)
|
|
{
|
|
struct walt_sched_cluster *cluster = cpu_cluster(cpu);
|
|
struct cpumask cluster_cpus = cluster->cpus;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
|
u64 prev_ws = ws - wrq->prev_window_size;
|
|
int i;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
cpumask_clear_cpu(cpu, &cluster_cpus);
|
|
raw_spin_lock(&cluster->load_lock);
|
|
|
|
for_each_cpu(i, &cluster_cpus) {
|
|
struct rq *rq = cpu_rq(i);
|
|
int index;
|
|
|
|
if (wts->curr_window_cpu[i]) {
|
|
index = get_subtraction_index(rq, ws);
|
|
update_rq_load_subtractions(index, rq,
|
|
wts->curr_window_cpu[i], new_task);
|
|
wts->curr_window_cpu[i] = 0;
|
|
}
|
|
|
|
if (wts->prev_window_cpu[i]) {
|
|
index = get_subtraction_index(rq, prev_ws);
|
|
update_rq_load_subtractions(index, rq,
|
|
wts->prev_window_cpu[i], new_task);
|
|
wts->prev_window_cpu[i] = 0;
|
|
}
|
|
}
|
|
|
|
raw_spin_unlock(&cluster->load_lock);
|
|
}
|
|
|
|
static inline void migrate_inter_cluster_subtraction(struct task_struct *p, int task_cpu,
|
|
bool new_task)
|
|
{
|
|
struct rq *src_rq = cpu_rq(task_cpu);
|
|
struct walt_rq *src_wrq = &per_cpu(walt_rq, task_cpu);
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
if (src_wrq->curr_runnable_sum < wts->curr_window_cpu[task_cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU%d src_crs=%llu is lesser than task_contrib=%u",
|
|
p->pid, src_rq->cpu,
|
|
src_wrq->curr_runnable_sum,
|
|
wts->curr_window_cpu[task_cpu]);
|
|
src_wrq->curr_runnable_sum = wts->curr_window_cpu[task_cpu];
|
|
}
|
|
src_wrq->curr_runnable_sum -= wts->curr_window_cpu[task_cpu];
|
|
|
|
if (src_wrq->prev_runnable_sum < wts->prev_window_cpu[task_cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU%d src_prs=%llu is lesser than task_contrib=%u",
|
|
p->pid, src_rq->cpu,
|
|
src_wrq->prev_runnable_sum,
|
|
wts->prev_window_cpu[task_cpu]);
|
|
src_wrq->prev_runnable_sum = wts->prev_window_cpu[task_cpu];
|
|
}
|
|
src_wrq->prev_runnable_sum -= wts->prev_window_cpu[task_cpu];
|
|
|
|
if (new_task) {
|
|
if (src_wrq->nt_curr_runnable_sum < wts->curr_window_cpu[task_cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU%d src_nt_crs=%llu is lesser than task_contrib=%u",
|
|
p->pid, src_rq->cpu,
|
|
src_wrq->nt_curr_runnable_sum,
|
|
wts->curr_window_cpu[task_cpu]);
|
|
src_wrq->nt_curr_runnable_sum = wts->curr_window_cpu[task_cpu];
|
|
}
|
|
src_wrq->nt_curr_runnable_sum -=
|
|
wts->curr_window_cpu[task_cpu];
|
|
|
|
if (src_wrq->nt_prev_runnable_sum < wts->prev_window_cpu[task_cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU%d src_nt_prs=%llu is lesser than task_contrib=%u",
|
|
p->pid, src_rq->cpu,
|
|
src_wrq->nt_prev_runnable_sum,
|
|
wts->prev_window_cpu[task_cpu]);
|
|
src_wrq->nt_prev_runnable_sum = wts->prev_window_cpu[task_cpu];
|
|
}
|
|
src_wrq->nt_prev_runnable_sum -=
|
|
wts->prev_window_cpu[task_cpu];
|
|
}
|
|
|
|
wts->curr_window_cpu[task_cpu] = 0;
|
|
wts->prev_window_cpu[task_cpu] = 0;
|
|
|
|
update_cluster_load_subtractions(p, task_cpu,
|
|
src_wrq->window_start, new_task);
|
|
}
|
|
|
|
static inline void migrate_inter_cluster_addition(struct task_struct *p, int new_cpu,
|
|
bool new_task)
|
|
{
|
|
struct walt_rq *dest_wrq = &per_cpu(walt_rq, new_cpu);
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
|
|
wts->curr_window_cpu[new_cpu] = wts->curr_window;
|
|
wts->prev_window_cpu[new_cpu] = wts->prev_window;
|
|
|
|
dest_wrq->curr_runnable_sum += wts->curr_window;
|
|
dest_wrq->prev_runnable_sum += wts->prev_window;
|
|
|
|
if (new_task) {
|
|
dest_wrq->nt_curr_runnable_sum += wts->curr_window;
|
|
dest_wrq->nt_prev_runnable_sum += wts->prev_window;
|
|
}
|
|
}
|
|
|
|
static u32 load_to_index(u32 load)
|
|
{
|
|
u32 index = load / sched_load_granule;
|
|
|
|
return min(index, (u32)(NUM_LOAD_INDICES - 1));
|
|
}
|
|
|
|
static void migrate_top_tasks_subtraction(struct task_struct *p, struct rq *src_rq)
|
|
{
|
|
int index;
|
|
int top_index;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u32 curr_window = wts->curr_window;
|
|
u32 prev_window = wts->prev_window;
|
|
struct walt_rq *src_wrq = &per_cpu(walt_rq, cpu_of(src_rq));
|
|
u8 src = src_wrq->curr_table;
|
|
u8 *src_table;
|
|
|
|
if (curr_window) {
|
|
src_table = src_wrq->top_tasks[src];
|
|
index = load_to_index(curr_window);
|
|
src_table[index] -= 1;
|
|
|
|
if (!src_table[index])
|
|
__clear_bit(NUM_LOAD_INDICES - index - 1,
|
|
src_wrq->top_tasks_bitmap[src]);
|
|
|
|
top_index = src_wrq->curr_top;
|
|
if (index == top_index && !src_table[index])
|
|
src_wrq->curr_top = get_top_index(
|
|
src_wrq->top_tasks_bitmap[src], top_index);
|
|
}
|
|
|
|
if (prev_window) {
|
|
src = 1 - src;
|
|
src_table = src_wrq->top_tasks[src];
|
|
index = load_to_index(prev_window);
|
|
src_table[index] -= 1;
|
|
|
|
if (!src_table[index])
|
|
__clear_bit(NUM_LOAD_INDICES - index - 1,
|
|
src_wrq->top_tasks_bitmap[src]);
|
|
|
|
top_index = src_wrq->prev_top;
|
|
if (index == top_index && !src_table[index])
|
|
src_wrq->prev_top = get_top_index(
|
|
src_wrq->top_tasks_bitmap[src], top_index);
|
|
}
|
|
}
|
|
|
|
static void migrate_top_tasks_addition(struct task_struct *p, struct rq *rq)
|
|
{
|
|
int index;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u32 curr_window = wts->curr_window;
|
|
u32 prev_window = wts->prev_window;
|
|
struct walt_rq *dst_wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u8 dst = dst_wrq->curr_table;
|
|
u8 *dst_table;
|
|
|
|
if (curr_window) {
|
|
dst_table = dst_wrq->top_tasks[dst];
|
|
index = load_to_index(curr_window);
|
|
dst_table[index] += 1;
|
|
|
|
if (dst_table[index] == 1)
|
|
__set_bit(NUM_LOAD_INDICES - index - 1,
|
|
dst_wrq->top_tasks_bitmap[dst]);
|
|
|
|
if (index > dst_wrq->curr_top)
|
|
dst_wrq->curr_top = index;
|
|
}
|
|
|
|
if (prev_window) {
|
|
dst = 1 - dst;
|
|
dst_table = dst_wrq->top_tasks[dst];
|
|
index = load_to_index(prev_window);
|
|
dst_table[index] += 1;
|
|
|
|
if (dst_table[index] == 1)
|
|
__set_bit(NUM_LOAD_INDICES - index - 1,
|
|
dst_wrq->top_tasks_bitmap[dst]);
|
|
|
|
if (index > dst_wrq->prev_top)
|
|
dst_wrq->prev_top = index;
|
|
}
|
|
}
|
|
|
|
static inline bool is_new_task(struct task_struct *p)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
return wts->active_time < NEW_TASK_ACTIVE_TIME;
|
|
}
|
|
static inline int run_walt_irq_work_rollover(u64 old_window_start, struct rq *rq);
|
|
|
|
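/*
 * Remove @p's current/previous window contributions from its source
 * runqueue ahead of a migration to @new_cpu: from the related-group time
 * if the task is grouped, otherwise from the per-CPU sums and cluster
 * subtraction entries for cross-cluster (different frequency domain)
 * moves. Also drops @p from the source top-task tables and clears the
 * cached ed_task if it was @p. Takes the source rq lock itself when
 * called on the TASK_WAKING path.
 */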
static void migrate_busy_time_subtraction(struct task_struct *p, int new_cpu)
|
|
{
|
|
struct rq *src_rq = task_rq(p);
|
|
u64 wallclock;
|
|
u64 *src_curr_runnable_sum, *src_prev_runnable_sum;
|
|
u64 *src_nt_curr_runnable_sum, *src_nt_prev_runnable_sum;
|
|
bool new_task;
|
|
struct walt_related_thread_group *grp;
|
|
long pstate;
|
|
struct walt_rq *src_wrq = &per_cpu(walt_rq, cpu_of(src_rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
if (!p->on_rq && READ_ONCE(p->__state) != TASK_WAKING)
|
|
return;
|
|
|
|
pstate = READ_ONCE(p->__state);
|
|
|
|
if (pstate == TASK_WAKING)
|
|
raw_spin_rq_lock(src_rq);
|
|
|
|
walt_lockdep_assert_rq(src_rq, p);
|
|
|
|
if (task_rq(p) != src_rq)
|
|
WALT_BUG(WALT_BUG_UPSTREAM, p, "on CPU %d task %s(%d) not on src_rq %d",
|
|
raw_smp_processor_id(), p->comm, p->pid, src_rq->cpu);
|
|
|
|
wts->new_cpu = new_cpu;
|
|
|
|
if (!same_freq_domain(task_cpu(p), new_cpu))
|
|
wts->enqueue_after_migration = 2; /* 2 is intercluster */
|
|
else
|
|
wts->enqueue_after_migration = 1; /* 1 is within cluster */
|
|
|
|
wallclock = walt_sched_clock();
|
|
walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0);
|
|
|
|
if (wts->window_start != src_wrq->window_start)
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"CPU%d: %s task %s(%d)'s ws=%llu not equal to src_rq %d's ws=%llu",
|
|
raw_smp_processor_id(), __func__, p->comm, p->pid,
|
|
wts->window_start, src_rq->cpu, src_wrq->window_start);
|
|
|
|
|
|
/* safe to update the task cyc cntr for new_cpu without the new_cpu rq_lock */
|
|
update_task_cpu_cycles(p, new_cpu, wallclock);
|
|
|
|
new_task = is_new_task(p);
|
|
/* Protected by rq_lock */
|
|
grp = wts->grp;
|
|
|
|
/*
|
|
* For frequency aggregation, we continue to do migration fixups
|
|
 * even for intra-cluster migrations. This is because the aggregated
 * load has to be reported on a single CPU regardless.
|
|
*/
|
|
if (grp) {
|
|
struct group_cpu_time *cpu_time = &src_wrq->grp_time;
|
|
|
|
src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
|
|
src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
|
|
src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
|
|
src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
|
|
|
|
if (wts->curr_window) {
|
|
*src_curr_runnable_sum -= wts->curr_window;
|
|
if (new_task)
|
|
*src_nt_curr_runnable_sum -= wts->curr_window;
|
|
}
|
|
|
|
if (wts->prev_window) {
|
|
*src_prev_runnable_sum -= wts->prev_window;
|
|
if (new_task)
|
|
*src_nt_prev_runnable_sum -= wts->prev_window;
|
|
}
|
|
} else {
|
|
if (wts->enqueue_after_migration == 2)
|
|
migrate_inter_cluster_subtraction(p, task_cpu(p), new_task);
|
|
}
|
|
|
|
migrate_top_tasks_subtraction(p, src_rq);
|
|
|
|
if (is_ed_enabled() && (p == src_wrq->ed_task))
|
|
src_wrq->ed_task = NULL;
|
|
|
|
wts->prev_cpu = task_cpu(p);
|
|
|
|
if (pstate == TASK_WAKING)
|
|
raw_spin_rq_unlock(src_rq);
|
|
}
|
|
|
|
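/*
 * Counterpart of migrate_busy_time_subtraction(): add @p's window
 * contributions to the destination runqueue (or its related-group time),
 * update the destination top-task tables, and, for cross-cluster moves,
 * mark both runqueues' notif_pending and queue the migration irq_work so
 * frequencies can be re-evaluated.
 */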
static void migrate_busy_time_addition(struct task_struct *p, int new_cpu, u64 wallclock)
|
|
{
|
|
struct rq *dest_rq = cpu_rq(new_cpu);
|
|
u64 *dst_curr_runnable_sum, *dst_prev_runnable_sum;
|
|
u64 *dst_nt_curr_runnable_sum, *dst_nt_prev_runnable_sum;
|
|
bool new_task;
|
|
struct walt_related_thread_group *grp;
|
|
struct walt_rq *dest_wrq = &per_cpu(walt_rq, new_cpu);
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
int src_cpu = wts->prev_cpu;
|
|
struct walt_rq *src_wrq = &per_cpu(walt_rq, src_cpu);
|
|
|
|
walt_lockdep_assert_rq(dest_rq, p);
|
|
|
|
walt_update_task_ravg(p, dest_rq, TASK_UPDATE, wallclock, 0);
|
|
|
|
if (wts->window_start != dest_wrq->window_start)
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"CPU%d: %s task %s(%d)'s ws=%llu not equal to dest_rq %d's ws=%llu",
|
|
raw_smp_processor_id(), __func__, p->comm, p->pid,
|
|
wts->window_start, dest_rq->cpu, dest_wrq->window_start);
|
|
|
|
new_task = is_new_task(p);
|
|
/* Protected by rq_lock */
|
|
grp = wts->grp;
|
|
|
|
/*
|
|
* For frequency aggregation, we continue to do migration fixups
|
|
 * even for intra-cluster migrations. This is because the aggregated
 * load has to be reported on a single CPU regardless.
|
|
*/
|
|
if (grp) {
|
|
struct group_cpu_time *cpu_time = &dest_wrq->grp_time;
|
|
|
|
dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
|
|
dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
|
|
dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
|
|
dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
|
|
|
|
if (wts->curr_window) {
|
|
*dst_curr_runnable_sum += wts->curr_window;
|
|
if (new_task)
|
|
*dst_nt_curr_runnable_sum += wts->curr_window;
|
|
}
|
|
|
|
if (wts->prev_window) {
|
|
*dst_prev_runnable_sum += wts->prev_window;
|
|
if (new_task)
|
|
*dst_nt_prev_runnable_sum += wts->prev_window;
|
|
}
|
|
} else {
|
|
if (wts->enqueue_after_migration == 2)
|
|
migrate_inter_cluster_addition(p, new_cpu, new_task);
|
|
}
|
|
|
|
migrate_top_tasks_addition(p, dest_rq);
|
|
|
|
if (wts->enqueue_after_migration == 2) {
|
|
src_wrq->notif_pending = true;
|
|
dest_wrq->notif_pending = true;
|
|
walt_irq_work_queue(&walt_migration_irq_work);
|
|
}
|
|
|
|
if (is_ed_enabled() && is_ed_task(p, wallclock))
|
|
dest_wrq->ed_task = p;
|
|
|
|
wts->new_cpu = -1;
|
|
}
|
|
|
|
#define INC_STEP 8
|
|
#define DEC_STEP 2
|
|
#define CONSISTENT_THRES 16
|
|
#define INC_STEP_BIG 16
|
|
/*
|
|
* bucket_increase - update the count of all buckets
|
|
*
|
|
 * @buckets: array of buckets tracking busy time of a task
 * @bucket_bitmask: bitmask of buckets that currently hold a non-zero count
 * @idx: the index of the bucket to be incremented
 *
 * Each time a complete window finishes, the count of the bucket that the
 * runtime falls in (@idx) is incremented and its bit is set in
 * @bucket_bitmask. Counts of all other buckets are decayed, and a bucket's
 * bit is cleared once its count drops to zero. The rate of increase and
 * decay can differ based on the current count in the bucket.
|
|
*/
|
|
static inline void bucket_increase(u8 *buckets, u16 *bucket_bitmask, int idx)
|
|
{
|
|
int i, step;
|
|
|
|
for (i = 0; i < NUM_BUSY_BUCKETS; i++) {
|
|
if (idx != i) {
|
|
if (buckets[i] > DEC_STEP)
|
|
buckets[i] -= DEC_STEP;
|
|
else {
|
|
buckets[i] = 0;
|
|
*bucket_bitmask &= ~BIT_MASK(i);
|
|
}
|
|
} else {
|
|
step = buckets[i] >= CONSISTENT_THRES ?
|
|
INC_STEP_BIG : INC_STEP;
|
|
if (buckets[i] > U8_MAX - step)
|
|
buckets[i] = U8_MAX;
|
|
else
|
|
buckets[i] += step;
|
|
*bucket_bitmask |= BIT_MASK(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline int busy_to_bucket(u16 normalized_rt)
|
|
{
|
|
int bidx;
|
|
|
|
bidx = normalized_rt >> (SCHED_CAPACITY_SHIFT - NUM_BUSY_BUCKETS_SHIFT);
|
|
bidx = min(bidx, NUM_BUSY_BUCKETS - 1);
|
|
|
|
/*
 * Combine the lowest two buckets. The lowest frequency falls into
 * the 2nd bucket, so predicting the lowest bucket separately is
 * not useful.
 */
|
|
if (!bidx)
|
|
bidx++;
|
|
|
|
return bidx;
|
|
}
|
|
|
|
/*
|
|
* get_pred_busy - calculate predicted demand for a task on runqueue
|
|
*
|
|
* @p: task whose prediction is being updated
|
|
* @start: starting bucket. returned prediction should not be lower than
|
|
* this bucket.
|
|
* @runtime: runtime of the task. returned prediction should not be lower
|
|
* than this runtime.
|
|
* Note: @start can be derived from @runtime. It's passed in only to
|
|
* avoid duplicated calculation in some cases.
|
|
*
|
|
* A new predicted busy time is returned for task @p based on @runtime
|
|
* passed in. The function searches through buckets that represent busy
|
|
* time equal to or bigger than @runtime and attempts to find the bucket
|
|
* to use for prediction. Once found, it searches through historical busy
|
|
* time and returns the latest that falls into the bucket. If no such busy
|
|
 * time exists, it returns the middle of that bucket.
|
|
*/
|
|
static u32 get_pred_busy(struct task_struct *p,
|
|
int start, u16 runtime_scaled, u16 bucket_bitmask)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u16 dmin, dmax;
|
|
int first = NUM_BUSY_BUCKETS, final = NUM_BUSY_BUCKETS;
|
|
u16 ret = runtime_scaled;
|
|
u16 next_mask = bucket_bitmask >> start;
|
|
u16 *hist_util = wts->sum_history_util;
|
|
int i;
|
|
|
|
/* skip prediction for new tasks due to lack of history */
|
|
if (unlikely(is_new_task(p)))
|
|
goto out;
|
|
|
|
/* find minimal bucket index to pick */
|
|
if (next_mask)
|
|
first = ffs(next_mask) - 1 + start;
|
|
|
|
/* if no higher buckets are filled, predict runtime */
|
|
if (first >= NUM_BUSY_BUCKETS)
|
|
goto out;
|
|
|
|
/* compute the bucket for prediction */
|
|
final = first;
|
|
|
|
/* determine demand range for the predicted bucket */
|
|
if (final < 2) {
|
|
/* lowest two buckets are combined */
|
|
dmin = 0;
|
|
final = 1;
|
|
} else {
|
|
dmin = final << (SCHED_CAPACITY_SHIFT - NUM_BUSY_BUCKETS_SHIFT);
|
|
}
|
|
dmax = (final + 1) << (SCHED_CAPACITY_SHIFT - NUM_BUSY_BUCKETS_SHIFT);
|
|
|
|
/*
|
|
* search through runtime history and return first runtime that falls
|
|
* into the range of predicted bucket.
|
|
*/
|
|
for (i = 0; i < RAVG_HIST_SIZE; i++) {
|
|
if (hist_util[i] >= dmin && hist_util[i] < dmax) {
|
|
ret = hist_util[i];
|
|
break;
|
|
}
|
|
}
|
|
/* no historical runtime within bucket found, use average of the bin */
|
|
if (ret < dmin)
|
|
ret = (u16) (((u32)dmin + dmax) / 2);
|
|
/*
|
|
* when updating in middle of a window, runtime could be higher
|
|
* than all recorded history. Always predict at least runtime.
|
|
*/
|
|
ret = max(runtime_scaled, ret);
|
|
out:
|
|
trace_sched_update_pred_demand(p, runtime_scaled,
|
|
ret, start, first, final, wts);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * The predicted demand of a task was calculated at the last window roll-over.
 * If the task's current window busy time exceeds the predicted
 * demand, update it here to reflect the task's needs.
 */
|
|
static void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
|
|
{
|
|
u16 new_pred_demand_scaled;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u16 curr_window_scaled;
|
|
|
|
if (walt_is_idle_task(p))
|
|
return;
|
|
|
|
if (event != PUT_PREV_TASK && event != TASK_UPDATE &&
|
|
(!SCHED_FREQ_ACCOUNT_WAIT_TIME ||
|
|
(event != TASK_MIGRATE &&
|
|
event != PICK_NEXT_TASK)))
|
|
return;
|
|
|
|
/*
 * TASK_UPDATE can be called on a sleeping task when it is moved
 * between related groups.
 */
|
|
if (event == TASK_UPDATE) {
|
|
if (!p->on_rq && !SCHED_FREQ_ACCOUNT_WAIT_TIME)
|
|
return;
|
|
}
|
|
|
|
curr_window_scaled = scale_time_to_util(wts->curr_window);
|
|
if (wts->pred_demand_scaled >= curr_window_scaled)
|
|
return;
|
|
|
|
new_pred_demand_scaled = get_pred_busy(p, busy_to_bucket(curr_window_scaled),
|
|
curr_window_scaled, wts->bucket_bitmask);
|
|
|
|
if (task_on_rq_queued(p))
|
|
fixup_walt_sched_stats_common(rq, p,
|
|
wts->demand_scaled,
|
|
new_pred_demand_scaled);
|
|
|
|
wts->pred_demand_scaled = new_pred_demand_scaled;
|
|
}
|
|
|
|
static void clear_top_tasks_bitmap(unsigned long *bitmap)
|
|
{
|
|
memset(bitmap, 0, top_tasks_bitmap_size);
|
|
__set_bit(NUM_LOAD_INDICES, bitmap);
|
|
}
|
|
|
|
static inline void clear_top_tasks_table(u8 *table)
|
|
{
|
|
memset(table, 0, NUM_LOAD_INDICES * sizeof(u8));
|
|
}
|
|
|
|
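/*
 * Maintain the per-CPU top-task tables for @p: move its contribution from
 * the old load index to the new one in the current table and, when the
 * window has rolled over, populate the previous table from prev_window.
 * The bitmaps and the cached curr_top/prev_top indices are kept in sync.
 */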
static void update_top_tasks(struct task_struct *p, struct rq *rq,
|
|
u32 old_curr_window, int new_window, bool full_window)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u8 curr = wrq->curr_table;
|
|
u8 prev = 1 - curr;
|
|
u8 *curr_table = wrq->top_tasks[curr];
|
|
u8 *prev_table = wrq->top_tasks[prev];
|
|
int old_index, new_index, update_index;
|
|
u32 curr_window = wts->curr_window;
|
|
u32 prev_window = wts->prev_window;
|
|
bool zero_index_update;
|
|
|
|
if (old_curr_window == curr_window && !new_window)
|
|
return;
|
|
|
|
old_index = load_to_index(old_curr_window);
|
|
new_index = load_to_index(curr_window);
|
|
|
|
if (!new_window) {
|
|
zero_index_update = !old_curr_window && curr_window;
|
|
if (old_index != new_index || zero_index_update) {
|
|
if (old_curr_window)
|
|
curr_table[old_index] -= 1;
|
|
if (curr_window)
|
|
curr_table[new_index] += 1;
|
|
if (new_index > wrq->curr_top)
|
|
wrq->curr_top = new_index;
|
|
}
|
|
|
|
if (!curr_table[old_index])
|
|
__clear_bit(NUM_LOAD_INDICES - old_index - 1,
|
|
wrq->top_tasks_bitmap[curr]);
|
|
|
|
if (curr_table[new_index] == 1)
|
|
__set_bit(NUM_LOAD_INDICES - new_index - 1,
|
|
wrq->top_tasks_bitmap[curr]);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* The window has rolled over for this task. By the time we get
|
|
 * here, the curr/prev swaps have already occurred. So we need
|
|
* to use prev_window for the new index.
|
|
*/
|
|
update_index = load_to_index(prev_window);
|
|
|
|
if (full_window) {
|
|
/*
|
|
* Two cases here. Either 'p' ran for the entire window or
|
|
* it didn't run at all. In either case there is no entry
|
|
* in the prev table. If 'p' ran the entire window, we just
|
|
* need to create a new entry in the prev table. In this case
|
|
 * update_index will correspond to sched_ravg_window,
|
|
* so we can unconditionally update the top index.
|
|
*/
|
|
if (prev_window) {
|
|
prev_table[update_index] += 1;
|
|
wrq->prev_top = update_index;
|
|
}
|
|
|
|
if (prev_table[update_index] == 1)
|
|
__set_bit(NUM_LOAD_INDICES - update_index - 1,
|
|
wrq->top_tasks_bitmap[prev]);
|
|
} else {
|
|
zero_index_update = !old_curr_window && prev_window;
|
|
if (old_index != update_index || zero_index_update) {
|
|
if (old_curr_window)
|
|
prev_table[old_index] -= 1;
|
|
|
|
prev_table[update_index] += 1;
|
|
|
|
if (update_index > wrq->prev_top)
|
|
wrq->prev_top = update_index;
|
|
|
|
if (!prev_table[old_index])
|
|
__clear_bit(NUM_LOAD_INDICES - old_index - 1,
|
|
wrq->top_tasks_bitmap[prev]);
|
|
|
|
if (prev_table[update_index] == 1)
|
|
__set_bit(NUM_LOAD_INDICES - update_index - 1,
|
|
wrq->top_tasks_bitmap[prev]);
|
|
}
|
|
}
|
|
|
|
if (curr_window) {
|
|
curr_table[new_index] += 1;
|
|
|
|
if (new_index > wrq->curr_top)
|
|
wrq->curr_top = new_index;
|
|
|
|
if (curr_table[new_index] == 1)
|
|
__set_bit(NUM_LOAD_INDICES - new_index - 1,
|
|
wrq->top_tasks_bitmap[curr]);
|
|
}
|
|
}
|
|
|
|
static void rollover_top_tasks(struct rq *rq, bool full_window)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u8 curr_table = wrq->curr_table;
|
|
u8 prev_table = 1 - curr_table;
|
|
int curr_top = wrq->curr_top;
|
|
|
|
clear_top_tasks_table(wrq->top_tasks[prev_table]);
|
|
clear_top_tasks_bitmap(wrq->top_tasks_bitmap[prev_table]);
|
|
|
|
if (full_window) {
|
|
curr_top = 0;
|
|
clear_top_tasks_table(wrq->top_tasks[curr_table]);
|
|
clear_top_tasks_bitmap(wrq->top_tasks_bitmap[curr_table]);
|
|
}
|
|
|
|
wrq->curr_table = prev_table;
|
|
wrq->prev_top = curr_top;
|
|
wrq->curr_top = 0;
|
|
}
|
|
|
|
static u32 empty_windows[WALT_NR_CPUS];
|
|
|
|
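/*
 * Per-task window rollover: the task's current window (and its per-CPU
 * contributions) become the previous window, or zero when a full window
 * elapsed without any activity. New tasks also accumulate active_time
 * here.
 */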
static void rollover_task_window(struct task_struct *p, bool full_window)
|
|
{
|
|
u32 *curr_cpu_windows = empty_windows;
|
|
u32 curr_window;
|
|
int i;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(task_rq(p)));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
/* Rollover the sum */
|
|
curr_window = 0;
|
|
|
|
if (!full_window) {
|
|
curr_window = wts->curr_window;
|
|
curr_cpu_windows = wts->curr_window_cpu;
|
|
}
|
|
|
|
wts->prev_window = curr_window;
|
|
wts->curr_window = 0;
|
|
|
|
/* Roll over individual CPU contributions */
|
|
for (i = 0; i < nr_cpu_ids; i++) {
|
|
wts->prev_window_cpu[i] = curr_cpu_windows[i];
|
|
wts->curr_window_cpu[i] = 0;
|
|
}
|
|
|
|
if (is_new_task(p))
|
|
wts->active_time += wrq->prev_window_size;
|
|
}
|
|
|
|
static inline int cpu_is_waiting_on_io(struct rq *rq)
|
|
{
|
|
if (!sched_io_is_busy)
|
|
return 0;
|
|
|
|
return atomic_read(&rq->nr_iowait);
|
|
}
|
|
|
|
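/*
 * Decide whether the elapsed time should be charged to the CPU busy
 * counters for this @event. Idle-task time is charged only when it covers
 * irqtime or outstanding iowait; wakeups are never charged; time for
 * PUT_PREV_TASK, IRQ_UPDATE and TASK_UPDATE of the running task always
 * is; wait time is charged only when SCHED_FREQ_ACCOUNT_WAIT_TIME is set.
 */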
static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p,
|
|
u64 irqtime, int event)
|
|
{
|
|
if (walt_is_idle_task(p)) {
|
|
/* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */
|
|
if (event == PICK_NEXT_TASK)
|
|
return 0;
|
|
|
|
/* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */
|
|
return irqtime || cpu_is_waiting_on_io(rq);
|
|
}
|
|
|
|
if (event == TASK_WAKE)
|
|
return 0;
|
|
|
|
if (event == PUT_PREV_TASK || event == IRQ_UPDATE)
|
|
return 1;
|
|
|
|
/*
 * TASK_UPDATE can be called on a sleeping task when it is moved
 * between related groups.
 */
|
|
if (event == TASK_UPDATE) {
|
|
if (rq->curr == p)
|
|
return 1;
|
|
|
|
return p->on_rq ? SCHED_FREQ_ACCOUNT_WAIT_TIME : 0;
|
|
}
|
|
|
|
/* TASK_MIGRATE, PICK_NEXT_TASK left */
|
|
return SCHED_FREQ_ACCOUNT_WAIT_TIME;
|
|
}
|
|
|
|
#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
|
|
|
|
static inline u64 scale_exec_time(u64 delta, struct rq *rq, struct walt_task_struct *wts)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
delta = (delta * wrq->task_exec_scale) >> SCHED_CAPACITY_SHIFT;
|
|
|
|
if (wts->load_boost && wts->grp)
|
|
delta = (delta * (1024 + wts->boosted_task_load) >> 10);
|
|
|
|
return delta;
|
|
}
|
|
|
|
/*
 * Convert busy time to its frequency equivalent.
 * Assumes load is scaled to 1024.
 */
|
|
static inline unsigned int load_to_freq(struct rq *rq, unsigned int load)
|
|
{
|
|
return mult_frac(cpu_max_possible_freq(cpu_of(rq)), load,
|
|
(unsigned int)arch_scale_cpu_capacity(cpu_of(rq)));
|
|
}
|
|
|
|
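/*
 * Return true when a frequency re-evaluation based on predicted load is
 * worthwhile: the CPU is not already at its maximum capacity and the
 * predicted demand sum exceeds the previously reported busy/estimated
 * time by more than roughly 400 MHz worth of load.
 */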
static bool do_pl_notif(struct rq *rq)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u64 prev = wrq->old_busy_time;
|
|
u64 pl = wrq->walt_stats.pred_demands_sum_scaled;
|
|
int cpu = cpu_of(rq);
|
|
|
|
/* If already at max freq, bail out */
|
|
if (capacity_orig_of(cpu) == capacity_curr_of(cpu))
|
|
return false;
|
|
|
|
prev = max(prev, wrq->old_estimated_time);
|
|
|
|
/* 400 MHz filter. */
|
|
return (pl > prev) && (load_to_freq(rq, pl - prev) > 400000);
|
|
}
|
|
|
|
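/*
 * CPU-level window rollover: the current runnable sums (including the
 * related-group time) become the previous sums, or zero when a full
 * window elapsed, and the current sums restart from zero.
 */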
static void rollover_cpu_window(struct rq *rq, bool full_window)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u64 curr_sum = wrq->curr_runnable_sum;
|
|
u64 nt_curr_sum = wrq->nt_curr_runnable_sum;
|
|
u64 grp_curr_sum = wrq->grp_time.curr_runnable_sum;
|
|
u64 grp_nt_curr_sum = wrq->grp_time.nt_curr_runnable_sum;
|
|
|
|
if (unlikely(full_window)) {
|
|
curr_sum = 0;
|
|
nt_curr_sum = 0;
|
|
grp_curr_sum = 0;
|
|
grp_nt_curr_sum = 0;
|
|
}
|
|
|
|
wrq->prev_runnable_sum = curr_sum;
|
|
wrq->nt_prev_runnable_sum = nt_curr_sum;
|
|
wrq->grp_time.prev_runnable_sum = grp_curr_sum;
|
|
wrq->grp_time.nt_prev_runnable_sum = grp_nt_curr_sum;
|
|
|
|
wrq->curr_runnable_sum = 0;
|
|
wrq->nt_curr_runnable_sum = 0;
|
|
wrq->grp_time.curr_runnable_sum = 0;
|
|
wrq->grp_time.nt_curr_runnable_sum = 0;
|
|
}
|
|
|
|
/*
|
|
 * Account CPU activity in its
 * busy time counters (wrq->curr/prev_runnable_sum).
|
|
*
|
|
* While the comments at the top of update_task_demand() apply, irqtime handling
|
|
* needs some explanation.
|
|
*
|
|
* Note that update_task_ravg() with irqtime is only called when idle, i.e. p is
|
|
* always idle
|
|
*
|
|
* ms_i = mark_start of idle task
|
|
* ws = wrq->window_start
|
|
* irq_s = start time of irq
|
|
* irq_e = end time of irq = wallclock
|
|
*
|
|
* note irqtime = irq_e - irq_s
|
|
*
|
|
* Similar to the explanation at update_task_demand(), we have a few situations for irqtime
|
|
*
|
|
* ws ms_i is ie
|
|
* | | | |
|
|
* V V V V
|
|
* --------|--------------------|
|
|
* prev curr
|
|
*
|
|
* In the above case, new_window is false and irqtime is accounted in curr_runnable_sum, this is
|
|
* done in the if (!new_window) block.
|
|
*
|
|
* ms_i ws is ie
|
|
* | | | |
|
|
* V V V V
|
|
* -------------|---------------------
|
|
* prev curr
|
|
*
|
|
* In this case, new_window is true, however the irqtime falls within the current window, the
|
|
* entire irqtime is accounted in curr_runnable_sum. This is handled in the if (irqtime) block and
|
|
* within that if (mark_start > window_start) block
|
|
*
|
|
* ms_i is ws ie
|
|
* | | | |
|
|
* V V V V
|
|
* --------------------|---------------
|
|
* prev curr
|
|
*
|
|
* In this case, new_window is true, portion of the irqtime needs to be accounted in
|
|
* prev_runnable_sum while the rest is in curr_runnable_sum. This is handled in the
|
|
* if (irqtime) block
|
|
*/
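/*
 * Worked example of the last case (illustrative numbers only): with a
 * 20 ms window starting at ws = 100 ms, an IRQ burst of irqtime = 3 ms
 * ending at wallclock = 101 ms started at 98 ms, i.e. before ws. The
 * 2 ms preceding ws is added to prev_runnable_sum and the remaining
 * 1 ms to curr_runnable_sum, both after scale_exec_time().
 */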
|
|
static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
|
|
int event, u64 wallclock, u64 irqtime)
|
|
{
|
|
int new_window, full_window = 0;
|
|
int p_is_curr_task = (p == rq->curr);
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u64 mark_start = wts->mark_start;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u64 window_start = wrq->window_start;
|
|
u32 window_size = wrq->prev_window_size;
|
|
u64 delta;
|
|
u64 *curr_runnable_sum = &wrq->curr_runnable_sum;
|
|
u64 *prev_runnable_sum = &wrq->prev_runnable_sum;
|
|
u64 *nt_curr_runnable_sum = &wrq->nt_curr_runnable_sum;
|
|
u64 *nt_prev_runnable_sum = &wrq->nt_prev_runnable_sum;
|
|
bool new_task;
|
|
struct walt_related_thread_group *grp;
|
|
int cpu = rq->cpu;
|
|
u32 old_curr_window = wts->curr_window;
|
|
|
|
walt_lockdep_assert_rq(rq, p);
|
|
|
|
new_window = mark_start < window_start;
|
|
if (new_window)
|
|
full_window = (window_start - mark_start) >= window_size;
|
|
|
|
/*
|
|
* Handle per-task window rollover. We don't care about the
|
|
* idle task.
|
|
*/
|
|
if (new_window) {
|
|
if (!walt_is_idle_task(p))
|
|
rollover_task_window(p, full_window);
|
|
wts->window_start = window_start;
|
|
}
|
|
|
|
new_task = is_new_task(p);
|
|
|
|
if (!account_busy_for_cpu_time(rq, p, irqtime, event))
|
|
goto done;
|
|
|
|
grp = wts->grp;
|
|
if (grp) {
|
|
struct group_cpu_time *cpu_time = &wrq->grp_time;
|
|
|
|
curr_runnable_sum = &cpu_time->curr_runnable_sum;
|
|
prev_runnable_sum = &cpu_time->prev_runnable_sum;
|
|
|
|
nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
|
|
nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
|
|
}
|
|
|
|
if (wts->window_start != wrq->window_start)
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"CPU%d: %s task %s(%d)'s ws=%llu not equal to rq %d's ws=%llu",
|
|
raw_smp_processor_id(), __func__, p->comm, p->pid,
|
|
wts->window_start, rq->cpu, wrq->window_start);
|
|
|
|
if (!new_window) {
|
|
/*
|
|
* account_busy_for_cpu_time() = 1 so busy time needs
|
|
* to be accounted to the current window. No rollover
|
|
* since we didn't start a new window. An example of this is
|
|
* when a task starts execution and then sleeps within the
|
|
* same window.
|
|
*/
|
|
|
|
if (!irqtime || !walt_is_idle_task(p) || cpu_is_waiting_on_io(rq))
|
|
delta = wallclock - mark_start;
|
|
else
|
|
delta = irqtime;
|
|
delta = scale_exec_time(delta, rq, wts);
|
|
*curr_runnable_sum += delta;
|
|
if (new_task)
|
|
*nt_curr_runnable_sum += delta;
|
|
|
|
if (!walt_is_idle_task(p)) {
|
|
wts->curr_window += delta;
|
|
wts->curr_window_cpu[cpu] += delta;
|
|
}
|
|
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* situations below this need window rollover,
|
|
* Rollover of cpu counters (curr/prev_runnable_sum) should have already been done
|
|
* in update_window_start()
|
|
*
|
|
* For task counters curr/prev_window[_cpu] are rolled over in the early part of
|
|
* this function. If full_window(s) have expired and time since last update needs
|
|
* to be accounted as busy time, set the prev to a complete window size time, else
|
|
* add the prev window portion.
|
|
*
|
|
* For task curr counters a new window has begun, always assign
|
|
*/
|
|
|
|
if (!p_is_curr_task) {
|
|
/*
|
|
* account_busy_for_cpu_time() = 1 so busy time needs
|
|
* to be accounted to the current window. A new window
|
|
* must have been started in update_window_start()
|
|
* - just split up and account as necessary into curr and prev.
|
|
*
|
|
* Irqtime can't be accounted by a task that isn't the
|
|
* currently running task.
|
|
*/
|
|
|
|
if (!full_window) {
|
|
/*
|
|
* A full window hasn't elapsed, account partial
|
|
* contribution to previous completed window.
|
|
*/
|
|
delta = scale_exec_time(window_start - mark_start, rq, wts);
|
|
wts->prev_window += delta;
|
|
wts->prev_window_cpu[cpu] += delta;
|
|
} else {
|
|
/*
|
|
* Since at least one full window has elapsed,
|
|
* the contribution to the previous window is the
|
|
* full window (window_size).
|
|
*/
|
|
delta = scale_exec_time(window_size, rq, wts);
|
|
wts->prev_window = delta;
|
|
wts->prev_window_cpu[cpu] = delta;
|
|
}
|
|
|
|
*prev_runnable_sum += delta;
|
|
if (new_task)
|
|
*nt_prev_runnable_sum += delta;
|
|
|
|
/* Account piece of busy time in the current window. */
|
|
delta = scale_exec_time(wallclock - window_start, rq, wts);
|
|
*curr_runnable_sum += delta;
|
|
if (new_task)
|
|
*nt_curr_runnable_sum += delta;
|
|
|
|
wts->curr_window = delta;
|
|
wts->curr_window_cpu[cpu] = delta;
|
|
|
|
goto done;
|
|
}
|
|
|
|
if (!irqtime || !walt_is_idle_task(p) || cpu_is_waiting_on_io(rq)) {
|
|
/*
|
|
* account_busy_for_cpu_time() = 1 so busy time needs
|
|
* to be accounted to the current window. A new window
|
|
* must have been started in update_window_start()
|
|
* If any of these three above conditions are true
|
|
* then this busy time can't be accounted as irqtime.
|
|
*
|
|
* Busy time for the idle task need not be accounted.
|
|
*
|
|
* An example of this would be a task that starts execution
|
|
* and then sleeps once a new window has begun.
|
|
*/
|
|
|
|
if (!full_window) {
|
|
/*
|
|
* A full window hasn't elapsed, account partial
|
|
* contribution to previous completed window.
|
|
*/
|
|
delta = scale_exec_time(window_start - mark_start, rq, wts);
|
|
if (!walt_is_idle_task(p)) {
|
|
wts->prev_window += delta;
|
|
wts->prev_window_cpu[cpu] += delta;
|
|
}
|
|
} else {
|
|
/*
|
|
* Since at least one full window has elapsed,
|
|
* the contribution to the previous window is the
|
|
* full window (window_size).
|
|
*/
|
|
delta = scale_exec_time(window_size, rq, wts);
|
|
if (!walt_is_idle_task(p)) {
|
|
wts->prev_window = delta;
|
|
wts->prev_window_cpu[cpu] = delta;
|
|
}
|
|
}
|
|
|
|
*prev_runnable_sum += delta;
|
|
if (new_task)
|
|
*nt_prev_runnable_sum += delta;
|
|
|
|
/* Account piece of busy time in the current window. */
|
|
delta = scale_exec_time(wallclock - window_start, rq, wts);
|
|
*curr_runnable_sum += delta;
|
|
if (new_task)
|
|
*nt_curr_runnable_sum += delta;
|
|
|
|
if (!walt_is_idle_task(p)) {
|
|
wts->curr_window = delta;
|
|
wts->curr_window_cpu[cpu] = delta;
|
|
}
|
|
|
|
goto done;
|
|
}
|
|
|
|
if (irqtime) {
|
|
/*
|
|
* account_busy_for_cpu_time() = 1 so busy time needs
|
|
* to be accounted to the current window. A new window
|
|
* must have been started in update_window_start()
|
|
* The current task must be the idle task because
|
|
* irqtime is not accounted for any other task.
|
|
*
|
|
* Irqtime will be accounted each time we process IRQ activity
|
|
* after a period of idleness, so we know the IRQ busy time
|
|
* started at wallclock - irqtime.
|
|
*/
|
|
|
|
WALT_PANIC(!walt_is_idle_task(p));
|
|
/* mark_start here becomes the starting time of interrupt */
|
|
mark_start = wallclock - irqtime;
|
|
|
|
/*
|
|
* If IRQ busy time was just in the current
|
|
* window then that is all that need be accounted.
|
|
*/
|
|
if (mark_start > window_start) {
|
|
*curr_runnable_sum += scale_exec_time(irqtime, rq, wts);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* The IRQ busy time spanned multiple windows. Process the
|
|
* busy time preceding the current window start first.
|
|
*/
|
|
delta = window_start - mark_start;
|
|
if (delta > window_size)
|
|
delta = window_size;
|
|
delta = scale_exec_time(delta, rq, wts);
|
|
*prev_runnable_sum += delta;
|
|
|
|
/* Process the remaining IRQ busy time in the current window. */
|
|
delta = wallclock - window_start;
|
|
wrq->curr_runnable_sum += scale_exec_time(delta, rq, wts);
|
|
|
|
return;
|
|
}
|
|
|
|
done:
|
|
if (!walt_is_idle_task(p))
|
|
update_top_tasks(p, rq, old_curr_window,
|
|
new_window, full_window);
|
|
}
|
|
|
|
static inline u16 predict_and_update_buckets(
|
|
struct task_struct *p, u16 runtime_scaled) {
|
|
int bidx;
|
|
u32 pred_demand_scaled;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
bidx = busy_to_bucket(runtime_scaled);
|
|
pred_demand_scaled = get_pred_busy(p, bidx, runtime_scaled, wts->bucket_bitmask);
|
|
bucket_increase(wts->busy_buckets, &wts->bucket_bitmask, bidx);
|
|
|
|
return pred_demand_scaled;
|
|
}
|
|
|
|
static int
|
|
account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event)
|
|
{
|
|
/*
|
|
* No need to bother updating task demand for the idle task.
|
|
*/
|
|
if (walt_is_idle_task(p))
|
|
return 0;
|
|
|
|
/*
|
|
* When a task is waking up it is completing a segment of non-busy
|
|
* time. Likewise, if wait time is not treated as busy time, then
|
|
* when a task begins to run or is migrated, it is not running and
|
|
* is completing a segment of non-busy time.
|
|
*/
|
|
if (event == TASK_WAKE || (!SCHED_ACCOUNT_WAIT_TIME &&
|
|
(event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
|
|
return 0;
|
|
|
|
/*
|
|
* The idle exit time is not accounted for the first task _picked_ up to
|
|
* run on the idle CPU.
|
|
*/
|
|
if (event == PICK_NEXT_TASK && rq->curr == rq->idle)
|
|
return 0;
|
|
|
|
/*
|
|
* TASK_UPDATE can be called on a sleeping task, when it is moved between
|
|
* related groups
|
|
*/
|
|
if (event == TASK_UPDATE) {
|
|
if (rq->curr == p)
|
|
return 1;
|
|
|
|
return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
#define TRAILBLAZER_THRES 230
|
|
#define TRAILBLAZER_BYPASS 243
|
|
#define FINAL_BUCKET_STEP_UP 8
|
|
#define FINAL_BUCKET_STEP_DOWN 1
|
|
|
|
static inline u32 scale_util_to_time(u16 util)
|
|
{
|
|
return util * walt_scale_demand_divisor;
|
|
}
|
|
|
|
static void update_trailblazer_accounting(struct task_struct *p, struct rq *rq,
|
|
u32 runtime, u16 runtime_scaled, u32 *demand, u16 *trailblazer_demand)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
bool is_prev_trailblazer = walt_flag_test(p, WALT_TRAILBLAZER_BIT);
|
|
u64 trailblazer_capacity;
|
|
|
|
if (walt_feat(WALT_FEAT_TRAILBLAZER_BIT) &&
|
|
(((runtime >= *demand) && (wts->high_util_history >= TRAILBLAZER_THRES)) ||
|
|
wts->high_util_history >= TRAILBLAZER_BYPASS)) {
|
|
*trailblazer_demand = 1 << SCHED_CAPACITY_SHIFT;
|
|
*demand = scale_util_to_time(*trailblazer_demand);
|
|
walt_flag_set(p, WALT_TRAILBLAZER_BIT, 1);
|
|
} else if (is_prev_trailblazer) {
|
|
walt_flag_set(p, WALT_TRAILBLAZER_BIT, 0);
|
|
}
|
|
|
|
/*
|
|
* In the event that a trailblazer task is detected (or an existing trailblazer task
|
|
* no longer matches the criteria) and is already enqueued on the cpu, ensure to
|
|
* close the prod-sum accounts for this task before the next update takes place.
|
|
*/
|
|
if (task_on_rq_queued(p)) {
|
|
if (is_prev_trailblazer != walt_flag_test(p, WALT_TRAILBLAZER_BIT))
|
|
sched_update_nr_prod(rq->cpu, 0);
|
|
if (is_prev_trailblazer && !walt_flag_test(p, WALT_TRAILBLAZER_BIT))
|
|
wrq->walt_stats.nr_trailblazer_tasks--;
|
|
else if (!is_prev_trailblazer && walt_flag_test(p, WALT_TRAILBLAZER_BIT))
|
|
wrq->walt_stats.nr_trailblazer_tasks++;
|
|
}
|
|
|
|
/*
|
|
* The CPU might be running with capped capacities. In order for a runtime to be considered
|
|
* as trailblazer worthy, it must be 87.5% or more of the prime CPU capacity.
|
|
*/
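/*
 * E.g. with a prime capacity of 1024 the threshold works out to
 * 1024 - (1024 >> 3) = 896, i.e. 87.5% (illustrative value; the real
 * capacity comes from capacity_orig_of() below).
 */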
|
|
trailblazer_capacity =
|
|
capacity_orig_of(cpumask_first(&cpu_array[0][num_sched_clusters - 1]));
|
|
trailblazer_capacity = trailblazer_capacity - (trailblazer_capacity >> 3);
|
|
if (runtime_scaled >= (u16)trailblazer_capacity) {
|
|
if (wts->high_util_history > U8_MAX - FINAL_BUCKET_STEP_UP)
|
|
wts->high_util_history = U8_MAX;
|
|
else
|
|
wts->high_util_history += FINAL_BUCKET_STEP_UP;
|
|
} else if (wts->high_util_history) {
|
|
wts->high_util_history -= FINAL_BUCKET_STEP_DOWN;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Called when new window is starting for a task, to record cpu usage over
|
|
* recently concluded window(s). Normally 'samples' should be 1. It can be > 1
|
|
* when, say, a real-time task runs without preemption for several windows at a
|
|
* stretch.
|
|
*/
|
|
static void update_history(struct rq *rq, struct task_struct *p,
|
|
u32 runtime, int samples, int event)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u32 *hist = &wts->sum_history[0];
|
|
u16 *hist_util = &wts->sum_history_util[0];
|
|
int i;
|
|
u32 max = 0, avg, demand;
|
|
u64 sum = 0;
|
|
u16 demand_scaled, pred_demand_scaled, runtime_scaled;
|
|
u16 trailblazer_demand = 0;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
/* clear yield status of task if there is a change in window */
|
|
|
|
if ((wts->yield_state & YIELD_CNT_MASK) < MAX_YIELD_CNT_PER_TASK_THR)
|
|
wts->yield_state = 0;
|
|
|
|
/* Ignore windows where task had no activity */
|
|
if (!runtime || walt_is_idle_task(p) || !samples)
|
|
goto done;
|
|
|
|
runtime_scaled = scale_time_to_util(runtime);
|
|
/* Push new 'runtime' value onto stack */
|
|
for (; samples > 0; samples--) {
|
|
hist[wts->cidx] = runtime;
|
|
hist_util[wts->cidx] = runtime_scaled;
|
|
wts->cidx++;
|
|
wts->cidx = wts->cidx % RAVG_HIST_SIZE;
|
|
}
|
|
|
|
for (i = 0; i < RAVG_HIST_SIZE; i++) {
|
|
sum += hist[i];
|
|
if (hist[i] > max)
|
|
max = hist[i];
|
|
}
|
|
|
|
wts->sum = 0;
|
|
avg = div64_u64(sum, RAVG_HIST_SIZE);
|
|
|
|
switch (sysctl_sched_window_stats_policy) {
|
|
case WINDOW_STATS_RECENT:
|
|
demand = runtime;
|
|
break;
|
|
case WINDOW_STATS_MAX:
|
|
demand = max;
|
|
break;
|
|
case WINDOW_STATS_AVG:
|
|
demand = avg;
|
|
break;
|
|
default:
|
|
demand = max(avg, runtime);
|
|
}
|
|
|
|
if (walt_fair_task(p))
|
|
update_trailblazer_accounting(p, rq, runtime, runtime_scaled,
|
|
&demand, &trailblazer_demand);
|
|
pred_demand_scaled = predict_and_update_buckets(p, runtime_scaled);
|
|
demand_scaled = scale_time_to_util(demand);
|
|
|
|
/*
|
|
* Avoid double accounting of task demand as demand will be updated
|
|
* to CRA as part of enqueue/dequeue.
|
|
*
|
|
* When window is rolled over, the cumulative window demand
|
|
* is reset to the cumulative runnable average (contribution from
|
|
* the tasks on the runqueue). If the current task is dequeued
|
|
* already, its demand is not included in the cumulative runnable
|
|
* average. So add the task demand separately to cumulative window
|
|
* demand.
|
|
*/
|
|
if (task_on_rq_queued(p))
|
|
fixup_walt_sched_stats_common(rq, p,
|
|
demand_scaled, pred_demand_scaled);
|
|
|
|
wts->demand = demand;
|
|
wts->demand_scaled = demand_scaled;
|
|
wts->coloc_demand = avg;
|
|
wts->pred_demand_scaled = pred_demand_scaled;
|
|
|
|
if (demand_scaled > sysctl_sched_min_task_util_for_colocation)
|
|
wts->unfilter = sysctl_sched_task_unfilter_period;
|
|
else
|
|
if (wts->unfilter)
|
|
wts->unfilter = max_t(int, 0,
|
|
wts->unfilter - wrq->prev_window_size);
|
|
done:
|
|
trace_sched_update_history(rq, p, runtime, samples, event, wrq, wts, trailblazer_demand);
|
|
}
|
|
|
|
static u64 add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
delta = scale_exec_time(delta, rq, wts);
|
|
wts->sum += delta;
|
|
if (unlikely(wts->sum > sched_ravg_window))
|
|
wts->sum = sched_ravg_window;
|
|
|
|
return delta;
|
|
}
|
|
|
|
/*
|
|
* Account cpu demand of task and/or update task's cpu demand history
|
|
*
|
|
* ms = wts->mark_start;
|
|
* wc = wallclock
|
|
* ws = wrq->window_start
|
|
*
|
|
* Three possibilities:
|
|
*
|
|
* a) Task event is contained within one window.
|
|
* window_start < mark_start < wallclock
|
|
*
|
|
* ws ms wc
|
|
* | | |
|
|
* V V V
|
|
* |---------------|
|
|
*
|
|
* In this case, wts->sum is updated *iff* event is appropriate
|
|
* (ex: event == PUT_PREV_TASK)
|
|
*
|
|
* b) Task event spans two windows.
|
|
* mark_start < window_start < wallclock
|
|
*
|
|
* ms ws wc
|
|
* | | |
|
|
* V V V
|
|
* -----|-------------------
|
|
*
|
|
* In this case, wts->sum is updated with (ws - ms) *iff* event
|
|
* is appropriate, then a new window sample is recorded followed
|
|
* by wts->sum being set to (wc - ws) *iff* event is appropriate.
|
|
*
|
|
* c) Task event spans more than two windows.
|
|
*
|
|
* ms ws_tmp ws wc
|
|
* | | | |
|
|
* V V V V
|
|
* ---|-------|-------|-------|-------|------
|
|
* | |
|
|
* |<------ nr_full_windows ------>|
|
|
*
|
|
* In this case, wts->sum is updated with (ws_tmp - ms) first *iff*
|
|
* event is appropriate, window sample of wts->sum is recorded,
|
|
* 'nr_full_windows' samples of window_size are also recorded *iff*
|
|
* event is appropriate and finally wts->sum is set to (wc - ws)
|
|
* *iff* event is appropriate.
|
|
*
|
|
* IMPORTANT : Leave wts->mark_start unchanged, as update_cpu_busy_time()
|
|
* depends on it!
|
|
*/
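/*
 * Worked example of case (c) (illustrative numbers only): with a 20 ms
 * window, ms = 5 ms, ws = 73 ms and wc = 78 ms. Then
 * nr_full_windows = (73 - 5) / 20 = 3 and ws_tmp = 73 - 3 * 20 = 13 ms,
 * so 8 ms is added to the old window, three full 20 ms samples are
 * recorded, and the final 5 ms (78 - 73) starts the new wts->sum.
 */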
|
|
static u64 update_task_demand(struct task_struct *p, struct rq *rq,
|
|
int event, u64 wallclock)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u64 mark_start = wts->mark_start;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u64 delta, window_start = wrq->window_start;
|
|
int new_window, nr_full_windows;
|
|
u32 window_size = sched_ravg_window;
|
|
u64 runtime;
|
|
|
|
new_window = mark_start < window_start;
|
|
if (!account_busy_for_task_demand(rq, p, event)) {
|
|
if (new_window)
|
|
/*
|
|
* If the time accounted isn't being accounted as
|
|
* busy time, and a new window started, only the
|
|
* previous window need be closed out with the
|
|
* pre-existing demand. Multiple windows may have
|
|
* elapsed, but since empty windows are dropped,
|
|
* it is not necessary to account those.
|
|
*/
|
|
update_history(rq, p, wts->sum, 1, event);
|
|
return 0;
|
|
}
|
|
|
|
if (!new_window) {
|
|
/*
|
|
* The simple case - busy time contained within the existing
|
|
* window.
|
|
*/
|
|
return add_to_task_demand(rq, p, wallclock - mark_start);
|
|
}
|
|
|
|
/*
|
|
* Busy time spans at least two windows. Temporarily rewind
|
|
* window_start to first window boundary after mark_start.
|
|
*/
|
|
delta = window_start - mark_start;
|
|
nr_full_windows = div64_u64(delta, window_size);
|
|
window_start -= (u64)nr_full_windows * (u64)window_size;
|
|
|
|
/* Process (window_start - mark_start) first */
|
|
runtime = add_to_task_demand(rq, p, window_start - mark_start);
|
|
|
|
/* Push new sample(s) into task's demand history */
|
|
update_history(rq, p, wts->sum, 1, event);
|
|
if (nr_full_windows) {
|
|
u64 scaled_window = scale_exec_time(window_size, rq, wts);
|
|
|
|
update_history(rq, p, scaled_window, nr_full_windows, event);
|
|
runtime += nr_full_windows * scaled_window;
|
|
}
|
|
|
|
/*
|
|
* Roll window_start back to current to process any remainder
|
|
* in current window.
|
|
*/
|
|
window_start += (u64)nr_full_windows * (u64)window_size;
|
|
|
|
/* Process (wallclock - window_start) next */
|
|
mark_start = window_start;
|
|
runtime += add_to_task_demand(rq, p, wallclock - mark_start);
|
|
|
|
return runtime;
|
|
}
|
|
|
|
static inline unsigned int cpu_cur_freq(int cpu)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
|
|
|
return wrq->cluster->cur_freq;
|
|
}
|
|
|
|
static void
|
|
update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
|
|
u64 wallclock, u64 irqtime)
|
|
{
|
|
u64 cur_cycles;
|
|
u64 cycles_delta;
|
|
u64 time_delta;
|
|
int cpu = cpu_of(rq);
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
walt_lockdep_assert_rq(rq, p);
|
|
|
|
if (!use_cycle_counter) {
|
|
wrq->task_exec_scale = DIV64_U64_ROUNDUP(cpu_cur_freq(cpu) *
|
|
arch_scale_cpu_capacity(cpu),
|
|
wrq->cluster->max_possible_freq);
|
|
return;
|
|
}
|
|
|
|
cur_cycles = read_cycle_counter(cpu, wallclock);
|
|
|
|
/*
|
|
* If the current task is the idle task and irqtime == 0, the CPU was
|
|
* indeed idle and probably its cycle counter was not
|
|
* increasing. We still need an estimated CPU frequency
|
|
* for IO wait time accounting. Use the previously
|
|
* calculated frequency in such a case.
|
|
*/
|
|
if (!walt_is_idle_task(rq->curr) || irqtime) {
|
|
if (unlikely(cur_cycles < wts->cpu_cycles))
|
|
cycles_delta = cur_cycles + (U64_MAX -
|
|
wts->cpu_cycles);
|
|
else
|
|
cycles_delta = cur_cycles - wts->cpu_cycles;
|
|
cycles_delta = cycles_delta * NSEC_PER_MSEC;
|
|
|
|
if (event == IRQ_UPDATE && walt_is_idle_task(p))
|
|
/*
|
|
* Time between mark_start of idle task and IRQ handler
|
|
* entry time is CPU cycle counter stall period.
|
|
* Upon IRQ handler entry walt_sched_account_irqstart()
|
|
* replenishes idle task's cpu cycle counter so
|
|
* cycles_delta now represents increased cycles during
|
|
* IRQ handler rather than time between idle entry and
|
|
* IRQ exit. Thus use irqtime as time delta.
|
|
*/
|
|
time_delta = irqtime;
|
|
else
|
|
time_delta = wallclock - wts->mark_start;
|
|
|
|
if ((s64)time_delta < 0) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU%d wallclock=%llu(0x%llx) < mark_start=%llu(0x%llx) event=%d irqtime=%llu",
|
|
p->pid, rq->cpu, wallclock, wallclock,
|
|
wts->mark_start, wts->mark_start, event, irqtime);
|
|
time_delta = 1;
|
|
}
|
|
|
|
wrq->task_exec_scale = DIV64_U64_ROUNDUP(cycles_delta *
|
|
arch_scale_cpu_capacity(cpu),
|
|
time_delta *
|
|
wrq->cluster->max_possible_freq);
|
|
|
|
trace_sched_get_task_cpu_cycles(cpu, event,
|
|
cycles_delta, time_delta, p);
|
|
}
|
|
|
|
wts->cpu_cycles = cur_cycles;
|
|
}
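/*
 * Worked example (illustrative numbers only, assuming the callback
 * returns raw CPU cycles): 1,500,000 cycles elapsing over a time_delta
 * of 1,000,000 ns correspond to an effective 1,500,000 kHz. With
 * capacity 1024 and max_possible_freq = 3,000,000 kHz this yields
 * task_exec_scale = 1500000 * 1024 / 3000000 = 512, i.e. the task ran
 * at roughly half speed.
 */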
|
|
|
|
/*
|
|
* Returns
|
|
* 0: if window rollover is not required or this CPU is not the winning CPU.
|
|
* 1: if this CPU is tasked with window rollover duties.
|
|
*/
|
|
static inline int run_walt_irq_work_rollover(u64 old_window_start, struct rq *rq)
|
|
{
|
|
u64 result;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
if (old_window_start == wrq->window_start)
|
|
return 0;
|
|
|
|
result = atomic64_cmpxchg(&walt_irq_work_lastq_ws, old_window_start,
|
|
wrq->window_start);
|
|
if (result == old_window_start) {
|
|
walt_irq_work_queue(&walt_cpufreq_irq_work);
|
|
trace_walt_window_rollover(wrq->window_start);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
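/*
 * Several CPUs can observe the window change back to back; the
 * atomic64_cmpxchg() above ensures only the first one to swap
 * walt_irq_work_lastq_ws from the old to the new window start queues
 * the cpufreq irq work, so the rollover work runs once per window.
 */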
|
|
|
|
static inline void set_bits(struct walt_task_struct *wts,
|
|
int nr_bits, bool set_bit)
|
|
{
|
|
int mask = 0;
|
|
|
|
if (nr_bits > 16)
|
|
nr_bits = 16;
|
|
wts->busy_bitmap = wts->busy_bitmap << nr_bits;
|
|
if (set_bit)
|
|
mask = (1 << nr_bits) - 1;
|
|
|
|
wts->busy_bitmap |= mask;
|
|
}
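/*
 * Example (illustrative): with busy_bitmap == 0b1011, a call of
 * set_bits(wts, 2, true) shifts to 0b101100 and ORs in the mask 0b11,
 * giving 0b101111; set_bits(wts, 2, false) would leave 0b101100,
 * marking the two newest milliseconds as idle.
 */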
|
|
|
|
/*
|
|
* Easy Case
|
|
*
|
|
* | ms wc |
|
|
* | | | |
|
|
* +----------+------------+-------------+--------
|
|
* | contrib->+----------->| |
|
|
* | | | |
|
|
* boundary next_ms_boundary
|
|
*
|
|
*
|
|
* ms is in the old ms boundary while wc is in the new one, in which case the code accounts the bit
|
|
* up to the next_ms_boundary, and the time from next_ms_boundary to wc is accounted in the period
|
|
*
|
|
* | ms | wc
|
|
* | | | |
|
|
* +----------+-------------------------+----------+-
|
|
* | | | contrib->|
|
|
* | | |
|
|
* boundary next_ms_boundary
|
|
*
|
|
*
|
|
* multiple boundaries between ms and wc, in which case the code accounts the bit
|
|
* up to the next_ms_boundary, fills in the intermediate periods, and the leftover from
|
|
* the closest boundary is accounted in the period
|
|
*
|
|
* | ms | | wc
|
|
* | | | | |
|
|
* +----------+-------------------------+------periods----------+---------+--------
|
|
* | | | |contrib->
|
|
* | | | |
|
|
* boundary next_ms_boundary
|
|
*/
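/*
 * Worked example (illustrative numbers only): mark_start = 10.3 ms
 * rounds up to next_ms_boundary = 11 ms. For wallclock = 13.4 ms the
 * 0.7 ms before the boundary closes the current bit (set, since it
 * exceeds 500 us when running), periods = (13.4 - 11) / 1 = 2 whole
 * milliseconds are filled in, and period_contrib_run restarts at
 * 13.4 ms % 1 ms = 0.4 ms.
 */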
|
|
static void update_busy_bitmap(struct task_struct *p, struct rq *rq, int event,
|
|
u64 wallclock)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *)p->android_vendor_data1;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, task_cpu(p));
|
|
u64 next_ms_boundary, delta;
|
|
int periods;
|
|
bool running;
|
|
int no_boost_reason = 0;
|
|
|
|
/*
|
|
* If it has been active for more than 4 ms, turn it off; the task that caused this activation
|
|
* should have slept, and if it is still running it must have updated its load via
|
|
* prs. No need to continue boosting.
|
|
*/
|
|
if (wallclock > wrq->lrb_pipeline_start_time + 4000000)
|
|
wrq->lrb_pipeline_start_time = 0;
|
|
|
|
if (!pipeline_in_progress())
|
|
return;
|
|
|
|
/*
|
|
* Figure out whether pipeline_cpu and cpu_of(rq) are the same or if it
|
|
* even matters.
|
|
*/
|
|
if (wts->pipeline_cpu == -1)
|
|
return;
|
|
|
|
if (wallclock < wts->mark_start) {
|
|
WALT_BUG(WALT_BUG_WALT, p, "on CPU%d: %s task %s(%d) mark_start %llu is higher than wallclock %llu\n",
|
|
raw_smp_processor_id(), __func__, p->comm, p->pid,
|
|
wts->mark_start, wallclock);
|
|
wallclock = wts->mark_start;
|
|
}
|
|
|
|
running = account_busy_for_cpu_time(rq, p, 0, event);
|
|
|
|
/* task woke up or utra happened while it was asleep, clear old boosts */
|
|
if (p->on_rq == 0)
|
|
walt_flag_set(p, WALT_LRB_PIPELINE_BIT, 0);
|
|
|
|
next_ms_boundary = ((wts->mark_start + (NSEC_PER_MSEC - 1)) / NSEC_PER_MSEC) *
|
|
NSEC_PER_MSEC;
|
|
if (wallclock < next_ms_boundary) {
|
|
if (running)
|
|
wts->period_contrib_run += wallclock - wts->mark_start;
|
|
goto out;
|
|
}
|
|
|
|
/* Exceeding a ms boundary */
|
|
|
|
/* Close the bit corresponding to the mark_start */
|
|
if (running)
|
|
wts->period_contrib_run += next_ms_boundary - wts->mark_start;
|
|
/* Set the bit representing the ms if runtime within that ms is more than 500us */
|
|
if (wts->period_contrib_run > 500000)
|
|
set_bits(wts, 1, true);
|
|
else
|
|
set_bits(wts, 1, false);
|
|
wts->period_contrib_run = 0;
|
|
|
|
/* Account the action starting from next_ms_boundary to the closest ms boundary */
|
|
delta = wallclock - next_ms_boundary;
|
|
periods = delta / NSEC_PER_MSEC;
|
|
|
|
if (periods) {
|
|
if (running)
|
|
set_bits(wts, periods, true);
|
|
else
|
|
set_bits(wts, periods, false);
|
|
}
|
|
|
|
/* Start contributions for latest ms */
|
|
if (running)
|
|
wts->period_contrib_run = wallclock % NSEC_PER_MSEC;
|
|
|
|
/* task had already set a boost since wakeup, boost just once since wakeup */
|
|
if (walt_flag_test(p, WALT_LRB_PIPELINE_BIT)) {
|
|
no_boost_reason = 1;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* task is not on_rq - if it is in the process of waking up, boost will be applied on the
|
|
* right cpu at PICK event
|
|
*/
|
|
if (p->on_rq == 0) {
|
|
no_boost_reason = 2;
|
|
goto out;
|
|
}
|
|
|
|
if (sched_ravg_window <= SCHED_RAVG_8MS_WINDOW &&
|
|
((hweight16(wts->busy_bitmap & 0x00FF) < sysctl_sched_lrpb_active_ms[0]) ||
|
|
!sysctl_sched_lrpb_active_ms[0])) {
|
|
no_boost_reason = 3;
|
|
goto out;
|
|
}
|
|
|
|
if (sched_ravg_window == SCHED_RAVG_12MS_WINDOW &&
|
|
((hweight16(wts->busy_bitmap & 0x0FFF) < sysctl_sched_lrpb_active_ms[1]) ||
|
|
!sysctl_sched_lrpb_active_ms[1])) {
|
|
no_boost_reason = 4;
|
|
goto out;
|
|
}
|
|
|
|
if (sched_ravg_window >= SCHED_RAVG_16MS_WINDOW &&
|
|
((hweight16(wts->busy_bitmap) < sysctl_sched_lrpb_active_ms[2]) ||
|
|
!sysctl_sched_lrpb_active_ms[2])) {
|
|
no_boost_reason = 5;
|
|
goto out;
|
|
}
|
|
|
|
/* cpu already boosted, so don't extend */
|
|
if (wrq->lrb_pipeline_start_time != 0) {
|
|
no_boost_reason = 6;
|
|
goto out;
|
|
}
|
|
|
|
walt_flag_set(p, WALT_LRB_PIPELINE_BIT, 1);
|
|
wrq->lrb_pipeline_start_time = wallclock;
|
|
|
|
out:
|
|
trace_sched_update_busy_bitmap(p, rq, wts, wrq, event,
|
|
wallclock, next_ms_boundary, no_boost_reason);
|
|
}
|
|
|
|
/* Reflect task activity on its demand and cpu's busy time statistics */
|
|
static void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
|
|
u64 wallclock, u64 irqtime)
|
|
{
|
|
u64 old_window_start;
|
|
int this_cpu_runs_window_rollover;
|
|
bool old_lrb_pipeline_task_state;
|
|
bool old_lrb_pipeline_cpu_state;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
if (!wrq->window_start || wts->mark_start == wallclock)
|
|
return;
|
|
|
|
if (unlikely(!raw_spin_is_locked(&rq->__lock))) {
|
|
WALT_BUG(WALT_BUG_WALT, p, "on CPU%d: %s task %s(%d) unlocked access for cpu=%d suspended=%d last_clk=%llu stack[%pS <== %pS <== %pS]\n",
|
|
raw_smp_processor_id(), __func__, p->comm, p->pid, rq->cpu,
|
|
walt_clock_suspended, sched_clock_last,
|
|
(void *)CALLER_ADDR0, (void *)CALLER_ADDR1, (void *)CALLER_ADDR2);
|
|
}
|
|
|
|
walt_lockdep_assert_rq(rq, p);
|
|
|
|
old_window_start = update_window_start(rq, wallclock, event);
|
|
old_lrb_pipeline_task_state = walt_flag_test(p, WALT_LRB_PIPELINE_BIT);
|
|
old_lrb_pipeline_cpu_state = wrq->lrb_pipeline_start_time;
|
|
if (!wts->window_start)
|
|
wts->window_start = wrq->window_start;
|
|
|
|
if (!wts->mark_start) {
|
|
update_task_cpu_cycles(p, cpu_of(rq), wallclock);
|
|
goto done;
|
|
}
|
|
|
|
update_task_rq_cpu_cycles(p, rq, event, wallclock, irqtime);
|
|
update_task_demand(p, rq, event, wallclock);
|
|
update_cpu_busy_time(p, rq, event, wallclock, irqtime);
|
|
update_task_pred_demand(rq, p, event);
|
|
update_busy_bitmap(p, rq, event, wallclock);
|
|
if (event == PUT_PREV_TASK && READ_ONCE(p->__state))
|
|
wts->iowaited = p->in_iowait;
|
|
|
|
trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime,
|
|
&wrq->grp_time, wrq, wts, atomic64_read(&walt_irq_work_lastq_ws));
|
|
trace_sched_update_task_ravg_mini(p, rq, event, wallclock, irqtime,
|
|
&wrq->grp_time, wrq, wts, atomic64_read(&walt_irq_work_lastq_ws));
|
|
|
|
done:
|
|
wts->mark_start = wallclock;
|
|
if (wts->mark_start > (wts->window_start + sched_ravg_window))
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"CPU%d: %s task %s(%d)'s ms=%llu is ahead of ws=%llu by more than 1 window on rq=%d event=%d",
|
|
raw_smp_processor_id(), __func__, p->comm, p->pid,
|
|
wts->mark_start, wts->window_start, rq->cpu, event);
|
|
|
|
this_cpu_runs_window_rollover = run_walt_irq_work_rollover(old_window_start, rq);
|
|
if (likely(!this_cpu_runs_window_rollover)) {
|
|
if ((unlikely(wts->pipeline_cpu != -1) &&
|
|
task_cpu(p) == cpu_of(rq) &&
|
|
!old_lrb_pipeline_task_state &&
|
|
walt_flag_test(p, WALT_LRB_PIPELINE_BIT)) ||
|
|
(old_lrb_pipeline_cpu_state && !wrq->lrb_pipeline_start_time))
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_PIPELINE_BUSY_BIT);
|
|
}
|
|
}
|
|
|
|
static inline void __sched_fork_init(struct task_struct *p)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
wts->last_sleep_ts = 0;
|
|
wts->wake_up_idle = false;
|
|
wts->boost = 0;
|
|
wts->boost_expires = 0;
|
|
wts->boost_period = false;
|
|
wts->low_latency = false;
|
|
wts->iowaited = false;
|
|
wts->load_boost = 0;
|
|
wts->boosted_task_load = 0;
|
|
wts->reduce_mask = CPU_MASK_ALL;
|
|
}
|
|
|
|
static void init_new_task_load(struct task_struct *p)
|
|
{
|
|
int i;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
struct walt_task_struct *cur_wts =
|
|
(struct walt_task_struct *) current->android_vendor_data1;
|
|
u32 init_load_windows = sched_init_task_load_windows;
|
|
u32 init_load_windows_scaled = sched_init_task_load_windows_scaled;
|
|
u32 init_load_pct = cur_wts->init_load_pct;
|
|
struct cpufreq_policy *policy;
|
|
unsigned long cpuinfo_max = 0;
|
|
unsigned long scaling_max = 0;
|
|
|
|
wts->init_load_pct = 0;
|
|
rcu_assign_pointer(wts->grp, NULL);
|
|
INIT_LIST_HEAD(&wts->grp_list);
|
|
|
|
wts->prev_cpu = raw_smp_processor_id();
|
|
wts->new_cpu = -1;
|
|
wts->enqueue_after_migration = 0;
|
|
wts->mark_start = 0;
|
|
wts->window_start = 0;
|
|
wts->sum = 0;
|
|
wts->curr_window = 0;
|
|
wts->prev_window = 0;
|
|
wts->active_time = 0;
|
|
wts->prev_on_rq = 0;
|
|
wts->prev_on_rq_cpu = -1;
|
|
wts->pipeline_cpu = -1;
|
|
wts->yield_state = 0;
|
|
wts->busy_bitmap = 0;
|
|
wts->period_contrib_run = 0;
|
|
|
|
for (i = 0; i < NUM_BUSY_BUCKETS; ++i)
|
|
wts->busy_buckets[i] = 0;
|
|
wts->bucket_bitmask = 0;
|
|
wts->cpu_cycles = 0;
|
|
|
|
memset(wts->curr_window_cpu, 0, sizeof(u32) * WALT_NR_CPUS);
|
|
memset(wts->prev_window_cpu, 0, sizeof(u32) * WALT_NR_CPUS);
|
|
|
|
if (trail_active && sustain_active && task_in_related_thread_group(p->group_leader) && (p->prio <= 120)) {
|
|
policy = cpufreq_cpu_get_raw(WALT_NR_CPUS-1);
|
|
if (policy) {
|
|
cpuinfo_max = policy->cpuinfo.max_freq;
|
|
scaling_max = policy->max;
|
|
}
|
|
if ((scaling_max > 0) && (cpuinfo_max == scaling_max))
|
|
init_load_pct = 90;
|
|
}
|
|
|
|
if (init_load_pct) {
|
|
init_load_windows = div64_u64((u64)init_load_pct *
|
|
(u64)sched_ravg_window, 100);
|
|
init_load_windows_scaled = scale_time_to_util(init_load_windows);
|
|
}
|
|
|
|
wts->demand = init_load_windows;
|
|
wts->demand_scaled = init_load_windows_scaled;
|
|
wts->coloc_demand = init_load_windows;
|
|
wts->pred_demand_scaled = 0;
|
|
for (i = 0; i < RAVG_HIST_SIZE; ++i)
|
|
wts->sum_history[i] = init_load_windows;
|
|
wts->misfit = false;
|
|
wts->rtg_high_prio = false;
|
|
wts->unfilter = sysctl_sched_task_unfilter_period;
|
|
|
|
INIT_LIST_HEAD(&wts->mvp_list);
|
|
wts->sum_exec_snapshot_for_slice = 0;
|
|
wts->sum_exec_snapshot_for_total = 0;
|
|
wts->total_exec = 0;
|
|
wts->mvp_prio = WALT_NOT_MVP;
|
|
wts->cidx = 0;
|
|
wts->mark_start_birth_ts = 0;
|
|
wts->high_util_history = 0;
|
|
__sched_fork_init(p);
|
|
|
|
/* New task inherits the MPAM part_id */
|
|
wts->mpam_part_id = cur_wts->mpam_part_id;
|
|
|
|
walt_flag_set(p, WALT_INIT_BIT, 1);
|
|
walt_flag_set(p, WALT_TRAILBLAZER_BIT, 0);
|
|
}
|
|
|
|
int remove_heavy(struct walt_task_struct *wts);
|
|
static void walt_task_dead(struct task_struct *p)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
sched_set_group_id(p, 0);
|
|
|
|
if (wts->low_latency & WALT_LOW_LATENCY_PIPELINE_BIT)
|
|
remove_pipeline(wts);
|
|
|
|
if (wts->low_latency & WALT_LOW_LATENCY_HEAVY_BIT)
|
|
remove_heavy(wts);
|
|
|
|
if (p == pipeline_special_task)
|
|
remove_special_task();
|
|
}
|
|
|
|
static void mark_task_starting(struct task_struct *p)
|
|
{
|
|
struct rq *rq = task_rq(p);
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
u64 wallclock = walt_rq_clock(rq);
|
|
|
|
wts->last_wake_ts = wallclock;
|
|
wts->last_enqueued_ts = wallclock;
|
|
wts->mark_start_birth_ts = wallclock;
|
|
|
|
if (wts->mark_start)
|
|
return;
|
|
walt_update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
|
|
}
|
|
|
|
/*
|
|
* Task groups whose aggregate demand on a cpu is more than
|
|
* sched_group_upmigrate need to be up-migrated if possible.
|
|
*/
|
|
static unsigned int __read_mostly sched_group_upmigrate = 20000000;
|
|
|
|
/*
|
|
* Task groups, once up-migrated, will need to drop their aggregate
|
|
* demand to less than sched_group_downmigrate before they are "down"
|
|
* migrated.
|
|
*/
|
|
static unsigned int __read_mostly sched_group_downmigrate = 19000000;
|
|
|
|
void walt_update_group_thresholds(void)
|
|
{
|
|
unsigned int min_scale = arch_scale_cpu_capacity(
|
|
cluster_first_cpu(sched_cluster[0]));
|
|
u64 min_ms = min_scale * (sched_ravg_window >> SCHED_CAPACITY_SHIFT);
|
|
|
|
sched_group_upmigrate = div64_ul(min_ms *
|
|
sysctl_sched_group_upmigrate_pct, 100);
|
|
sched_group_downmigrate = div64_ul(min_ms *
|
|
sysctl_sched_group_downmigrate_pct, 100);
|
|
}
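/*
 * Worked example (illustrative numbers only): with a smallest-cluster
 * capacity of 300 and a 20 ms window, min_ms = 300 * (20000000 >> 10)
 * = 5,859,300. Assuming upmigrate/downmigrate percentages of 100 and
 * 90, the group thresholds become ~5.86 ms and ~5.27 ms of aggregate
 * demand respectively.
 */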
|
|
|
|
struct walt_sched_cluster *sched_cluster[WALT_NR_CPUS];
|
|
__read_mostly int num_sched_clusters;
|
|
|
|
struct list_head cluster_head;
|
|
|
|
static struct walt_sched_cluster init_cluster = {
|
|
.list = LIST_HEAD_INIT(init_cluster.list),
|
|
.id = 0,
|
|
.cur_freq = 1,
|
|
.max_possible_freq = 1,
|
|
.aggr_grp_load = 0,
|
|
.found_ts = 0,
|
|
};
|
|
|
|
static void init_clusters(void)
|
|
{
|
|
init_cluster.cpus = *cpu_possible_mask;
|
|
raw_spin_lock_init(&init_cluster.load_lock);
|
|
INIT_LIST_HEAD(&cluster_head);
|
|
list_add(&init_cluster.list, &cluster_head);
|
|
}
|
|
|
|
static void
|
|
insert_cluster(struct walt_sched_cluster *cluster, struct list_head *head)
|
|
{
|
|
struct walt_sched_cluster *tmp;
|
|
struct list_head *iter = head;
|
|
|
|
list_for_each_entry(tmp, head, list) {
|
|
if (arch_scale_cpu_capacity(cluster_first_cpu(cluster))
|
|
< arch_scale_cpu_capacity(cluster_first_cpu(tmp)))
|
|
break;
|
|
iter = &tmp->list;
|
|
}
|
|
|
|
list_add(&cluster->list, iter);
|
|
}
|
|
|
|
static struct walt_sched_cluster *alloc_new_cluster(const struct cpumask *cpus)
|
|
{
|
|
struct walt_sched_cluster *cluster = NULL;
|
|
|
|
cluster = kzalloc(sizeof(struct walt_sched_cluster), GFP_ATOMIC);
|
|
BUG_ON(!cluster);
|
|
|
|
INIT_LIST_HEAD(&cluster->list);
|
|
cluster->cur_freq = 1;
|
|
cluster->max_freq = 1;
|
|
cluster->max_possible_freq = 1;
|
|
|
|
raw_spin_lock_init(&cluster->load_lock);
|
|
cluster->cpus = *cpus;
|
|
cluster->found_ts = 0;
|
|
|
|
return cluster;
|
|
}
|
|
|
|
static void add_cluster(const struct cpumask *cpus, struct list_head *head)
|
|
{
|
|
struct walt_sched_cluster *cluster = alloc_new_cluster(cpus);
|
|
int i;
|
|
struct walt_rq *wrq;
|
|
|
|
BUG_ON(num_sched_clusters >= MAX_CLUSTERS);
|
|
|
|
for_each_cpu(i, cpus) {
|
|
wrq = &per_cpu(walt_rq, i);
|
|
wrq->cluster = cluster;
|
|
}
|
|
|
|
insert_cluster(cluster, head);
|
|
num_sched_clusters++;
|
|
}
|
|
|
|
static void cleanup_clusters(struct list_head *head)
|
|
{
|
|
struct walt_sched_cluster *cluster, *tmp;
|
|
int i;
|
|
struct walt_rq *wrq;
|
|
|
|
list_for_each_entry_safe(cluster, tmp, head, list) {
|
|
for_each_cpu(i, &cluster->cpus) {
|
|
wrq = &per_cpu(walt_rq, i);
|
|
wrq->cluster = &init_cluster;
|
|
}
|
|
list_del(&cluster->list);
|
|
num_sched_clusters--;
|
|
kfree(cluster);
|
|
}
|
|
}
|
|
|
|
static inline void align_clusters(struct list_head *head)
|
|
{
|
|
struct walt_sched_cluster *tmp;
|
|
struct list_head *cluster1 = head, *cluster2 = head;
|
|
unsigned long capacity1 = 0, capacity2 = 0;
|
|
int i = 0;
|
|
|
|
if (num_sched_clusters != 4)
|
|
return;
|
|
|
|
list_for_each_entry(tmp, head, list) {
|
|
if (i == 1) {
|
|
cluster1 = &tmp->list;
|
|
capacity1 = arch_scale_cpu_capacity(cluster_first_cpu(tmp));
|
|
}
|
|
if (i == 2) {
|
|
cluster2 = &tmp->list;
|
|
capacity2 = arch_scale_cpu_capacity(cluster_first_cpu(tmp));
|
|
}
|
|
i++;
|
|
}
|
|
|
|
if (capacity1 < capacity2)
|
|
list_swap(cluster1, cluster2);
|
|
}
|
|
|
|
static inline void assign_cluster_ids(struct list_head *head)
|
|
{
|
|
struct walt_sched_cluster *cluster;
|
|
int pos = 0;
|
|
|
|
list_for_each_entry(cluster, head, list) {
|
|
cluster->id = pos;
|
|
sched_cluster[pos++] = cluster;
|
|
}
|
|
|
|
WARN_ON(pos > MAX_CLUSTERS);
|
|
}
|
|
|
|
static inline void
|
|
move_list(struct list_head *dst, struct list_head *src, bool sync_rcu)
|
|
{
|
|
struct list_head *first, *last;
|
|
|
|
first = src->next;
|
|
last = src->prev;
|
|
|
|
if (sync_rcu) {
|
|
INIT_LIST_HEAD_RCU(src);
|
|
synchronize_rcu();
|
|
}
|
|
|
|
first->prev = dst;
|
|
dst->prev = last;
|
|
last->next = dst;
|
|
|
|
/* Ensure list sanity before making the head visible to all CPUs. */
|
|
smp_mb();
|
|
dst->next = first;
|
|
}
|
|
|
|
static void update_all_clusters_stats(void)
|
|
{
|
|
struct walt_sched_cluster *cluster;
|
|
u64 highest_mpc = 0, lowest_mpc = U64_MAX;
|
|
|
|
for_each_sched_cluster(cluster) {
|
|
u64 mpc = arch_scale_cpu_capacity(
|
|
cluster_first_cpu(cluster));
|
|
int cluster_id = cluster->id;
|
|
|
|
if (mpc > highest_mpc) {
|
|
highest_mpc = mpc;
|
|
max_possible_cluster_id = cluster_id;
|
|
}
|
|
|
|
if (mpc < lowest_mpc) {
|
|
lowest_mpc = mpc;
|
|
min_possible_cluster_id = cluster_id;
|
|
}
|
|
}
|
|
walt_update_group_thresholds();
|
|
}
|
|
|
|
static bool walt_clusters_parsed;
|
|
cpumask_t __read_mostly **cpu_array;
|
|
|
|
u8 cpu_arrays_init_x11[1][1] = {
|
|
{0}, /* S */
|
|
};
|
|
|
|
u8 cpu_arrays_init_x22[2][2] = {
|
|
{0, 1}, /* S G */
|
|
{1, 0}, /* G S */
|
|
};
|
|
|
|
u8 cpu_arrays_init_x33[3][3] = {
|
|
{0, 1, 2}, /* S G P */
|
|
{1, 2, 0}, /* G P S */
|
|
{2, 1, 0}, /* P G S */
|
|
};
|
|
|
|
u8 cpu_arrays_init_x44[4][4] = {
|
|
{0, 2, 1, 3}, /* S T G P */
|
|
{1, 2, 3, 0}, /* G T P S */
|
|
{2, 3, 1, 0}, /* T P G S */
|
|
{3, 1, 2, 0}, /* P G T S */
|
|
};
|
|
|
|
static void init_cpu_array(void)
|
|
{
|
|
int i;
|
|
int rows = num_sched_clusters;
|
|
|
|
cpu_array = kcalloc(rows, sizeof(cpumask_t *),
|
|
GFP_ATOMIC | __GFP_NOFAIL);
|
|
if (!cpu_array)
|
|
WALT_PANIC(1);
|
|
|
|
for (i = 0; i < rows; i++) {
|
|
cpu_array[i] = kcalloc(num_sched_clusters, sizeof(cpumask_t),
|
|
GFP_ATOMIC | __GFP_NOFAIL);
|
|
if (!cpu_array[i])
|
|
WALT_PANIC(1);
|
|
}
|
|
}
|
|
|
|
static void build_cpu_array(void)
|
|
{
|
|
u8 *select_init_list;
|
|
u8 id;
|
|
int i, j;
|
|
|
|
if (!cpu_array)
|
|
WALT_PANIC(1);
|
|
|
|
switch (num_sched_clusters) {
|
|
case 1:
|
|
select_init_list = (u8 *)cpu_arrays_init_x11;
|
|
break;
|
|
case 2:
|
|
select_init_list = (u8 *)cpu_arrays_init_x22;
|
|
break;
|
|
case 3:
|
|
select_init_list = (u8 *)cpu_arrays_init_x33;
|
|
break;
|
|
case 4:
|
|
select_init_list = (u8 *)cpu_arrays_init_x44;
|
|
break;
|
|
default:
|
|
pr_err("unsupported num clusters=%d\n", num_sched_clusters);
|
|
WALT_PANIC(1);
|
|
}
|
|
|
|
for (i = 0; i < num_sched_clusters; i++) {
|
|
for (j = 0; j < num_sched_clusters; j++) {
|
|
id = select_init_list[i * num_sched_clusters + j];
|
|
cpumask_copy(&cpu_array[i][j], &sched_cluster[id]->cpus);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void walt_get_possible_siblings(int cpuid, struct cpumask *cluster_cpus)
|
|
{
|
|
int cpu;
|
|
struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
|
|
|
|
if (cpuid_topo->cluster_id == -1)
|
|
return;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
cpu_topo = &cpu_topology[cpu];
|
|
|
|
if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
|
|
continue;
|
|
cpumask_set_cpu(cpu, cluster_cpus);
|
|
}
|
|
}
|
|
|
|
int cpu_l2_sibling[WALT_NR_CPUS] = {[0 ... WALT_NR_CPUS-1] = -1};
|
|
static void find_cache_siblings(void)
|
|
{
|
|
int cpu, cpu2;
|
|
struct device_node *cpu_dev, *cpu_dev2, *cpu_l2_cache_node, *cpu_l2_cache_node2;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
cpu_dev = of_get_cpu_node(cpu, NULL);
|
|
if (!cpu_dev)
|
|
continue;
|
|
|
|
cpu_l2_cache_node = of_parse_phandle(cpu_dev, "next-level-cache", 0);
|
|
if (!cpu_l2_cache_node)
|
|
continue;
|
|
|
|
for_each_possible_cpu(cpu2) {
|
|
if (cpu == cpu2)
|
|
continue;
|
|
|
|
cpu_dev2 = of_get_cpu_node(cpu2, NULL);
|
|
if (!cpu_dev2)
|
|
continue;
|
|
|
|
cpu_l2_cache_node2 = of_parse_phandle(cpu_dev2, "next-level-cache", 0);
|
|
if (!cpu_l2_cache_node2)
|
|
continue;
|
|
|
|
if (cpu_l2_cache_node == cpu_l2_cache_node2) {
|
|
cpu_l2_sibling[cpu] = cpu2;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void walt_update_cluster_topology(void)
|
|
{
|
|
struct cpumask cpus = *cpu_possible_mask;
|
|
struct cpumask cluster_cpus;
|
|
struct walt_sched_cluster *cluster;
|
|
struct list_head new_head;
|
|
int i;
|
|
struct walt_rq *wrq;
|
|
|
|
INIT_LIST_HEAD(&new_head);
|
|
|
|
for_each_cpu(i, &cpus) {
|
|
cpumask_clear(&cluster_cpus);
|
|
walt_get_possible_siblings(i, &cluster_cpus);
|
|
if (cpumask_empty(&cluster_cpus)) {
|
|
WARN(1, "WALT: Invalid cpu topology!!");
|
|
cleanup_clusters(&new_head);
|
|
return;
|
|
}
|
|
cpumask_andnot(&cpus, &cpus, &cluster_cpus);
|
|
add_cluster(&cluster_cpus, &new_head);
|
|
}
|
|
|
|
align_clusters(&new_head);
|
|
assign_cluster_ids(&new_head);
|
|
|
|
list_for_each_entry(cluster, &new_head, list) {
|
|
struct cpufreq_policy *policy;
|
|
|
|
policy = cpufreq_cpu_get_raw(cluster_first_cpu(cluster));
|
|
/*
|
|
* walt_update_cluster_topology() must be called AFTER policies
|
|
* for all cpus are initialized. If not, simply BUG().
|
|
*/
|
|
WALT_PANIC(!policy);
|
|
|
|
if (policy) {
|
|
cluster->max_possible_freq = policy->cpuinfo.max_freq;
|
|
cluster->max_freq = policy->max;
|
|
for_each_cpu(i, &cluster->cpus) {
|
|
wrq = &per_cpu(walt_rq, i);
|
|
cpumask_copy(&wrq->freq_domain_cpumask,
|
|
policy->related_cpus);
|
|
}
|
|
cpuinfo_max_freq_cached = (cpuinfo_max_freq_cached >
|
|
policy->cpuinfo.max_freq) ? cpuinfo_max_freq_cached
|
|
: policy->cpuinfo.max_freq;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Ensure cluster ids are visible to all CPUs before making
|
|
* cluster_head visible.
|
|
*/
|
|
move_list(&cluster_head, &new_head, false);
|
|
update_all_clusters_stats();
|
|
|
|
init_cpu_array();
|
|
build_cpu_array();
|
|
find_cache_siblings();
|
|
|
|
create_util_to_cost();
|
|
walt_clusters_parsed = true;
|
|
}
|
|
|
|
static void walt_init_cycle_counter(void)
|
|
{
|
|
char *walt_cycle_cntr_path = "/soc/walt";
|
|
struct device_node *np = NULL;
|
|
|
|
if (soc_feat(SOC_ENABLE_SW_CYCLE_COUNTER_BIT)) {
|
|
walt_cycle_counter_init();
|
|
} else {
|
|
np = of_find_node_by_path(walt_cycle_cntr_path);
|
|
of_platform_populate(np, NULL, NULL, NULL);
|
|
}
|
|
|
|
wait_for_completion_interruptible(&walt_get_cycle_counts_cb_completion);
|
|
}
|
|
|
|
static void transfer_busy_time(struct rq *rq,
|
|
struct walt_related_thread_group *grp,
|
|
struct task_struct *p, int event);
|
|
|
|
/*
|
|
* Enable colocation and frequency aggregation for all threads in a process.
|
|
* The children inherit the group id from the parent.
|
|
*/
|
|
|
|
static struct walt_related_thread_group
|
|
*related_thread_groups[MAX_NUM_CGROUP_COLOC_ID];
|
|
static LIST_HEAD(active_related_thread_groups);
|
|
static DEFINE_RWLOCK(related_thread_group_lock);
|
|
|
|
static inline
|
|
void update_best_cluster(struct walt_related_thread_group *grp,
|
|
u64 combined_demand, bool boost)
|
|
{
|
|
if (boost) {
|
|
/*
|
|
* since we are in boost, we can keep grp on min, the boosts
|
|
* will ensure tasks get to bigs
|
|
*/
|
|
grp->skip_min = false;
|
|
return;
|
|
}
|
|
|
|
if (is_suh_max())
|
|
combined_demand = sched_group_upmigrate;
|
|
|
|
if (!grp->skip_min) {
|
|
if (combined_demand >= sched_group_upmigrate)
|
|
grp->skip_min = true;
|
|
return;
|
|
}
|
|
if (combined_demand < sched_group_downmigrate) {
|
|
if (!sysctl_sched_coloc_downmigrate_ns ||
|
|
(grp->last_update - grp->start_ktime_ts) <
|
|
sysctl_sched_hyst_min_coloc_ns) {
|
|
grp->downmigrate_ts = 0;
|
|
grp->skip_min = false;
|
|
return;
|
|
}
|
|
if (!grp->downmigrate_ts) {
|
|
grp->downmigrate_ts = grp->last_update;
|
|
return;
|
|
}
|
|
if (grp->last_update - grp->downmigrate_ts >
|
|
sysctl_sched_coloc_downmigrate_ns) {
|
|
grp->downmigrate_ts = 0;
|
|
grp->skip_min = false;
|
|
}
|
|
} else if (grp->downmigrate_ts)
|
|
grp->downmigrate_ts = 0;
|
|
}
|
|
|
|
static void _set_preferred_cluster(struct walt_related_thread_group *grp)
|
|
{
|
|
struct task_struct *p;
|
|
u64 combined_demand = 0;
|
|
bool group_boost = false;
|
|
u64 wallclock;
|
|
bool prev_skip_min = grp->skip_min;
|
|
struct walt_task_struct *wts;
|
|
|
|
if (sched_group_upmigrate == 0) {
|
|
grp->skip_min = false;
|
|
goto out;
|
|
}
|
|
|
|
if (list_empty(&grp->tasks)) {
|
|
grp->skip_min = false;
|
|
goto out;
|
|
}
|
|
|
|
if (!hmp_capable()) {
|
|
grp->skip_min = false;
|
|
goto out;
|
|
}
|
|
|
|
wallclock = walt_sched_clock();
|
|
|
|
/*
|
|
* wakeup of two or more related tasks could race with each other and
|
|
* could result in multiple calls to _set_preferred_cluster being issued
|
|
* at the same time. Avoid the overhead of rechecking the preferred
|
|
* cluster
|
|
*/
|
|
if (wallclock - grp->last_update < sched_ravg_window / 10)
|
|
return;
|
|
|
|
list_for_each_entry(wts, &grp->tasks, grp_list) {
|
|
p = wts_to_ts(wts);
|
|
if (task_boost_policy(p) == SCHED_BOOST_ON_BIG) {
|
|
group_boost = true;
|
|
break;
|
|
}
|
|
|
|
if (wts->mark_start < wallclock -
|
|
(sched_ravg_window * RAVG_HIST_SIZE))
|
|
continue;
|
|
|
|
combined_demand += wts->coloc_demand;
|
|
if (!trace_sched_set_preferred_cluster_enabled()) {
|
|
if (combined_demand > sched_group_upmigrate)
|
|
break;
|
|
}
|
|
}
|
|
|
|
grp->last_update = wallclock;
|
|
update_best_cluster(grp, combined_demand, group_boost);
|
|
|
|
out:
|
|
trace_sched_set_preferred_cluster(grp, combined_demand, prev_skip_min,
|
|
sched_group_upmigrate, sched_group_downmigrate);
|
|
if (grp->id == DEFAULT_CGROUP_COLOC_ID
|
|
&& grp->skip_min != prev_skip_min) {
|
|
if (grp->skip_min)
|
|
grp->start_ktime_ts = wallclock;
|
|
else
|
|
grp->start_ktime_ts = 0;
|
|
sched_update_hyst_times();
|
|
}
|
|
}
|
|
|
|
static void set_preferred_cluster(struct walt_related_thread_group *grp)
|
|
{
|
|
raw_spin_lock(&grp->lock);
|
|
_set_preferred_cluster(grp);
|
|
raw_spin_unlock(&grp->lock);
|
|
}
|
|
|
|
static int update_preferred_cluster(struct walt_related_thread_group *grp,
|
|
struct task_struct *p, u32 old_load, bool from_tick)
|
|
{
|
|
u32 new_load = task_load(p);
|
|
|
|
if (!grp)
|
|
return 0;
|
|
|
|
if (unlikely(from_tick && is_suh_max()))
|
|
return 1;
|
|
|
|
/*
|
|
* Update if task's load has changed significantly or a complete window
|
|
* has passed since we last updated preference
|
|
*/
|
|
|
|
if (abs(new_load - old_load) > sched_ravg_window / 4)
|
|
return 1;
|
|
|
|
if (walt_sched_clock() - grp->last_update > sched_ravg_window)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define ADD_TASK 0
|
|
#define REM_TASK 1
|
|
|
|
struct walt_related_thread_group*
|
|
lookup_related_thread_group(unsigned int group_id)
|
|
{
|
|
return related_thread_groups[group_id];
|
|
}
|
|
|
|
static int alloc_related_thread_groups(void)
|
|
{
|
|
int i;
|
|
struct walt_related_thread_group *grp;
|
|
|
|
/* group_id = 0 is invalid as it is a special id used to remove a task from its group. */
|
|
for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
|
|
grp = kzalloc(sizeof(*grp), GFP_ATOMIC | GFP_NOWAIT);
|
|
BUG_ON(!grp);
|
|
|
|
grp->id = i;
|
|
INIT_LIST_HEAD(&grp->tasks);
|
|
INIT_LIST_HEAD(&grp->list);
|
|
raw_spin_lock_init(&grp->lock);
|
|
|
|
related_thread_groups[i] = grp;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void remove_task_from_group(struct task_struct *p)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
struct walt_related_thread_group *grp = wts->grp;
|
|
struct rq *rq;
|
|
int empty_group = 1;
|
|
struct rq_flags rf;
|
|
|
|
raw_spin_lock(&grp->lock);
|
|
|
|
rq = __task_rq_lock(p, &rf);
|
|
transfer_busy_time(rq, wts->grp, p, REM_TASK);
|
|
list_del_init(&wts->grp_list);
|
|
rcu_assign_pointer(wts->grp, NULL);
|
|
__task_rq_unlock(rq, &rf);
|
|
|
|
if (!list_empty(&grp->tasks)) {
|
|
empty_group = 0;
|
|
_set_preferred_cluster(grp);
|
|
}
|
|
|
|
raw_spin_unlock(&grp->lock);
|
|
|
|
/* Reserved groups cannot be destroyed */
|
|
if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID)
|
|
/*
|
|
* We test whether grp->list is attached with list_empty()
|
|
* hence re-init the list after deletion.
|
|
*/
|
|
list_del_init(&grp->list);
|
|
}
|
|
|
|
static int
|
|
add_task_to_group(struct task_struct *p, struct walt_related_thread_group *grp)
|
|
{
|
|
struct rq *rq;
|
|
struct rq_flags rf;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
raw_spin_lock(&grp->lock);
|
|
|
|
/*
|
|
* Change wts->grp under rq->lock. Will prevent races with read-side
|
|
* reference of wts->grp in various hot-paths
|
|
*/
|
|
rq = __task_rq_lock(p, &rf);
|
|
transfer_busy_time(rq, grp, p, ADD_TASK);
|
|
list_add(&wts->grp_list, &grp->tasks);
|
|
rcu_assign_pointer(wts->grp, grp);
|
|
__task_rq_unlock(rq, &rf);
|
|
|
|
_set_preferred_cluster(grp);
|
|
|
|
raw_spin_unlock(&grp->lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_UCLAMP_TASK_GROUP
|
|
static inline bool uclamp_task_colocated(struct task_struct *p)
|
|
{
|
|
struct cgroup_subsys_state *css;
|
|
struct task_group *tg;
|
|
bool colocate;
|
|
struct walt_task_group *wtg;
|
|
|
|
rcu_read_lock();
|
|
css = task_css(p, cpu_cgrp_id);
|
|
if (!css) {
|
|
rcu_read_unlock();
|
|
return false;
|
|
}
|
|
tg = container_of(css, struct task_group, css);
|
|
wtg = (struct walt_task_group *) tg->android_vendor_data1;
|
|
colocate = wtg->colocate;
|
|
rcu_read_unlock();
|
|
|
|
return colocate;
|
|
}
|
|
#else
|
|
static inline bool uclamp_task_colocated(struct task_struct *p)
|
|
{
|
|
return false;
|
|
}
|
|
#endif /* CONFIG_UCLAMP_TASK_GROUP */
|
|
|
|
static void add_new_task_to_grp(struct task_struct *new)
|
|
{
|
|
unsigned long flags;
|
|
struct walt_related_thread_group *grp;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) new->android_vendor_data1;
|
|
|
|
/*
|
|
* If the task does not belong to the colocated schedtune
|
|
* cgroup, there is nothing to do. We are checking this without the
|
|
* lock. Even if there is a race, it will be added
|
|
* to the co-located cgroup via cgroup attach.
|
|
*/
|
|
if (!uclamp_task_colocated(new))
|
|
return;
|
|
|
|
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
|
|
write_lock_irqsave(&related_thread_group_lock, flags);
|
|
|
|
/*
|
|
* It's possible that someone already added the new task to the
|
|
* group, or it might have been taken out of the colocated schedtune
|
|
* cgroup. Check these conditions under the lock.
|
|
*/
|
|
if (!uclamp_task_colocated(new) || wts->grp) {
|
|
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
|
return;
|
|
}
|
|
|
|
raw_spin_lock(&grp->lock);
|
|
|
|
rcu_assign_pointer(wts->grp, grp);
|
|
list_add(&wts->grp_list, &grp->tasks);
|
|
|
|
raw_spin_unlock(&grp->lock);
|
|
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
|
}
|
|
|
|
static int __sched_set_group_id(struct task_struct *p, unsigned int group_id)
|
|
{
|
|
int rc = 0;
|
|
unsigned long flags;
|
|
struct walt_related_thread_group *grp = NULL;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
if (group_id >= MAX_NUM_CGROUP_COLOC_ID)
|
|
return -EINVAL;
|
|
|
|
if (unlikely(!walt_flag_test(p, WALT_INIT_BIT)))
|
|
return -EINVAL;
|
|
|
|
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
|
write_lock(&related_thread_group_lock);
|
|
|
|
/* Switching from one group to another directly is not permitted */
|
|
if ((!wts->grp && !group_id) || (wts->grp && group_id))
|
|
goto done;
|
|
|
|
if (!group_id) {
|
|
remove_task_from_group(p);
|
|
goto done;
|
|
}
|
|
|
|
grp = lookup_related_thread_group(group_id);
|
|
if (list_empty(&grp->list))
|
|
list_add(&grp->list, &active_related_thread_groups);
|
|
|
|
rc = add_task_to_group(p, grp);
|
|
done:
|
|
write_unlock(&related_thread_group_lock);
|
|
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
|
|
|
return rc;
|
|
}
|
|
|
|
int sched_set_group_id(struct task_struct *p, unsigned int group_id)
|
|
{
|
|
/* DEFAULT_CGROUP_COLOC_ID is a reserved id */
|
|
if (group_id == DEFAULT_CGROUP_COLOC_ID)
|
|
return -EINVAL;
|
|
|
|
return __sched_set_group_id(p, group_id);
|
|
}
|
|
|
|
unsigned int sched_get_group_id(struct task_struct *p)
|
|
{
|
|
unsigned int group_id;
|
|
struct walt_related_thread_group *grp;
|
|
|
|
rcu_read_lock();
|
|
grp = task_related_thread_group(p);
|
|
group_id = grp ? grp->id : 0;
|
|
rcu_read_unlock();
|
|
|
|
return group_id;
|
|
}
|
|
|
|
/*
|
|
* We create a default colocation group at boot. There is no need to
|
|
* synchronize tasks between cgroups at creation time because the
|
|
* correct cgroup hierarchy is not available at boot. Therefore cgroup
|
|
* colocation is turned off by default even though the colocation group
|
|
* itself has been allocated. Furthermore, this colocation group cannot
|
|
* be destroyed once it has been created. All of this has been done as part
|
|
* of runtime optimizations.
|
|
*
|
|
* The job of synchronizing tasks to the colocation group is done when
|
|
* the colocation flag in the cgroup is turned on.
|
|
*/
|
|
static int create_default_coloc_group(void)
|
|
{
|
|
struct walt_related_thread_group *grp = NULL;
|
|
unsigned long flags;
|
|
|
|
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
|
|
write_lock_irqsave(&related_thread_group_lock, flags);
|
|
list_add(&grp->list, &active_related_thread_groups);
|
|
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
|
return 0;
|
|
}
|
|
|
|
static void walt_update_tg_pointer(struct cgroup_subsys_state *css)
|
|
{
|
|
if (!strcmp(css->cgroup->kn->name, "top-app"))
|
|
walt_init_topapp_tg(css_tg(css));
|
|
else if (!strcmp(css->cgroup->kn->name, "foreground"))
|
|
walt_init_foreground_tg(css_tg(css));
|
|
else if (!strcmp(css->cgroup->kn->name, "foreground-boost"))
|
|
walt_init_foregroundboost_tg(css_tg(css));
|
|
else
|
|
walt_init_tg(css_tg(css));
|
|
}
|
|
|
|
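/*
 * walt_kick_cpu() - ask an idle @cpu to run the nohz idle load balance.
 * Sets the CPU's nohz kick flags and, if no kick was already pending,
 * sends an async csd IPI so the balance softirq runs on that CPU.
 */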
void walt_kick_cpu(int cpu)
|
|
{
|
|
unsigned int flags = NOHZ_KICK_MASK;
|
|
|
|
if (cpu == -1)
|
|
return;
|
|
|
|
/*
|
|
* Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
|
|
* the first flag owns it; cleared by nohz_csd_func().
|
|
*/
|
|
flags = atomic_fetch_or(flags, nohz_flags(cpu));
|
|
if (flags & NOHZ_KICK_MASK)
|
|
return;
|
|
|
|
/*
|
|
* This way we generate an IPI on the target CPU which
|
|
* is idle. And the softirq performing nohz idle load balance
|
|
* will be run before returning from the IPI.
|
|
*/
|
|
smp_call_function_single_async(cpu, &cpu_rq(cpu)->nohz_csd);
|
|
}
static void android_rvh_cpu_cgroup_online(void *unused, struct cgroup_subsys_state *css)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
walt_update_tg_pointer(css);
|
|
}
|
|
|
|
static void android_rvh_cpu_cgroup_attach(void *unused,
|
|
struct cgroup_taskset *tset)
|
|
{
|
|
struct task_struct *task;
|
|
struct cgroup_subsys_state *css;
|
|
struct task_group *tg;
|
|
struct walt_task_group *wtg;
|
|
unsigned int grp_id;
|
|
int ret;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
cgroup_taskset_first(tset, &css);
|
|
if (!css)
|
|
return;
|
|
|
|
tg = container_of(css, struct task_group, css);
|
|
wtg = (struct walt_task_group *) tg->android_vendor_data1;
|
|
|
|
cgroup_taskset_for_each(task, css, tset) {
|
|
grp_id = wtg->colocate ? DEFAULT_CGROUP_COLOC_ID : 0;
|
|
ret = __sched_set_group_id(task, grp_id);
|
|
trace_sched_cgroup_attach(task, grp_id, ret);
|
|
}
|
|
}
|
|
|
|
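/*
 * is_cluster_hosting_top_app() - return true if @cluster is where the
 * default colocation (top-app) group currently belongs: the min capacity
 * cluster while the group is not skipping min and no BIG boost is active,
 * a higher capacity cluster otherwise.
 */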
static bool is_cluster_hosting_top_app(struct walt_sched_cluster *cluster)
|
|
{
|
|
struct walt_related_thread_group *grp;
|
|
bool grp_on_min;
|
|
|
|
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
|
|
|
|
if (!grp)
|
|
return false;
|
|
|
|
grp_on_min = !grp->skip_min && (boost_policy != SCHED_BOOST_ON_BIG);
|
|
|
|
return (is_min_capacity_cluster(cluster) == grp_on_min);
|
|
}
|
|
|
|
static void note_task_waking(struct task_struct *p, u64 wallclock)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
wts->last_wake_ts = wallclock;
|
|
}
|
|
|
|
/*
|
|
* Task's cpu usage is accounted in:
|
|
* wrq->curr/prev_runnable_sum, when its ->grp is NULL
|
|
* grp->cpu_time[cpu]->curr/prev_runnable_sum, when its ->grp is !NULL
|
|
*
|
|
* Transfer task's cpu usage between those counters when transitioning between
|
|
* groups
|
|
*/
|
|
static void transfer_busy_time(struct rq *rq,
|
|
struct walt_related_thread_group *grp,
|
|
struct task_struct *p, int event)
|
|
{
|
|
u64 wallclock;
|
|
struct group_cpu_time *cpu_time;
|
|
u64 *src_curr_runnable_sum, *dst_curr_runnable_sum;
|
|
u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
|
|
u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
|
|
u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
|
|
int migrate_type;
|
|
int cpu = cpu_of(rq);
|
|
bool new_task;
|
|
int i;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
wallclock = walt_sched_clock();
|
|
|
|
walt_update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
|
|
|
|
if (wts->window_start != wrq->window_start)
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"CPU%d: %s event=%d task %s(%d)'s ws=%llu not equal to rq %d's ws=%llu",
|
|
raw_smp_processor_id(), __func__, event, p->comm, p->pid,
|
|
wts->window_start, rq->cpu, wrq->window_start);
|
|
|
|
new_task = is_new_task(p);
|
|
|
|
if (wts->enqueue_after_migration != 0) {
|
|
wallclock = walt_sched_clock();
|
|
migrate_busy_time_addition(p, cpu_of(rq), wallclock);
|
|
wts->enqueue_after_migration = 0;
|
|
}
|
|
|
|
cpu_time = &wrq->grp_time;
|
|
if (event == ADD_TASK) {
|
|
migrate_type = RQ_TO_GROUP;
|
|
|
|
src_curr_runnable_sum = &wrq->curr_runnable_sum;
|
|
dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
|
|
src_prev_runnable_sum = &wrq->prev_runnable_sum;
|
|
dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
|
|
|
|
src_nt_curr_runnable_sum = &wrq->nt_curr_runnable_sum;
|
|
dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
|
|
src_nt_prev_runnable_sum = &wrq->nt_prev_runnable_sum;
|
|
dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
|
|
|
|
if (*src_curr_runnable_sum < wts->curr_window_cpu[cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_crs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event, *src_curr_runnable_sum,
|
|
wts->curr_window_cpu[cpu]);
|
|
*src_curr_runnable_sum = wts->curr_window_cpu[cpu];
|
|
}
|
|
*src_curr_runnable_sum -= wts->curr_window_cpu[cpu];
|
|
|
|
if (*src_prev_runnable_sum < wts->prev_window_cpu[cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_prs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event, *src_prev_runnable_sum,
|
|
wts->prev_window_cpu[cpu]);
|
|
*src_prev_runnable_sum = wts->prev_window_cpu[cpu];
|
|
}
|
|
*src_prev_runnable_sum -= wts->prev_window_cpu[cpu];
|
|
|
|
if (new_task) {
|
|
if (*src_nt_curr_runnable_sum < wts->curr_window_cpu[cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_nt_crs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event,
|
|
*src_nt_curr_runnable_sum,
|
|
wts->curr_window_cpu[cpu]);
|
|
*src_nt_curr_runnable_sum = wts->curr_window_cpu[cpu];
|
|
}
|
|
*src_nt_curr_runnable_sum -=
|
|
wts->curr_window_cpu[cpu];
|
|
|
|
if (*src_nt_prev_runnable_sum < wts->prev_window_cpu[cpu]) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_nt_prs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event,
|
|
*src_nt_prev_runnable_sum,
|
|
wts->prev_window_cpu[cpu]);
|
|
*src_nt_prev_runnable_sum = wts->prev_window_cpu[cpu];
|
|
}
|
|
*src_nt_prev_runnable_sum -=
|
|
wts->prev_window_cpu[cpu];
|
|
}
|
|
|
|
update_cluster_load_subtractions(p, cpu,
|
|
wrq->window_start, new_task);
|
|
|
|
} else {
|
|
migrate_type = GROUP_TO_RQ;
|
|
|
|
src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
|
|
dst_curr_runnable_sum = &wrq->curr_runnable_sum;
|
|
src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
|
|
dst_prev_runnable_sum = &wrq->prev_runnable_sum;
|
|
|
|
src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
|
|
dst_nt_curr_runnable_sum = &wrq->nt_curr_runnable_sum;
|
|
src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
|
|
dst_nt_prev_runnable_sum = &wrq->nt_prev_runnable_sum;
|
|
|
|
if (*src_curr_runnable_sum < wts->curr_window) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"WALT-UG pid=%u CPU=%d event=%d src_crs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event, *src_curr_runnable_sum,
|
|
wts->curr_window);
|
|
*src_curr_runnable_sum = wts->curr_window;
|
|
}
|
|
*src_curr_runnable_sum -= wts->curr_window;
|
|
|
|
if (*src_prev_runnable_sum < wts->prev_window) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_prs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event, *src_prev_runnable_sum,
|
|
wts->prev_window);
|
|
*src_prev_runnable_sum = wts->prev_window;
|
|
}
|
|
*src_prev_runnable_sum -= wts->prev_window;
|
|
|
|
if (new_task) {
|
|
if (*src_nt_curr_runnable_sum < wts->curr_window) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_nt_crs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event,
|
|
*src_nt_curr_runnable_sum,
|
|
wts->curr_window);
|
|
*src_nt_curr_runnable_sum = wts->curr_window;
|
|
}
|
|
*src_nt_curr_runnable_sum -= wts->curr_window;
|
|
|
|
if (*src_nt_prev_runnable_sum < wts->prev_window) {
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"pid=%u CPU=%d event=%d src_nt_prs=%llu is lesser than task_contrib=%u",
|
|
p->pid, cpu, event,
|
|
*src_nt_prev_runnable_sum,
|
|
wts->prev_window);
|
|
*src_nt_prev_runnable_sum = wts->prev_window;
|
|
}
|
|
*src_nt_prev_runnable_sum -= wts->prev_window;
|
|
}
|
|
|
|
/*
|
|
* Need to reset curr/prev windows for all CPUs, not just the
|
|
* ones in the same cluster. Since inter cluster migrations
|
|
* did not result in the appropriate book keeping, the values
|
|
* per CPU would be inaccurate.
|
|
*/
|
|
for_each_possible_cpu(i) {
|
|
wts->curr_window_cpu[i] = 0;
|
|
wts->prev_window_cpu[i] = 0;
|
|
}
|
|
}
|
|
|
|
*dst_curr_runnable_sum += wts->curr_window;
|
|
*dst_prev_runnable_sum += wts->prev_window;
|
|
if (new_task) {
|
|
*dst_nt_curr_runnable_sum += wts->curr_window;
|
|
*dst_nt_prev_runnable_sum += wts->prev_window;
|
|
}
|
|
|
|
	/*
	 * When a task enters or exits a group, its curr and prev windows are
	 * moved to a single CPU. This behavior might be sub-optimal in the
	 * exit case, however, it saves us the overhead of handling inter
	 * cluster migration fixups while the task is part of a related group.
	 */
	wts->curr_window_cpu[cpu] = wts->curr_window;
	wts->prev_window_cpu[cpu] = wts->prev_window;

	trace_sched_migration_update_sum(p, migrate_type, rq);
}

bool is_rtgb_active(void)
{
	struct walt_related_thread_group *grp;

	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
	return grp && grp->skip_min;
}

u64 get_rtgb_active_time(void)
{
	struct walt_related_thread_group *grp;
	u64 now = walt_sched_clock();

	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);

	if (grp && grp->skip_min && grp->start_ktime_ts)
		return now - grp->start_ktime_ts;

	return 0;
}

static void walt_init_window_dep(void);
|
|
static void walt_tunables_fixup(void)
|
|
{
|
|
if (likely(num_sched_clusters > 0))
|
|
walt_update_group_thresholds();
|
|
walt_init_window_dep();
|
|
}
|
|
|
|
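/*
 * walt_update_irqload() - age the CPU's average irqload (scaled down by 3/4,
 * or cleared entirely once 10 or more windows have elapsed), add the irq time
 * accumulated since the last reading, and mark the CPU as high_irqload when
 * the average crosses walt_cpu_high_irqload within SCHED_HIGH_IRQ_TIMEOUT
 * windows of the last irq window.
 */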
static void walt_update_irqload(struct rq *rq)
|
|
{
|
|
u64 irq_delta = 0;
|
|
unsigned int nr_windows = 0;
|
|
u64 cur_irq_time;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
u64 last_irq_window = READ_ONCE(wrq->last_irq_window);
|
|
|
|
if (wrq->window_start > last_irq_window)
|
|
nr_windows = div64_u64(wrq->window_start - last_irq_window,
|
|
sched_ravg_window);
|
|
|
|
/* Decay CPU's irqload by 3/4 for each window. */
|
|
if (nr_windows < 10)
|
|
wrq->avg_irqload = mult_frac(wrq->avg_irqload, 3, 4);
|
|
else
|
|
wrq->avg_irqload = 0;
|
|
|
|
cur_irq_time = irq_time_read(cpu_of(rq));
|
|
if (cur_irq_time > wrq->prev_irq_time)
|
|
irq_delta = cur_irq_time - wrq->prev_irq_time;
|
|
|
|
wrq->avg_irqload += irq_delta;
|
|
wrq->prev_irq_time = cur_irq_time;
|
|
|
|
if (nr_windows < SCHED_HIGH_IRQ_TIMEOUT)
|
|
wrq->high_irqload = (wrq->avg_irqload >=
|
|
walt_cpu_high_irqload);
|
|
else
|
|
wrq->high_irqload = false;
|
|
}
|
|
|
|
/**
 * __walt_irq_work_locked() - common function to process work
 * @is_migration: if true, performing migration work, else rollover
 * @is_asym_migration: if true, performing migration involving an asym cap sibling
 * @is_pipeline_sync_migration: if true, performing migration involving the
 *			pipeline sync cpus
 * @lock_cpus: mask of the cpus involved in the operation.
 *
 * In rq-locked context, update the cluster group load and find
 * the load of the min cluster, while tracking the total aggregate
 * workload. Update cpufreq through the walt governor,
 * based upon the newly calculated load.
 *
 * For the window rollover case lock_cpus will be all possible cpus,
 * and for migrations it will include the cpus from the two clusters
 * involved in the migration.
 */
static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migration,
|
|
bool is_pipeline_sync_migration, struct cpumask *lock_cpus)
|
|
{
|
|
struct walt_sched_cluster *cluster;
|
|
struct rq *rq;
|
|
int cpu;
|
|
u64 wc;
|
|
u64 total_grp_load = 0;
|
|
unsigned long flags;
|
|
struct walt_rq *wrq;
|
|
|
|
wc = walt_sched_clock();
|
|
if (!is_migration)
|
|
walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws);
|
|
for_each_sched_cluster(cluster) {
|
|
u64 aggr_grp_load = 0;
|
|
|
|
raw_spin_lock(&cluster->load_lock);
|
|
for_each_cpu(cpu, &cluster->cpus) {
|
|
rq = cpu_rq(cpu);
|
|
wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
if (rq->curr) {
|
|
/* only update ravg for locked cpus */
|
|
if (cpumask_intersects(lock_cpus, &cluster->cpus)) {
|
|
if (unlikely(!raw_spin_is_locked(&rq->__lock))) {
|
|
WALT_BUG(WALT_BUG_WALT, NULL, "%s unlocked cpu=%d is_migration=%d is_asym_migration=%d is_pipeline_sync_migration=%d lock_cpus=%*pbl suspended=%d last_clk=%llu stack[%pS <= %pS <= %pS]\n",
|
|
__func__, rq->cpu, is_migration, is_asym_migration,
|
|
is_pipeline_sync_migration,
|
|
cpumask_pr_args(lock_cpus), walt_clock_suspended,
|
|
sched_clock_last, (void *)CALLER_ADDR0,
|
|
(void *)CALLER_ADDR1, (void *)CALLER_ADDR2);
|
|
}
|
|
walt_update_task_ravg(rq->curr, rq,
|
|
TASK_UPDATE, wc, 0);
|
|
account_load_subtractions(rq);
|
|
}
|
|
|
|
/* update aggr_grp_load for all clusters, all cpus */
|
|
aggr_grp_load +=
|
|
wrq->grp_time.prev_runnable_sum;
|
|
}
|
|
}
|
|
raw_spin_unlock(&cluster->load_lock);
|
|
|
|
cluster->aggr_grp_load = aggr_grp_load;
|
|
total_grp_load += aggr_grp_load;
|
|
}
|
|
|
|
if (total_grp_load)
|
|
rtgb_active = is_rtgb_active();
|
|
else
|
|
rtgb_active = false;
|
|
|
|
if (!is_migration && sysctl_sched_user_hint && time_after(jiffies,
|
|
sched_user_hint_reset_time))
|
|
sysctl_sched_user_hint = 0;
|
|
|
|
for_each_sched_cluster(cluster) {
|
|
cpumask_t cluster_online_cpus;
|
|
unsigned int num_cpus, i = 1;
|
|
|
|
/* for migration, skip unnotified clusters */
|
|
if (is_migration && !cpumask_intersects(lock_cpus, &cluster->cpus))
|
|
continue;
|
|
|
|
cpumask_and(&cluster_online_cpus, &cluster->cpus,
|
|
cpu_online_mask);
|
|
num_cpus = cpumask_weight(&cluster_online_cpus);
|
|
for_each_cpu(cpu, &cluster_online_cpus) {
|
|
int wflag = 0;
|
|
|
|
rq = cpu_rq(cpu);
|
|
wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
if (is_migration) {
|
|
if (wrq->notif_pending) {
|
|
wrq->notif_pending = false;
|
|
wflag |= WALT_CPUFREQ_IC_MIGRATION_BIT;
|
|
}
|
|
if (is_asym_migration)
|
|
wflag |= WALT_CPUFREQ_ASYM_FIXUP_BIT;
|
|
if (is_pipeline_sync_migration)
|
|
wflag |= WALT_CPUFREQ_SHARED_RAIL_BIT;
|
|
} else {
|
|
wflag |= WALT_CPUFREQ_ROLLOVER_BIT;
|
|
}
|
|
|
|
if (i == num_cpus)
|
|
waltgov_run_callback(cpu_rq(cpu), wflag);
|
|
else
|
|
waltgov_run_callback(cpu_rq(cpu), wflag |
|
|
WALT_CPUFREQ_CONTINUE_BIT);
|
|
i++;
|
|
|
|
if (!is_migration)
|
|
walt_update_irqload(rq);
|
|
}
|
|
}
|
|
|
|
	/*
	 * If a window change request is pending, this is a good place to
	 * change sched_ravg_window, since all rq locks are acquired.
	 *
	 * If the current window rollover is delayed such that the
	 * mark_start (current wallclock with which rollover is done)
	 * of the current task went past the window start with the
	 * updated new window size, delay the update to the next
	 * window rollover. Otherwise the CPU counters (prs and crs) are
	 * not rolled over properly as mark_start > window_start.
	 */
	if (!is_migration) {
		spin_lock_irqsave(&sched_ravg_window_lock, flags);
		wrq = &per_cpu(walt_rq, cpu_of(this_rq()));
		if ((sched_ravg_window != new_sched_ravg_window) &&
		    (wc < wrq->window_start + new_sched_ravg_window)) {
			sched_ravg_window_change_time = walt_sched_clock();
			trace_sched_ravg_window_change(sched_ravg_window,
					new_sched_ravg_window,
					sched_ravg_window_change_time);
			sched_ravg_window = new_sched_ravg_window;
			walt_tunables_fixup();
		}
		spin_unlock_irqrestore(&sched_ravg_window_lock, flags);
	}
}

/**
 * irq_work_restrict_to_mig_clusters() - only allow notified clusters
 * @lock_cpus: mask of the cpus for which the runqueue should be locked.
 *
 * Remove cpus in clusters that are not part of the migration, using
 * the notif_pending flag to track.
 *
 * This is only valid for the migration irq work.
 */
static inline void irq_work_restrict_to_mig_clusters(cpumask_t *lock_cpus)
{
	struct walt_sched_cluster *cluster;
	struct rq *rq;
	struct walt_rq *wrq;
	int cpu;

	for_each_sched_cluster(cluster) {
		bool keep_locked = false;

		for_each_cpu(cpu, &cluster->cpus) {
			rq = cpu_rq(cpu);
			wrq = &per_cpu(walt_rq, cpu_of(rq));

			/* remove this cluster if it's not being notified */
			if (wrq->notif_pending) {
				keep_locked = true;
				break;
			}
		}
		if (!keep_locked)
			cpumask_andnot(lock_cpus, lock_cpus, &cluster->cpus);
	}
}

void update_cpu_capacity_helper(int cpu)
|
|
{
|
|
unsigned long fmax_capacity = arch_scale_cpu_capacity(cpu);
|
|
unsigned long thermal_pressure = arch_scale_thermal_pressure(cpu);
|
|
unsigned long thermal_cap, old;
|
|
struct walt_sched_cluster *cluster;
|
|
struct rq *rq = cpu_rq(cpu);
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
/*
|
|
* thermal_pressure = cpu_scale - curr_cap_as_per_thermal.
|
|
* so,
|
|
* curr_cap_as_per_thermal = cpu_scale - thermal_pressure.
|
|
*/
|
|
|
|
thermal_cap = fmax_capacity - thermal_pressure;
|
|
|
|
cluster = cpu_cluster(cpu);
|
|
/* reduce the fmax_capacity under cpufreq constraints */
|
|
if (cluster->walt_internal_freq_limit != cluster->max_possible_freq)
|
|
fmax_capacity = mult_frac(fmax_capacity,
|
|
min(cluster->walt_internal_freq_limit, cluster->max_freq),
|
|
cluster->max_possible_freq);
|
|
|
|
old = rq->cpu_capacity_orig;
|
|
rq->cpu_capacity_orig = min(fmax_capacity, thermal_cap);
|
|
|
|
if (old != rq->cpu_capacity_orig)
|
|
trace_update_cpu_capacity(cpu, fmax_capacity, rq->cpu_capacity_orig);
|
|
}
|
|
|
|
/*
|
|
* The intention of this hook is to update cpu_capacity_orig as well as
|
|
* (*capacity), otherwise we will end up capacity_of() > capacity_orig_of().
|
|
*/
|
|
static void android_rvh_update_cpu_capacity(void *unused, int cpu, unsigned long *capacity)
|
|
{
|
|
unsigned long rt_pressure = arch_scale_cpu_capacity(cpu) - *capacity;
|
|
|
|
update_cpu_capacity_helper(cpu);
|
|
*capacity = max((int)(cpu_rq(cpu)->cpu_capacity_orig - rt_pressure), 0);
|
|
}
|
|
|
|
/*
|
|
* big_task_pid is used by the One Big Enqueue Task feature to track the
|
|
* number of big tasks enqueued on largest cluster.
|
|
* big_task_pid = {0, pid, -1}
|
|
* 0 -> indicates there are 0 big tasks enqueued on the CPU
|
|
* pid -> indicates that there is one big task enqueued whose PID
|
|
* is pid
|
|
* -1 -> indicates that there is more than one big task enqueued
|
|
* on the CPU.
|
|
* The big_task_pid value is checked every window rollover
|
|
* and updated according to the rules above.
|
|
*/
|
|
DEFINE_PER_CPU(pid_t, big_task_pid);
|
|
bool is_obet;
|
|
|
|
/*
|
|
* check_obet() needs to be called with all the rq locks held.
|
|
* It resets per cpu big_task_pid and does cpu checks on a
|
|
* single big task.
|
|
*/
|
|
static void check_obet(void)
|
|
{
|
|
struct task_struct *p;
|
|
int is_obet_temp = 0;
|
|
int mid_cluster_cpu, cpu;
|
|
|
|
if (num_sched_clusters < 2)
|
|
return;
|
|
|
|
mid_cluster_cpu = cpumask_first(&cpu_array[0][num_sched_clusters - 2]);
|
|
|
|
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
|
|
if (per_cpu(big_task_pid, cpu) == -1) {
|
|
is_obet_temp = -1;
|
|
} else if (per_cpu(big_task_pid, cpu) != 0) {
|
|
if (is_obet_temp == 0) {
|
|
is_obet_temp = per_cpu(big_task_pid, cpu);
|
|
} else {
|
|
if (is_obet_temp != per_cpu(big_task_pid, cpu))
|
|
is_obet_temp = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (is_obet_temp == -1 || is_obet_temp == 0)
|
|
is_obet = false;
|
|
else
|
|
is_obet = true;
|
|
|
|
//reset per CPU big_task_pid for the upcoming window
|
|
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
|
|
pid_t pid = per_cpu(big_task_pid, cpu);
|
|
|
|
if (pid) {
|
|
int task_count = 0;
|
|
int big_task_count = 0;
|
|
|
|
list_for_each_entry(p, &(cpu_rq(cpu)->cfs_tasks),
|
|
se.group_node) {
|
|
task_count++;
|
|
if (!task_fits_max(p, mid_cluster_cpu)) {
|
|
big_task_count++;
|
|
pid = p->pid;
|
|
if (big_task_count == 2)
|
|
break;
|
|
}
|
|
if (task_count == 10)
|
|
break;
|
|
}
|
|
if (task_count == 10)
|
|
per_cpu(big_task_pid, cpu) = -1;
|
|
else if (big_task_count == 0)
|
|
per_cpu(big_task_pid, cpu) = 0;
|
|
else if (big_task_count == 1)
|
|
per_cpu(big_task_pid, cpu) = pid;
|
|
else
|
|
per_cpu(big_task_pid, cpu) = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void check_obet_set_boost(void)
|
|
{
|
|
static bool prev_is_obet;
|
|
bool now_is_obet;
|
|
|
|
//Determine if core_ctl boost is needed
|
|
now_is_obet = is_obet;
|
|
if (prev_is_obet != now_is_obet)
|
|
core_ctl_set_cluster_boost(num_sched_clusters - 1, is_obet);
|
|
prev_is_obet = now_is_obet;
|
|
}
|
|
|
|
#define CORE_UTIL_PERIOD 1000000000
|
|
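/*
 * walt_core_utilization() - accumulate per-CPU runnable load (capped at the
 * CPU's max capacity) every window and, once CORE_UTIL_PERIOD has elapsed,
 * publish the per-window average via sysctl_sched_walt_core_util.
 */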
static void walt_core_utilization(int cpu)
|
|
{
|
|
static u64 sum[WALT_NR_CPUS];
|
|
static u64 timestamp;
|
|
static int nr_windows[WALT_NR_CPUS];
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
|
|
u64 max_capacity = arch_scale_cpu_capacity(cpu);
|
|
|
|
if (wrq->window_start > timestamp + CORE_UTIL_PERIOD) {
|
|
sysctl_sched_walt_core_util[cpu] = sum[cpu] / nr_windows[cpu];
|
|
sum[cpu] = 0;
|
|
nr_windows[cpu] = 0;
|
|
if (cpu == cpumask_last(cpu_online_mask))
|
|
timestamp = wrq->window_start;
|
|
}
|
|
|
|
nr_windows[cpu]++;
|
|
if (max_capacity < wrq->walt_stats.cumulative_runnable_avg_scaled)
|
|
sum[cpu] += max_capacity;
|
|
else
|
|
sum[cpu] += wrq->walt_stats.cumulative_runnable_avg_scaled;
|
|
}
|
|
|
|
DEFINE_PER_CPU(u32, wakeup_ctr);
|
|
/**
 * walt_irq_work() - perform walt irq work for rollover and migration
 *
 * Process the scheduled irq work while running in hard-irq
 * context. Handle migration and window rollover work
 * with common functionality, and on window rollover ask core control
 * to decide if it needs to adjust the active cpus.
 */
static void walt_irq_work(struct irq_work *irq_work)
|
|
{
|
|
cpumask_t lock_cpus;
|
|
struct walt_rq *wrq;
|
|
int level;
|
|
int cpu;
|
|
bool is_migration = false, is_asym_migration = false, is_pipeline_sync_migration = false;
|
|
u32 wakeup_ctr_sum = 0;
|
|
struct walt_sched_cluster *cluster;
|
|
bool need_assign_heavy = false;
|
|
|
|
if (irq_work == &walt_migration_irq_work)
|
|
is_migration = true;
|
|
|
|
cpumask_copy(&lock_cpus, cpu_possible_mask);
|
|
|
|
if (is_migration) {
|
|
irq_work_restrict_to_mig_clusters(&lock_cpus);
|
|
|
|
/*
|
|
* if the notif_pending was handled by a previous
|
|
* walt_irq_work invocation, there is no migration
|
|
* work.
|
|
*/
|
|
if (cpumask_empty(&lock_cpus))
|
|
return;
|
|
|
|
if (pipeline_in_progress() && cpumask_intersects(&lock_cpus, &pipeline_sync_cpus)) {
|
|
cpumask_or(&lock_cpus, &lock_cpus, &pipeline_sync_cpus);
|
|
is_pipeline_sync_migration = true;
|
|
}
|
|
if (!is_state1() &&
|
|
cpumask_intersects(&lock_cpus, &asym_cap_sibling_cpus)) {
|
|
cpumask_or(&lock_cpus, &lock_cpus, &asym_cap_sibling_cpus);
|
|
is_asym_migration = true;
|
|
}
|
|
}
|
|
|
|
level = 0;
|
|
for_each_cpu(cpu, &lock_cpus) {
|
|
if (level == 0)
|
|
raw_spin_lock(&cpu_rq(cpu)->__lock);
|
|
else
|
|
raw_spin_lock_nested(&cpu_rq(cpu)->__lock, level);
|
|
level++;
|
|
}
|
|
|
|
__walt_irq_work_locked(is_migration, is_asym_migration,
|
|
is_pipeline_sync_migration, &lock_cpus);
|
|
|
|
if (!is_migration) {
|
|
for_each_cpu(cpu, cpu_online_mask) {
|
|
wakeup_ctr_sum += per_cpu(wakeup_ctr, cpu);
|
|
per_cpu(wakeup_ctr, cpu) = 0;
|
|
|
|
walt_core_utilization(cpu);
|
|
set_cpu_flag(cpu, CPU_FIRST_ENQ_IN_WINDOW, 0);
|
|
}
|
|
|
|
check_obet();
|
|
}
|
|
|
|
for_each_cpu(cpu, &lock_cpus)
|
|
raw_spin_unlock(&cpu_rq(cpu)->__lock);
|
|
|
|
if (!is_migration) {
|
|
wrq = &per_cpu(walt_rq, cpu_of(this_rq()));
|
|
need_assign_heavy = pipeline_check(wrq);
|
|
core_ctl_check(wrq->window_start, wakeup_ctr_sum);
|
|
pipeline_rearrange(wrq, need_assign_heavy);
|
|
for_each_sched_cluster(cluster) {
|
|
update_smart_freq_legacy_reason_hyst_time(cluster);
|
|
}
|
|
check_obet_set_boost();
|
|
}
|
|
}
|
|
|
|
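/*
 * walt_rotation_checkpoint() - on an hmp-capable system, enable big task
 * rotation when it is allowed by sysctl, no sched boost is active, and the
 * number of big tasks is at least the number of CPUs; apply or remove the
 * high-perf cluster frequency caps when the rotation state changes.
 */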
void walt_rotation_checkpoint(int nr_big)
|
|
{
|
|
int i;
|
|
bool prev = walt_rotation_enabled;
|
|
|
|
if (!hmp_capable())
|
|
return;
|
|
|
|
if (!sysctl_sched_walt_rotate_big_tasks || sched_boost_type != NO_BOOST) {
|
|
walt_rotation_enabled = 0;
|
|
return;
|
|
}
|
|
|
|
walt_rotation_enabled = nr_big >= num_possible_cpus();
|
|
|
|
for (i = 0; i < num_sched_clusters; i++) {
|
|
if (walt_rotation_enabled && !prev)
|
|
freq_cap[HIGH_PERF_CAP][i] = high_perf_cluster_freq_cap[i];
|
|
else if (!walt_rotation_enabled && prev)
|
|
freq_cap[HIGH_PERF_CAP][i] = FREQ_QOS_MAX_DEFAULT_VALUE;
|
|
}
|
|
|
|
update_smart_freq_capacities();
|
|
}
|
|
|
|
void walt_fill_ta_data(struct core_ctl_notif_data *data)
|
|
{
|
|
struct walt_related_thread_group *grp;
|
|
unsigned long flags;
|
|
u64 total_demand = 0, wallclock;
|
|
int min_cap_cpu, scale = 1024;
|
|
struct walt_sched_cluster *cluster;
|
|
int i = 0;
|
|
struct walt_task_struct *wts;
|
|
|
|
grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
|
|
|
|
raw_spin_lock_irqsave(&grp->lock, flags);
|
|
if (list_empty(&grp->tasks)) {
|
|
raw_spin_unlock_irqrestore(&grp->lock, flags);
|
|
goto fill_util;
|
|
}
|
|
|
|
wallclock = walt_sched_clock();
|
|
|
|
list_for_each_entry(wts, &grp->tasks, grp_list) {
|
|
if (wts->mark_start < wallclock -
|
|
(sched_ravg_window * RAVG_HIST_SIZE))
|
|
continue;
|
|
|
|
total_demand += wts->coloc_demand;
|
|
}
|
|
|
|
raw_spin_unlock_irqrestore(&grp->lock, flags);
|
|
|
|
/*
|
|
* Scale the total demand to the lowest capacity CPU and
|
|
* convert into percentage.
|
|
*
|
|
* P = total_demand/sched_ravg_window * 1024/scale * 100
|
|
*/
|
|
|
|
min_cap_cpu = cpumask_first(&cpu_array[0][0]);
|
|
if (min_cap_cpu != -1)
|
|
scale = arch_scale_cpu_capacity(min_cap_cpu);
|
|
|
|
data->coloc_load_pct = div64_u64(total_demand * 1024 * 100,
|
|
(u64)sched_ravg_window * scale);
|
|
|
|
fill_util:
|
|
for_each_sched_cluster(cluster) {
|
|
int fcpu = cluster_first_cpu(cluster);
|
|
|
|
if (i == MAX_CLUSTERS)
|
|
break;
|
|
|
|
scale = arch_scale_cpu_capacity(fcpu);
|
|
data->ta_util_pct[i] = div64_u64(cluster->aggr_grp_load * 1024 *
|
|
100, (u64)sched_ravg_window * scale);
|
|
|
|
scale = arch_scale_freq_capacity(fcpu);
|
|
data->cur_cap_pct[i] = (scale * 100)/1024;
|
|
i++;
|
|
}
|
|
}
|
|
|
|
#define INIT_TASK_LOAD_PCT 15
|
|
static void walt_init_window_dep(void)
|
|
{
|
|
walt_scale_demand_divisor = sched_ravg_window >> SCHED_CAPACITY_SHIFT;
|
|
/* default task to 15 pct */
|
|
sched_init_task_load_windows = div64_u64((u64)INIT_TASK_LOAD_PCT *
|
|
(u64)sched_ravg_window, 100);
|
|
sched_init_task_load_windows_scaled =
|
|
scale_time_to_util(sched_init_task_load_windows);
|
|
|
|
walt_cpu_high_irqload = div64_u64((u64)sched_ravg_window * 95, (u64) 100);
|
|
}
|
|
|
|
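/*
 * walt_init_once() - one-time setup of the rollover/migration irq works and
 * the window-size dependent tunables.
 */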
static void walt_init_once(void)
|
|
{
|
|
init_irq_work(&walt_migration_irq_work, walt_irq_work);
|
|
init_irq_work(&walt_cpufreq_irq_work, walt_irq_work);
|
|
walt_init_window_dep();
|
|
}
|
|
|
|
static void walt_sched_init_rq(struct rq *rq)
|
|
{
|
|
int j;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
cpumask_set_cpu(cpu_of(rq), &wrq->freq_domain_cpumask);
|
|
|
|
wrq->walt_stats.cumulative_runnable_avg_scaled = 0;
|
|
wrq->prev_window_size = sched_ravg_window;
|
|
wrq->window_start = 0;
|
|
wrq->walt_stats.nr_big_tasks = 0;
|
|
wrq->walt_stats.nr_trailblazer_tasks = 0;
|
|
wrq->walt_flags = 0;
|
|
wrq->avg_irqload = 0;
|
|
wrq->prev_irq_time = 0;
|
|
wrq->last_irq_window = 0;
|
|
wrq->high_irqload = false;
|
|
wrq->task_exec_scale = 1024;
|
|
wrq->push_task = NULL;
|
|
wrq->lrb_pipeline_start_time = 0;
|
|
|
|
wrq->curr_runnable_sum = wrq->prev_runnable_sum = 0;
|
|
wrq->nt_curr_runnable_sum = wrq->nt_prev_runnable_sum = 0;
|
|
memset(&wrq->grp_time, 0, sizeof(struct group_cpu_time));
|
|
wrq->old_busy_time = 0;
|
|
wrq->old_estimated_time = 0;
|
|
wrq->walt_stats.pred_demands_sum_scaled = 0;
|
|
wrq->walt_stats.nr_rtg_high_prio_tasks = 0;
|
|
wrq->ed_task = NULL;
|
|
wrq->curr_table = 0;
|
|
wrq->prev_top = 0;
|
|
wrq->curr_top = 0;
|
|
wrq->last_cc_update = 0;
|
|
wrq->cycles = 0;
|
|
for (j = 0; j < NUM_TRACKED_WINDOWS; j++) {
|
|
memset(&wrq->load_subs[j], 0,
|
|
sizeof(struct load_subtractions));
|
|
wrq->top_tasks[j] = kcalloc(NUM_LOAD_INDICES,
|
|
sizeof(u8), GFP_ATOMIC | GFP_NOWAIT);
|
|
/* No other choice */
|
|
BUG_ON(!wrq->top_tasks[j]);
|
|
clear_top_tasks_bitmap(wrq->top_tasks_bitmap[j]);
|
|
}
|
|
wrq->notif_pending = false;
|
|
|
|
wrq->num_mvp_tasks = 0;
|
|
INIT_LIST_HEAD(&wrq->mvp_tasks);
|
|
wrq->mvp_arrival_time = 0;
|
|
wrq->mvp_throttle_time = 0;
|
|
wrq->skip_mvp = false;
|
|
wrq->uclamp_limit[UCLAMP_MIN] = 0;
|
|
wrq->uclamp_limit[UCLAMP_MAX] = SCHED_CAPACITY_SCALE;
|
|
}
|
|
|
|
void sched_window_nr_ticks_change(void)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&sched_ravg_window_lock, flags);
|
|
new_sched_ravg_window = mult_frac(sysctl_sched_ravg_window_nr_ticks,
|
|
NSEC_PER_SEC, HZ);
|
|
spin_unlock_irqrestore(&sched_ravg_window_lock, flags);
|
|
}
|
|
|
|
static void
|
|
walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
fixup_cumulative_runnable_avg(rq, p, &wrq->walt_stats, wts->demand_scaled,
|
|
wts->pred_demand_scaled);
|
|
}
|
|
|
|
static void
|
|
walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
|
|
fixup_cumulative_runnable_avg(rq, p, &wrq->walt_stats,
|
|
-(s64)wts->demand_scaled,
|
|
-(s64)wts->pred_demand_scaled);
|
|
}
|
|
|
|
static void android_rvh_wake_up_new_task(void *unused, struct task_struct *new)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
init_new_task_load(new);
|
|
add_new_task_to_grp(new);
|
|
}
|
|
|
|
static void walt_cpu_frequency_limits(void *unused, struct cpufreq_policy *policy)
|
|
{
|
|
int cpu;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
cpu_cluster(policy->cpu)->max_freq = policy->max;
|
|
for_each_cpu(cpu, policy->related_cpus)
|
|
update_cpu_capacity_helper(cpu);
|
|
}
|
|
|
|
static void android_rvh_sched_cpu_starting(void *unused, int cpu)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
clear_walt_request(cpu);
|
|
}
|
|
|
|
static void android_rvh_sched_cpu_dying(void *unused, int cpu)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
clear_walt_request(cpu);
|
|
}
|
|
|
|
static void android_rvh_set_task_cpu(void *unused, struct task_struct *p, unsigned int new_cpu)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
migrate_busy_time_subtraction(p, (int) new_cpu);
|
|
|
|
if (!cpumask_test_cpu(new_cpu, p->cpus_ptr))
|
|
WALT_BUG(WALT_BUG_WALT, p, "selecting unaffined cpu=%d comm=%s(%d) affinity=0x%lx",
|
|
new_cpu, p->comm, p->pid, (*(cpumask_bits(p->cpus_ptr))));
|
|
|
|
if (!p->in_execve &&
|
|
is_compat_thread(task_thread_info(p)) &&
|
|
!cpumask_test_cpu(new_cpu, system_32bit_el0_cpumask()))
|
|
WALT_BUG(WALT_BUG_WALT, p,
|
|
"selecting non 32 bit cpu=%d comm=%s(%d) 32bit_cpus=0x%lx",
|
|
new_cpu, p->comm, p->pid, (*(cpumask_bits(system_32bit_el0_cpumask()))));
|
|
}
|
|
|
|
static void android_rvh_new_task_stats(void *unused, struct task_struct *p)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
mark_task_starting(p);
|
|
}
|
|
|
|
static void android_rvh_account_irq(void *unused, struct task_struct *curr, int cpu,
|
|
s64 delta, bool start)
|
|
{
|
|
struct rq *rq;
|
|
unsigned long flags;
|
|
struct walt_rq *wrq;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
if (!walt_is_idle_task(curr))
|
|
return;
|
|
|
|
rq = cpu_rq(cpu);
|
|
wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
|
|
if (start) {
|
|
if (!wrq->window_start)
|
|
return;
|
|
|
|
/* We're here without rq->lock held, IRQ disabled */
|
|
raw_spin_lock(&rq->__lock);
|
|
update_task_cpu_cycles(curr, cpu, walt_sched_clock());
|
|
raw_spin_unlock(&rq->__lock);
|
|
} else {
|
|
raw_spin_lock_irqsave(&rq->__lock, flags);
|
|
walt_update_task_ravg(curr, rq, IRQ_UPDATE, walt_sched_clock(), delta);
|
|
raw_spin_unlock_irqrestore(&rq->__lock, flags);
|
|
|
|
wrq->last_irq_window = wrq->window_start;
|
|
}
|
|
}
|
|
|
|
static void android_rvh_flush_task(void *unused, struct task_struct *p)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
walt_task_dead(p);
|
|
}
|
|
|
|
static void android_rvh_enqueue_task(void *unused, struct rq *rq,
|
|
struct task_struct *p, int flags)
|
|
{
|
|
u64 wallclock;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
bool double_enqueue = false;
|
|
int mid_cluster_cpu;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
walt_lockdep_assert_rq(rq, p);
|
|
|
|
if (flags & ENQUEUE_WAKEUP)
|
|
per_cpu(wakeup_ctr, cpu_of(rq)) += 1;
|
|
|
|
if (!is_per_cpu_kthread(p))
|
|
wrq->enqueue_counter++;
|
|
|
|
if (task_thread_info(p)->cpu != cpu_of(rq))
|
|
WALT_BUG(WALT_BUG_UPSTREAM, p, "enqueuing on rq %d when task->cpu is %d\n",
|
|
cpu_of(rq), task_thread_info(p)->cpu);
|
|
|
|
/* catch double enqueue */
|
|
if (wts->prev_on_rq == 1) {
|
|
WALT_BUG(WALT_BUG_UPSTREAM, p, "double enqueue detected: task_cpu=%d new_cpu=%d\n",
|
|
task_cpu(p), cpu_of(rq));
|
|
double_enqueue = true;
|
|
}
|
|
|
|
wallclock = walt_rq_clock(rq);
|
|
if (wts->enqueue_after_migration != 0) {
|
|
wallclock = walt_sched_clock();
|
|
migrate_busy_time_addition(p, cpu_of(rq), wallclock);
|
|
wts->enqueue_after_migration = 0;
|
|
}
|
|
|
|
wts->prev_on_rq = 1;
|
|
wts->prev_on_rq_cpu = cpu_of(rq);
|
|
|
|
wts->last_enqueued_ts = wallclock;
|
|
sched_update_nr_prod(rq->cpu, 1);
|
|
|
|
if (walt_fair_task(p)) {
|
|
wts->misfit = !task_fits_max(p, rq->cpu);
|
|
if (!double_enqueue)
|
|
inc_rq_walt_stats(rq, p);
|
|
walt_cfs_enqueue_task(rq, p);
|
|
}
|
|
|
|
if (!double_enqueue)
|
|
walt_inc_cumulative_runnable_avg(rq, p);
if ((flags & ENQUEUE_WAKEUP) && walt_flag_test(p, WALT_TRAILBLAZER_BIT)) {
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_TRAILBLAZER_BIT);
|
|
} else if (((flags & ENQUEUE_WAKEUP) ||
|
|
!is_cpu_flag_set(cpu_of(rq), CPU_FIRST_ENQ_IN_WINDOW)) && do_pl_notif(rq)) {
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_PL_BIT);
|
|
} else if (walt_feat(WALT_FEAT_UCLAMP_FREQ_BIT)) {
|
|
unsigned long min, max;
|
|
|
|
min = uclamp_rq_get(rq, UCLAMP_MIN);
|
|
max = uclamp_rq_get(rq, UCLAMP_MAX);
|
|
if ((wrq->uclamp_limit[UCLAMP_MIN] != min) ||
|
|
(wrq->uclamp_limit[UCLAMP_MAX] != max)) {
|
|
wrq->uclamp_limit[UCLAMP_MIN] = min;
|
|
wrq->uclamp_limit[UCLAMP_MAX] = max;
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_UCLAMP_BIT);
|
|
}
|
|
}
|
|
|
|
set_cpu_flag(cpu_of(rq), CPU_FIRST_ENQ_IN_WINDOW, 1);
|
|
if (num_sched_clusters >= 2) {
|
|
mid_cluster_cpu = cpumask_first(
|
|
&cpu_array[0][num_sched_clusters - 2]);
|
|
if (is_max_possible_cluster_cpu(rq->cpu) &&
|
|
!task_fits_max(p, mid_cluster_cpu)) {
|
|
if (!per_cpu(big_task_pid, rq->cpu))
|
|
per_cpu(big_task_pid, rq->cpu) = p->pid;
|
|
else if (p->pid != per_cpu(big_task_pid, rq->cpu))
|
|
per_cpu(big_task_pid, rq->cpu) = -1;
|
|
}
|
|
}
|
|
|
|
trace_sched_enq_deq_task(p, 1, cpumask_bits(p->cpus_ptr)[0], is_mvp(wts));
|
|
}
|
|
|
|
static void android_rvh_dequeue_task(void *unused, struct rq *rq,
|
|
struct task_struct *p, int flags)
|
|
{
|
|
struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
bool double_dequeue = false;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
walt_lockdep_assert_rq(rq, p);
|
|
|
|
	/*
	 * A task can be enqueued before walt is started and dequeued after
	 * it, so the check that prev_on_rq_cpu has been set is needed to
	 * prevent a spurious failure.
	 */
if (wts->prev_on_rq_cpu >= 0 && wts->prev_on_rq_cpu != cpu_of(rq) &&
|
|
walt_flag_test(p, WALT_INIT_BIT))
|
|
WALT_BUG(WALT_BUG_UPSTREAM, p, "dequeue cpu %d not same as enqueue %d\n",
|
|
cpu_of(rq), wts->prev_on_rq_cpu);
|
|
|
|
/* no longer on a cpu */
|
|
wts->prev_on_rq_cpu = -1;
|
|
|
|
/* catch double deq */
|
|
if (wts->prev_on_rq == 2) {
|
|
WALT_BUG(WALT_BUG_UPSTREAM, p, "double dequeue detected: task_cpu=%d new_cpu=%d\n",
|
|
task_cpu(p), cpu_of(rq));
|
|
double_dequeue = true;
|
|
}
|
|
|
|
wts->prev_on_rq = 2;
|
|
if (p == wrq->ed_task)
|
|
is_ed_task_present(rq, walt_rq_clock(rq), p);
|
|
|
|
sched_update_nr_prod(rq->cpu, -1);
|
|
|
|
if (walt_fair_task(p)) {
|
|
if (!double_dequeue)
|
|
dec_rq_walt_stats(rq, p);
|
|
walt_cfs_dequeue_task(rq, p);
|
|
}
|
|
|
|
if (!double_dequeue)
|
|
walt_dec_cumulative_runnable_avg(rq, p);
|
|
|
|
if (walt_feat(WALT_FEAT_UCLAMP_FREQ_BIT)) {
|
|
unsigned long min, max;
|
|
|
|
min = uclamp_rq_get(rq, UCLAMP_MIN);
|
|
max = uclamp_rq_get(rq, UCLAMP_MAX);
|
|
if ((wrq->uclamp_limit[UCLAMP_MIN] != min) ||
|
|
(wrq->uclamp_limit[UCLAMP_MAX] != max)) {
|
|
wrq->uclamp_limit[UCLAMP_MIN] = min;
|
|
wrq->uclamp_limit[UCLAMP_MAX] = max;
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_UCLAMP_BIT);
|
|
}
|
|
}
|
|
trace_sched_enq_deq_task(p, 0, cpumask_bits(p->cpus_ptr)[0], is_mvp(wts));
|
|
}
|
|
|
|
static void android_rvh_update_misfit_status(void *unused, struct task_struct *p,
|
|
struct rq *rq, bool *need_update)
|
|
{
|
|
struct walt_task_struct *wts;
|
|
struct walt_rq *wrq;
|
|
bool old_misfit, misfit;
|
|
int change;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
*need_update = false;
|
|
|
|
if (!p) {
|
|
rq->misfit_task_load = 0;
|
|
return;
|
|
}
|
|
|
|
wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
old_misfit = wts->misfit;
|
|
|
|
if (task_fits_max(p, rq->cpu))
|
|
rq->misfit_task_load = 0;
|
|
else
|
|
rq->misfit_task_load = task_util(p);
|
|
|
|
misfit = rq->misfit_task_load;
|
|
|
|
change = misfit - old_misfit;
|
|
if (change) {
|
|
sched_update_nr_prod(rq->cpu, 0);
|
|
wts->misfit = misfit;
|
|
wrq->walt_stats.nr_big_tasks += change;
|
|
BUG_ON(wrq->walt_stats.nr_big_tasks < 0);
|
|
}
|
|
}
|
|
|
|
/* utility function to update walt signals at wakeup */
|
|
static void android_rvh_try_to_wake_up(void *unused, struct task_struct *p)
|
|
{
|
|
struct rq *rq = cpu_rq(task_cpu(p));
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
|
|
struct rq_flags rf;
|
|
u64 wallclock;
|
|
unsigned int old_load;
|
|
struct walt_related_thread_group *grp = NULL;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
rq_lock_irqsave(rq, &rf);
|
|
old_load = task_load(p);
|
|
wallclock = walt_sched_clock();
|
|
|
|
	/*
	 * Once the task does a real sleep (not the yield-induced sleep),
	 * reset the flag so the task is no longer qualified as a frequent
	 * yielder, i.e. the task needs to qualify again as a frequent
	 * yielder.
	 */
if (!(wts->yield_state & YIELD_INDUCED_SLEEP))
|
|
wts->yield_state = 0;
|
|
else
|
|
wts->yield_state &= YIELD_CNT_MASK;
|
|
|
|
if (walt_is_idle_task(rq->curr) && p->in_iowait)
|
|
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
|
|
walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
|
|
note_task_waking(p, wallclock);
|
|
rq_unlock_irqrestore(rq, &rf);
|
|
|
|
rcu_read_lock();
|
|
grp = task_related_thread_group(p);
|
|
if (update_preferred_cluster(grp, p, old_load, false))
|
|
set_preferred_cluster(grp);
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
static u64 tick_sched_clock;
|
|
static DECLARE_COMPLETION(tick_sched_clock_completion);
|
|
|
|
DEFINE_PER_CPU(unsigned long, intr_cnt);
|
|
DEFINE_PER_CPU(unsigned long, cycle_cnt);
|
|
DEFINE_PER_CPU(unsigned int, ipc_level);
|
|
DEFINE_PER_CPU(unsigned long, ipc_cnt);
|
|
DEFINE_PER_CPU(u64, last_ipc_update);
|
|
DEFINE_PER_CPU(u64, ipc_deactivate_ns);
|
|
DEFINE_PER_CPU(bool, tickless_mode);
|
|
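/*
 * calculate_ipc() - compute instructions-per-cycle (scaled by 100) for @cpu
 * from the AMU cycle and instructions-retired counters accumulated since the
 * previous reading; deltas below the cluster's min_cycles threshold yield 0.
 */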
static unsigned long calculate_ipc(int cpu)
|
|
{
|
|
unsigned long amu_cnt, delta_cycl = 0, delta_intr = 0;
|
|
unsigned long prev_cycl_cnt = per_cpu(cycle_cnt, cpu);
|
|
unsigned long prev_intr_cnt = per_cpu(intr_cnt, cpu);
|
|
unsigned long ipc = 0;
|
|
struct walt_sched_cluster *cluster = cpu_cluster(cpu);
|
|
|
|
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
|
|
delta_cycl = amu_cnt - prev_cycl_cnt;
|
|
per_cpu(cycle_cnt, cpu) = amu_cnt;
|
|
amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
|
|
per_cpu(intr_cnt, cpu) = amu_cnt;
|
|
delta_intr = amu_cnt - prev_intr_cnt;
|
|
if (prev_cycl_cnt && delta_cycl > cluster->smart_freq_info->min_cycles)
|
|
ipc = (delta_intr * 100) / delta_cycl;
|
|
|
|
per_cpu(ipc_cnt, cpu) = ipc;
|
|
per_cpu(last_ipc_update, cpu) = cpu_rq(cpu)->clock;
|
|
trace_ipc_update(cpu, per_cpu(cycle_cnt, cpu), per_cpu(intr_cnt, cpu),
|
|
per_cpu(ipc_cnt, cpu), per_cpu(last_ipc_update, cpu),
|
|
per_cpu(ipc_deactivate_ns, cpu), cpu_rq(cpu)->clock);
|
|
|
|
return ipc;
|
|
}
|
|
|
|
static void android_rvh_tick_entry(void *unused, struct rq *rq)
|
|
{
|
|
u64 wallclock;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
walt_lockdep_assert_rq(rq, NULL);
|
|
wallclock = walt_rq_clock(rq);
|
|
|
|
walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
|
|
|
|
if (is_ed_task_present(rq, wallclock, NULL))
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_EARLY_DET_BIT);
|
|
}
|
|
|
|
bool is_sbt_or_oscillate(void)
|
|
{
|
|
return now_is_sbt || (oscillate_cpu != -1);
|
|
}
|
|
|
|
bool should_boost_bus_dcvs(void)
|
|
{
|
|
trace_sched_boost_bus_dcvs(oscillate_cpu);
|
|
|
|
return (oscillate_cpu != -1) || is_storage_boost();
|
|
}
|
|
EXPORT_SYMBOL_GPL(should_boost_bus_dcvs);
|
|
|
|
/*
|
|
* oscillate_cpu = {-1, cpu} tells if system is currently rotating a big
|
|
* task between Prime CPUs and on which CPU the big task is currently
|
|
* executing.
|
|
* If it is -1, no big task oscillation is occurring.
|
|
*/
|
|
int oscillate_cpu = -1;
|
|
|
|
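/*
 * should_oscillate() - oscillation is allowed only when OBET is active,
 * @busy_cpu is a valid CPU in the largest cluster, that cluster has more
 * than one CPU, and exactly one of its CPUs is busy; otherwise the reason
 * is recorded in *no_oscillate_reason.
 */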
bool should_oscillate(unsigned int busy_cpu, int *no_oscillate_reason)
|
|
{
|
|
int cpu;
|
|
int busy_cpu_count = 0;
|
|
|
|
if (busy_cpu >= nr_cpu_ids) {
|
|
*no_oscillate_reason = 1;
|
|
return false;
|
|
}
|
|
|
|
if (!is_obet) {
|
|
*no_oscillate_reason = 2;
|
|
return false;
|
|
}
|
|
|
|
if (!is_max_possible_cluster_cpu(busy_cpu)) {
|
|
*no_oscillate_reason = 3;
|
|
return false;
|
|
}
|
|
|
|
if (cpumask_weight(&cpu_array[0][num_sched_clusters - 1]) == 1) {
|
|
*no_oscillate_reason = 4;
|
|
return false;
|
|
}
|
|
|
|
for_each_cpu(cpu, &cpu_array[0][num_sched_clusters - 1]) {
|
|
busy_cpu_count += !available_idle_cpu(cpu);
|
|
}
|
|
if (busy_cpu_count != 1) {
|
|
*no_oscillate_reason = 5;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void android_vh_scheduler_tick(void *unused, struct rq *rq)
|
|
{
|
|
struct walt_related_thread_group *grp;
|
|
unsigned int old_load, last_ipc_level, curr_ipc_level;
|
|
unsigned long ipc;
|
|
int i, cpu = cpu_of(rq);
|
|
struct walt_sched_cluster *cluster;
|
|
struct smart_freq_cluster_info *smart_freq_info;
|
|
u64 last_deactivate_ns;
|
|
bool inform_governor = false;
|
|
char ipc_debug[15] = {0};
|
|
|
|
if (!tick_sched_clock) {
|
|
/*
|
|
* Let the window begin 20us prior to the tick,
|
|
* that way we are guaranteed a rollover when the tick occurs.
|
|
* Use rq->clock directly instead of rq_clock() since
|
|
* we do not have the rq lock and
|
|
* rq->clock was updated in the tick callpath.
|
|
*/
|
|
tick_sched_clock = rq->clock - 20000;
|
|
complete(&tick_sched_clock_completion);
|
|
}
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
old_load = task_load(rq->curr);
|
|
rcu_read_lock();
|
|
grp = task_related_thread_group(rq->curr);
|
|
if (update_preferred_cluster(grp, rq->curr, old_load, true))
|
|
set_preferred_cluster(grp);
|
|
rcu_read_unlock();
|
|
|
|
walt_lb_tick(rq);
|
|
|
|
/* IPC based smart FMAX */
|
|
cluster = cpu_cluster(cpu);
|
|
smart_freq_info = cluster->smart_freq_info;
|
|
if (smart_freq_init_done &&
|
|
smart_freq_info->smart_freq_ipc_participation_mask & IPC_PARTICIPATION) {
|
|
last_ipc_level = per_cpu(ipc_level, cpu);
|
|
last_deactivate_ns = per_cpu(ipc_deactivate_ns, cpu);
|
|
ipc = calculate_ipc(cpu);
|
|
|
|
if (enable_logging) {
|
|
snprintf(ipc_debug, sizeof(ipc_debug), "cpu_%d_ipc", cpu);
|
|
trace_clock_set_rate(ipc_debug, ipc, raw_smp_processor_id());
|
|
}
|
|
|
|
for (i = 0; i < SMART_FMAX_IPC_MAX; i++)
|
|
if (ipc < smart_freq_info->ipc_reason_config[i].ipc)
|
|
break;
|
|
|
|
if (i >= SMART_FMAX_IPC_MAX)
|
|
i = SMART_FMAX_IPC_MAX - 1;
|
|
|
|
curr_ipc_level = i;
|
|
if ((curr_ipc_level != last_ipc_level) || per_cpu(tickless_mode, cpu))
|
|
inform_governor = true;
|
|
|
|
if ((curr_ipc_level < last_ipc_level) &&
|
|
(smart_freq_info->ipc_reason_config[last_ipc_level].hyst_ns > 0)) {
|
|
if (!last_deactivate_ns) {
|
|
per_cpu(ipc_deactivate_ns, cpu) = rq->clock;
|
|
inform_governor = false;
|
|
} else {
|
|
u64 delta = rq->clock - last_deactivate_ns;
|
|
|
|
if (smart_freq_info->ipc_reason_config[last_ipc_level].hyst_ns >
|
|
delta)
|
|
inform_governor = false;
|
|
}
|
|
}
|
|
|
|
if (inform_governor) {
|
|
per_cpu(ipc_level, cpu) = curr_ipc_level;
|
|
per_cpu(ipc_deactivate_ns, cpu) = 0;
|
|
waltgov_run_callback(rq, WALT_CPUFREQ_SMART_FREQ_BIT);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void android_rvh_schedule(void *unused, struct task_struct *prev,
|
|
struct task_struct *next, struct rq *rq)
|
|
{
|
|
u64 wallclock;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) prev->android_vendor_data1;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
wallclock = walt_rq_clock(rq);
|
|
|
|
if (likely(prev != next)) {
|
|
if (!prev->on_rq)
|
|
wts->last_sleep_ts = wallclock;
|
|
walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
|
|
walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
|
|
} else {
|
|
walt_update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0);
|
|
}
|
|
}
|
|
|
|
static void android_rvh_sched_fork_init(void *unused, struct task_struct *p)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
__sched_fork_init(p);
|
|
}
|
|
|
|
static void android_rvh_ttwu_cond(void *unused, int cpu, bool *cond)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
*cond = (sysctl_sched_many_wakeup_threshold < WALT_MANY_WAKEUP_DEFAULT) &&
|
|
(cpu != smp_processor_id());
|
|
}
|
|
|
|
static void android_rvh_sched_exec(void *unused, bool *cond)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
*cond = true;
|
|
}
|
|
|
|
static void android_rvh_build_perf_domains(void *unused, bool *eas_check)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
*eas_check = true;
|
|
}
|
|
|
|
static void android_rvh_update_thermal_stats(void *unused, int cpu)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
update_cpu_capacity_helper(cpu);
|
|
}
|
|
|
|
static DECLARE_COMPLETION(rebuild_domains_completion);
|
|
static void rebuild_sd_workfn(struct work_struct *work);
|
|
static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
|
|
|
|
/**
 * rebuild_sd_workfn() - rebuild the sched domains (and therefore the perf
 * domains). It is absolutely necessary that the em_pds are created for
 * each cpu device before proceeding, and this must complete for walt to
 * function properly.
 */
static void rebuild_sd_workfn(struct work_struct *work)
|
|
{
|
|
int cpu;
|
|
struct device *cpu_dev;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
cpu_dev = get_cpu_device(cpu);
|
|
if (cpu_dev->em_pd)
|
|
continue;
|
|
|
|
WARN_ONCE(true, "must wait for perf domains to be created");
|
|
schedule_work(&rebuild_sd_work);
|
|
|
|
/* do not rebuild domains yet, and do not complete this action */
|
|
return;
|
|
}
|
|
|
|
rebuild_sched_domains();
|
|
complete(&rebuild_domains_completion);
|
|
}
|
|
|
|
u8 contiguous_yielding_windows;
|
|
static void walt_do_sched_yield_before(void *unused, long *skip)
|
|
{
|
|
struct walt_task_struct *wts = (struct walt_task_struct *)current->android_vendor_data1;
|
|
struct walt_sched_cluster *cluster;
|
|
struct smart_freq_cluster_info *smart_freq_info;
|
|
bool in_legacy_uncap;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
if (!walt_fair_task(current))
|
|
return;
|
|
|
|
cluster = cpu_cluster(task_cpu(current));
|
|
smart_freq_info = cluster->smart_freq_info;
|
|
|
|
if ((wts->yield_state & YIELD_CNT_MASK) >= MAX_YIELD_CNT_PER_TASK_THR) {
|
|
total_yield_cnt++;
|
|
if (contiguous_yielding_windows >= MIN_CONTIGUOUS_YIELDING_WINDOW) {
|
|
			/*
			 * If we are under any legacy frequency uncap (i.e. some
			 * load condition), do not inject sleep for the
			 * yielding task.
			 */
in_legacy_uncap =
|
|
!!(smart_freq_info->cluster_active_reason &
|
|
~BIT(NO_REASON_SMART_FREQ));
|
|
if (!in_legacy_uncap) {
|
|
wts->yield_state |= YIELD_INDUCED_SLEEP;
|
|
total_sleep_cnt++;
|
|
*skip = true;
|
|
usleep_range_state(YIELD_SLEEP_TIME_USEC, YIELD_SLEEP_TIME_USEC,
|
|
TASK_INTERRUPTIBLE);
|
|
}
|
|
}
|
|
} else {
|
|
wts->yield_state++;
|
|
}
|
|
}
|
|
|
|
static void walt_do_sched_yield(void *unused, struct rq *rq)
|
|
{
|
|
struct task_struct *curr = rq->curr;
|
|
struct walt_task_struct *wts = (struct walt_task_struct *) curr->android_vendor_data1;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return;
|
|
|
|
walt_lockdep_assert_rq(rq, NULL);
|
|
|
|
if (!list_empty(&wts->mvp_list) && wts->mvp_list.next)
|
|
walt_cfs_deactivate_mvp_task(rq, curr);
|
|
|
|
if (per_cpu(rt_task_arrival_time, cpu_of(rq)))
|
|
per_cpu(rt_task_arrival_time, cpu_of(rq)) = 0;
|
|
}
|
|
|
|
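/*
 * walt_set_cpus_taken()/walt_unset_cpus_taken() - mark/unmark the CPUs in
 * the given mask as "taken" by an external client. A per-CPU refcount
 * allows nested set/unset calls; a CPU leaves walt_cpus_taken_mask only
 * when its refcount drops to zero.
 */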
int walt_set_cpus_taken(struct cpumask *set)
|
|
{
|
|
unsigned long flags;
|
|
int cpu;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return -EAGAIN;
|
|
|
|
spin_lock_irqsave(&cpus_taken_lock, flags);
|
|
for_each_cpu(cpu, set) {
|
|
per_cpu(cpus_taken_refcount, cpu)++;
|
|
}
|
|
cpumask_or(&walt_cpus_taken_mask, &walt_cpus_taken_mask, set);
|
|
spin_unlock_irqrestore(&cpus_taken_lock, flags);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(walt_set_cpus_taken);
|
|
|
|
int walt_unset_cpus_taken(struct cpumask *unset)
|
|
{
|
|
unsigned long flags;
|
|
int cpu;
|
|
|
|
if (unlikely(walt_disabled))
|
|
return -EAGAIN;
|
|
|
|
spin_lock_irqsave(&cpus_taken_lock, flags);
|
|
for_each_cpu(cpu, unset) {
|
|
if (per_cpu(cpus_taken_refcount, cpu) >= 1)
|
|
per_cpu(cpus_taken_refcount, cpu)--;
|
|
if (!per_cpu(cpus_taken_refcount, cpu))
|
|
cpumask_clear_cpu(cpu, &walt_cpus_taken_mask);
|
|
}
|
|
spin_unlock_irqrestore(&cpus_taken_lock, flags);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(walt_unset_cpus_taken);
|
|
|
|
cpumask_t walt_get_cpus_taken(void)
|
|
{
|
|
return walt_cpus_taken_mask;
|
|
}
|
|
EXPORT_SYMBOL_GPL(walt_get_cpus_taken);
|
|
|
|
int walt_get_cpus_in_state1(struct cpumask *cpus)
|
|
{
|
|
if (unlikely(walt_disabled))
|
|
return -EAGAIN;
|
|
|
|
cpumask_or(cpus, cpu_partial_halt_mask, &sched_cluster[0]->cpus);
|
|
cpumask_andnot(cpus, cpus, cpu_halt_mask);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(walt_get_cpus_in_state1);
|
|
|
|
cpumask_t walt_get_halted_cpus(void)
|
|
{
|
|
return *(cpu_halt_mask);
|
|
}
|
|
EXPORT_SYMBOL_GPL(walt_get_halted_cpus);
|
|
|
|
static void walt_cgroup_force_kthread_migration(void *unused, struct task_struct *tsk,
|
|
struct cgroup *dst_cgrp,
|
|
bool *force_migration)
|
|
{
|
|
	/* no dependency on the walt_disabled flag here */
|
|
|
|
/*
|
|
* RT kthreads may be born in a cgroup with no rt_runtime allocated.
|
|
* Just say no.
|
|
*/
|
|
#ifdef CONFIG_RT_GROUP_SCHED
|
|
if (tsk->no_cgroup_migration && (dst_cgrp->root->subsys_mask & (1U << cpu_cgrp_id)))
|
|
return;
|
|
#endif
|
|
|
|
/*
|
|
* kthreads may acquire PF_NO_SETAFFINITY during initialization.
|
|
* If userland migrates such a kthread to a non-root cgroup, it can
|
|
* become trapped in a cpuset. Just say no.
|
|
*/
|
|
#ifdef CONFIG_CPUSETS
|
|
if ((tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) &&
|
|
(dst_cgrp->root->subsys_mask & (1U << cpuset_cgrp_id)))
|
|
return;
|
|
#endif
|
|
*force_migration = true;
|
|
}
|
|
|
|
static void register_walt_hooks(void)
|
|
{
|
|
register_trace_android_rvh_wake_up_new_task(android_rvh_wake_up_new_task, NULL);
|
|
register_trace_android_rvh_update_cpu_capacity(android_rvh_update_cpu_capacity, NULL);
|
|
register_trace_android_rvh_sched_cpu_starting(android_rvh_sched_cpu_starting, NULL);
|
|
register_trace_android_rvh_sched_cpu_dying(android_rvh_sched_cpu_dying, NULL);
|
|
register_trace_android_rvh_set_task_cpu(android_rvh_set_task_cpu, NULL);
|
|
register_trace_android_rvh_new_task_stats(android_rvh_new_task_stats, NULL);
|
|
register_trace_android_rvh_account_irq(android_rvh_account_irq, NULL);
|
|
register_trace_android_rvh_flush_task(android_rvh_flush_task, NULL);
|
|
register_trace_android_rvh_update_misfit_status(android_rvh_update_misfit_status, NULL);
|
|
register_trace_android_rvh_enqueue_task(android_rvh_enqueue_task, NULL);
|
|
register_trace_android_rvh_dequeue_task(android_rvh_dequeue_task, NULL);
|
|
register_trace_android_rvh_try_to_wake_up(android_rvh_try_to_wake_up, NULL);
|
|
register_trace_android_rvh_tick_entry(android_rvh_tick_entry, NULL);
|
|
register_trace_android_vh_scheduler_tick(android_vh_scheduler_tick, NULL);
|
|
register_trace_android_rvh_schedule(android_rvh_schedule, NULL);
|
|
register_trace_android_rvh_cpu_cgroup_attach(android_rvh_cpu_cgroup_attach, NULL);
|
|
register_trace_android_rvh_cpu_cgroup_online(android_rvh_cpu_cgroup_online, NULL);
|
|
register_trace_android_rvh_sched_fork_init(android_rvh_sched_fork_init, NULL);
|
|
register_trace_android_rvh_ttwu_cond(android_rvh_ttwu_cond, NULL);
|
|
register_trace_android_rvh_sched_exec(android_rvh_sched_exec, NULL);
|
|
register_trace_android_rvh_build_perf_domains(android_rvh_build_perf_domains, NULL);
|
|
register_trace_cpu_frequency_limits(walt_cpu_frequency_limits, NULL);
|
|
register_trace_android_rvh_do_sched_yield(walt_do_sched_yield, NULL);
|
|
register_trace_android_rvh_before_do_sched_yield(walt_do_sched_yield_before, NULL);
|
|
register_trace_android_rvh_update_thermal_stats(android_rvh_update_thermal_stats, NULL);
|
|
register_trace_android_rvh_cgroup_force_kthread_migration(
|
|
walt_cgroup_force_kthread_migration, NULL);
|
|
}
|
|
|
|
atomic64_t walt_irq_work_lastq_ws;
|
|
bool walt_disabled = true;
|
|
|
|
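/*
 * walt_init_stop_handler() - runs via stop_machine() with the tasklist
 * read-locked and every rq lock held: give all existing tasks (including
 * the idle tasks) an initial WALT load, initialise per-rq WALT state and
 * window_start from the tick clock, and only then clear walt_disabled.
 */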
static int walt_init_stop_handler(void *data)
|
|
{
|
|
int cpu;
|
|
struct task_struct *g, *p;
|
|
struct walt_rq *wrq;
|
|
int level;
|
|
|
|
read_lock(&tasklist_lock);
|
|
level = 0;
|
|
for_each_possible_cpu(cpu) {
|
|
if (level == 0)
|
|
raw_spin_lock(&cpu_rq(cpu)->__lock);
|
|
else
|
|
raw_spin_lock_nested(&cpu_rq(cpu)->__lock, level);
|
|
level++;
|
|
}
|
|
|
|
/* existing tasks get a demand of 0, including idle task */
|
|
for_each_process_thread(g, p) {
|
|
init_new_task_load(p);
|
|
}
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
/* Create task members for idle thread */
|
|
init_new_task_load(cpu_rq(cpu)->idle);
|
|
walt_flag_set(cpu_rq(cpu)->idle, WALT_IDLE_TASK_BIT, 1);
|
|
}
|
|
|
|
/* post walt_init_once() a new task will get a non zero demand */
|
|
walt_init_once();
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
struct rq *rq = cpu_rq(cpu);
walt_sched_init_rq(rq);
|
|
|
|
wrq = &per_cpu(walt_rq, cpu_of(rq));
|
|
wrq->window_start = tick_sched_clock;
|
|
}
|
|
|
|
atomic64_set(&walt_irq_work_lastq_ws, tick_sched_clock);
|
|
|
|
create_default_coloc_group();
|
|
|
|
walt_disabled = false;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
raw_spin_unlock(&cpu_rq(cpu)->__lock);
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
return 0;
|
|
}
|
|
|
|
static void walt_init_tg_pointers(void)
|
|
{
|
|
struct cgroup_subsys_state *css = &root_task_group.css;
|
|
struct cgroup_subsys_state *top_css = css;
|
|
|
|
rcu_read_lock();
|
|
css_for_each_child(css, top_css)
|
|
walt_update_tg_pointer(css);
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
static void walt_remove_cpufreq_efficiencies_available(void)
|
|
{
|
|
struct cpufreq_policy *policy;
|
|
struct walt_sched_cluster *cluster;
|
|
|
|
for_each_sched_cluster(cluster) {
|
|
policy = cpufreq_cpu_get(cluster_first_cpu(cluster));
|
|
if (policy) {
|
|
policy->efficiencies_available = false;
|
|
cpufreq_cpu_put(policy);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void walt_init(struct work_struct *work)
|
|
{
|
|
static atomic_t already_inited = ATOMIC_INIT(0);
|
|
struct root_domain *rd = cpu_rq(cpumask_first(cpu_active_mask))->rd;
|
|
int i;
|
|
|
|
might_sleep();
|
|
|
|
if (atomic_cmpxchg(&already_inited, 0, 1))
|
|
return;
|
|
|
|
register_syscore_ops(&walt_syscore_ops);
|
|
BUG_ON(alloc_related_thread_groups());
|
|
init_clusters();
|
|
walt_init_tg_pointers();
|
|
|
|
register_walt_hooks();
|
|
walt_fixup_init();
|
|
walt_lb_init();
|
|
walt_rt_init();
|
|
walt_cfs_init();
|
|
walt_halt_init();
|
|
walt_mvp_lock_ordering_init();
|
|
|
|
wait_for_completion_interruptible(&tick_sched_clock_completion);
|
|
|
|
if (!rcu_access_pointer(rd->pd)) {
|
|
/*
|
|
* perf domains not properly configured. this is a must as
|
|
* create_util_to_cost depends on rd->pd being properly
|
|
* initialized.
|
|
*/
|
|
schedule_work(&rebuild_sd_work);
|
|
wait_for_completion_interruptible(&rebuild_domains_completion);
|
|
}
|
|
|
|
walt_update_cluster_topology();
|
|
walt_remove_cpufreq_efficiencies_available();
|
|
walt_config();
|
|
walt_init_cycle_counter();
|
|
|
|
stop_machine(walt_init_stop_handler, NULL, NULL);
|
|
|
|
/*
|
|
* validate root-domain perf-domain is configured properly
|
|
* to work with an asymmetrical soc. This is necessary
|
|
* for load balance and task placement to work properly.
|
|
* see walt_find_energy_efficient_cpu(), and
|
|
* create_util_to_cost().
|
|
*/
|
|
if (!rcu_access_pointer(rd->pd) && num_sched_clusters > 1)
|
|
WALT_BUG(WALT_BUG_WALT, NULL,
|
|
"root domain's perf-domain values not initialized rd->pd=%p.",
|
|
rd->pd);
|
|
|
|
walt_register_sysctl();
|
|
walt_register_debugfs();
|
|
|
|
input_boost_init();
|
|
core_ctl_init();
|
|
walt_boost_init();
|
|
waltgov_register();
|
|
|
|
i = match_string(sched_feat_names, __SCHED_FEAT_NR, "TTWU_QUEUE");
|
|
if (i >= 0) {
|
|
static_key_disable(&sched_feat_keys[i]);
|
|
sysctl_sched_features &= ~(1UL << i);
|
|
}
|
|
|
|
topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH, cpu_online_mask);
|
|
|
|
enable_logging = !!sec_debug_is_enabled();
|
|
}
|
|
|
|
static DECLARE_WORK(walt_init_work, walt_init);
|
|
static void android_vh_update_topology_flags_workfn(void *unused, void *unused2)
|
|
{
|
|
schedule_work(&walt_init_work);
|
|
}
|
|
|
|
static void walt_devicetree_init(void)
|
|
{
|
|
struct device_node *np;
|
|
int ret;
|
|
|
|
np = of_find_node_by_name(NULL, "sched_walt");
|
|
if (!np) {
|
|
pr_err("Failed to find node of sched_walt\n");
|
|
return;
|
|
}
|
|
|
|
ret = of_property_read_u32(np, "panic_on_walt_bug", &sysctl_panic_on_walt_bug);
|
|
if (ret < 0) {
|
|
pr_err("Failed to read panic_on_walt_bug property\n");
|
|
return;
|
|
}
|
|
}
|
|
|
|
#define WALT_VENDOR_DATA_SIZE_TEST(wstruct, kstruct) \
|
|
BUILD_BUG_ON(sizeof(wstruct) > (sizeof(u64) * \
|
|
ARRAY_SIZE(((kstruct *)0)->android_vendor_data1)))
|
|
|
|
static int walt_module_init(void)
|
|
{
|
|
/* compile time checks for vendor data size */
|
|
WALT_VENDOR_DATA_SIZE_TEST(struct walt_task_struct, struct task_struct);
|
|
WALT_VENDOR_DATA_SIZE_TEST(struct walt_task_group, struct task_group);
|
|
walt_devicetree_init();
|
|
register_trace_android_vh_update_topology_flags_workfn(
|
|
android_vh_update_topology_flags_workfn, NULL);
|
|
|
|
if (topology_update_done)
|
|
schedule_work(&walt_init_work);
|
|
|
|
walt_cpufreq_cycle_cntr_driver_register();
|
|
walt_gclk_cycle_counter_driver_register();
|
|
|
|
return 0;
|
|
}
|
|
|
|
module_init(walt_module_init);
|
|
MODULE_LICENSE("GPL v2");
|
|
|
|
MODULE_SOFTDEP("pre: socinfo");
|
|
#if IS_ENABLED(CONFIG_SCHED_WALT_DEBUG)
|
|
MODULE_SOFTDEP("pre: sched-walt-debug");
|
|
#endif
|
|
|
|
#if IS_ENABLED(CONFIG_SEC_QC_SUMMARY)
|
|
#include <linux/samsung/debug/qcom/sec_qc_summary.h>
|
|
|
|
void sec_qc_summary_set_sched_walt_info(struct sec_qc_summary_data_apss *apss)
|
|
{
|
|
apss->aplpm.num_clusters = num_sched_clusters;
|
|
apss->aplpm.p_cluster = virt_to_phys(sched_cluster);
|
|
}
|
|
EXPORT_SYMBOL(sec_qc_summary_set_sched_walt_info);
|
|
#endif |