MIPS: pm-cps: Use per-CPU variables as per-CPU, not per-core

[ Upstream commit 00a134fc2bb4a5f8fada58cf7ff4259149691d64 ]

The pm-cps code has up until now used per-CPU variables indexed by core,
rather than CPU number, in order to share data amongst sibling CPUs (ie.
VPs/threads in a core). This works fine for single cluster systems, but
with multi-cluster systems a core number is no longer unique in the
system, leading to sharing between CPUs that are not actually siblings.

Avoid this issue by using per-CPU variables as they are more generally
used - ie. access them using CPU numbers rather than core numbers.
Sharing between siblings is then accomplished by:
 - Assigning the same pointer to entries for each sibling CPU for the
   nc_asm_enter & ready_count variables, which allow this by virtue of
   being per-CPU pointers.

 - Indexing by the first CPU set in a CPUs cpu_sibling_map in the case
   of pm_barrier, for which we can't use the previous approach because
   the per-CPU variable is not a pointer.

Signed-off-by: Paul Burton <paulburton@kernel.org>
Signed-off-by: Dragan Mladjenovic <dragan.mladjenovic@syrmia.com>
Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
Tested-by: Serge Semin <fancer.lancer@gmail.com>
Tested-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Paul Burton
2025-01-29 13:32:48 +01:00
committed by Greg Kroah-Hartman
parent ba41e4e627
commit 2e1b3650f5

View File

@@ -56,10 +56,7 @@ static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
/* Indicates online CPUs coupled with the current CPU */
static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
/*
* Used to synchronize entry to deep idle states. Actually per-core rather
* than per-CPU.
*/
/* Used to synchronize entry to deep idle states */
static DEFINE_PER_CPU_ALIGNED(atomic_t, pm_barrier);
/* Saved CPU state across the CPS_PM_POWER_GATED state */
@@ -118,9 +115,10 @@ int cps_pm_enter_state(enum cps_pm_state state)
cps_nc_entry_fn entry;
struct core_boot_config *core_cfg;
struct vpe_boot_config *vpe_cfg;
atomic_t *barrier;
/* Check that there is an entry function for this state */
entry = per_cpu(nc_asm_enter, core)[state];
entry = per_cpu(nc_asm_enter, cpu)[state];
if (!entry)
return -EINVAL;
@@ -156,7 +154,7 @@ int cps_pm_enter_state(enum cps_pm_state state)
smp_mb__after_atomic();
/* Create a non-coherent mapping of the core ready_count */
core_ready_count = per_cpu(ready_count, core);
core_ready_count = per_cpu(ready_count, cpu);
nc_addr = kmap_noncoherent(virt_to_page(core_ready_count),
(unsigned long)core_ready_count);
nc_addr += ((unsigned long)core_ready_count & ~PAGE_MASK);
@@ -164,7 +162,8 @@ int cps_pm_enter_state(enum cps_pm_state state)
/* Ensure ready_count is zero-initialised before the assembly runs */
WRITE_ONCE(*nc_core_ready_count, 0);
coupled_barrier(&per_cpu(pm_barrier, core), online);
barrier = &per_cpu(pm_barrier, cpumask_first(&cpu_sibling_map[cpu]));
coupled_barrier(barrier, online);
/* Run the generated entry code */
left = entry(online, nc_core_ready_count);
@@ -635,12 +634,14 @@ out_err:
static int cps_pm_online_cpu(unsigned int cpu)
{
enum cps_pm_state state;
unsigned core = cpu_core(&cpu_data[cpu]);
unsigned int sibling, core;
void *entry_fn, *core_rc;
enum cps_pm_state state;
core = cpu_core(&cpu_data[cpu]);
for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
if (per_cpu(nc_asm_enter, core)[state])
if (per_cpu(nc_asm_enter, cpu)[state])
continue;
if (!test_bit(state, state_support))
continue;
@@ -652,16 +653,19 @@ static int cps_pm_online_cpu(unsigned int cpu)
clear_bit(state, state_support);
}
per_cpu(nc_asm_enter, core)[state] = entry_fn;
for_each_cpu(sibling, &cpu_sibling_map[cpu])
per_cpu(nc_asm_enter, sibling)[state] = entry_fn;
}
if (!per_cpu(ready_count, core)) {
if (!per_cpu(ready_count, cpu)) {
core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
if (!core_rc) {
pr_err("Failed allocate core %u ready_count\n", core);
return -ENOMEM;
}
per_cpu(ready_count, core) = core_rc;
for_each_cpu(sibling, &cpu_sibling_map[cpu])
per_cpu(ready_count, sibling) = core_rc;
}
return 0;