Add Samsung-specific changes

2025-08-11 14:29:00 +02:00
parent c66122e619
commit 4d134a1294
2688 changed files with 1127995 additions and 11475 deletions


@@ -45,6 +45,18 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
If unsure, say N here.
config IOMMU_IO_PGTABLE_FAST
bool "Fast ARMv7/v8 Long Descriptor Format"
depends on (ARM || ARM64) && IOMMU_DMA
help
Enable support for a subset of the ARM long descriptor pagetable
format. This allocator achieves fast performance by
pre-allocating and pre-populating page table memory up front.
It only supports a 32-bit virtual address space.
This implementation is mainly optimized for use cases where the
buffers are small (<= 64K), since it only supports 4K page sizes.
config IOMMU_IO_PGTABLE_ARMV7S
bool "ARMv7/v8 Short Descriptor Format"
select IOMMU_IO_PGTABLE
@@ -332,6 +344,15 @@ config ARM_SMMU
Say Y here if your SoC includes an IOMMU device implementing
the ARM SMMU architecture.
config ARM_SMMU_CONTEXT_FAULT_RETRY
bool "Context fault retry sequence"
depends on ARM_SMMU && (ARCH_WAIPIO || ARCH_PARROT)
help
In some cases, issuing a TLB invalidate operation after a
context fault may cause a subsequent retry of the failing
address to succeed. This only applies to clients which have
stall-on-fault enabled, such as display.
config ARM_SMMU_LEGACY_DT_BINDINGS
bool "Support the legacy \"mmu-masters\" devicetree bindings"
depends on ARM_SMMU=y && OF
@@ -387,6 +408,44 @@ config ARM_SMMU_QCOM_DEBUG
Say Y here to enable debug for issues such as TLB sync timeouts,
which require implementation-defined register dumps.
config QTI_IOMMU_SUPPORT
tristate "Support for QTI iommu drivers"
help
The QTI GPU device may switch between multiple iommu domains,
depending on the use case. This introduces a need to track all such
domains in a non-driver-specific manner.
If in doubt say N.
config QCOM_IOMMU_UTIL
tristate "Support for qcom additions to the iommu framework"
help
QCOM iommu drivers support a general set of functionality in
addition to the functions provided by the iommu framework.
This includes devicetree properties for configuring iommu
groups and iova ranges.
Say N here if unsure.
config QCOM_IOMMU_DEBUG
tristate "IOMMU debugging and testing"
depends on QCOM_IOMMU_UTIL
depends on DEBUG_FS
help
This option is used to enable profiling and debugging in
the IOMMU framework code. IOMMU profiling and debugging
can be done through the debugfs nodes which this option
makes available.
config ARM_SMMU_SELFTEST
bool "ARM SMMU self test support"
depends on ARM_SMMU
help
Enables self-tests for the ARM SMMU. Tests basic hardware
configuration, such as interrupts. Note that enabling this
option can marginally increase the boot time.
If unsure, say N.
config ARM_SMMU_V3
tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
depends on ARM64
@@ -503,4 +562,28 @@ config SPRD_IOMMU
Say Y here if you want to use the multimedia devices listed above.
config QCOM_LAZY_MAPPING
tristate "Reference counted iommu-mapping support"
depends on QCOM_DMABUF_HEAPS
depends on IOMMU_API
help
DMA-BUFs may be shared between several software clients.
Reference counting the mapping may simplify coordination between
these clients, and decrease latency by preventing multiple
map/unmaps of the same region.
If unsure, say N here.
config QTVM_IOMMU_TRACE_HOOKS
bool "Trace hooks used for QTVM"
depends on QCOM_IOMMU_UTIL
depends on !ANDROID_VENDOR_HOOKS
help
When Android vendor hooks aren't available, such as in a
non-Android environment, enable this config so that the
equivalent hooks are still called. This helps enable certain
features in a non-Android environment.
If unsure, say N here.
endif # IOMMU_SUPPORT


@@ -29,4 +29,12 @@ obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_QCOM_LAZY_MAPPING) += msm_dma_iommu_mapping.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
obj-$(CONFIG_QCOM_IOMMU_UTIL) += qcom_iommu_util.o
qcom_iommu_util-y += qcom-iommu-util.o
qcom_iommu_util-$(CONFIG_IOMMU_IO_PGTABLE_FAST) += qcom-dma-iommu-generic.o io-pgtable-fast.o dma-mapping-fast.o
qcom_iommu_util-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += qcom-io-pgtable-arm.o qcom-io-pgtable-alloc.o
obj-$(CONFIG_QTI_IOMMU_SUPPORT) += iommu-logger.o
obj-$(CONFIG_QCOM_IOMMU_DEBUG) += qcom_iommu_debug.o
qcom_iommu_debug-y += qcom-iommu-debug.o qcom-iommu-debug-user.o


@@ -2,5 +2,5 @@
obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
obj-$(CONFIG_ARM_SMMU) += arm_smmu.o
arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o
arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o arm-smmu-qcom-pm.o
arm_smmu-$(CONFIG_ARM_SMMU_QCOM_DEBUG) += arm-smmu-qcom-debug.o


@@ -219,6 +219,12 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
of_device_is_compatible(np, "nvidia,tegra186-smmu"))
return nvidia_smmu_impl_init(smmu);
if (of_device_is_compatible(smmu->dev->of_node, "qcom,qsmmu-v500"))
return qsmmuv500_impl_init(smmu);
if (of_device_is_compatible(smmu->dev->of_node, "qcom,smmu-v2"))
return qsmmuv2_impl_init(smmu);
if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM))
smmu = qcom_smmu_impl_init(smmu);


@@ -0,0 +1,337 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/clk.h>
#include <linux/regulator/consumer.h>
#include <linux/interconnect.h>
#include <linux/of_platform.h>
#include <linux/iopoll.h>
#include "arm-smmu.h"
#define ARM_SMMU_ICC_AVG_BW 0
#define ARM_SMMU_ICC_PEAK_BW_HIGH 1000
#define ARM_SMMU_ICC_PEAK_BW_LOW 0
#define ARM_SMMU_ICC_ACTIVE_ONLY_TAG 0x3
/*
* Theoretically, our interconnect does not guarantee the order between
* writes to different "register blocks" even with device memory type.
* It does guarantee that the completion of a read to a particular
* register block implies that previously issued writes to that
* register block have completed, with device memory type.
*
* In particular, we need to ensure that writes to iommu registers
* complete before we turn off the power.
*/
static void arm_smmu_arch_write_sync(struct arm_smmu_device *smmu)
{
u32 id;
if (!smmu)
return;
/* Read to complete prior write transactions */
id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
/* Wait for read to complete before off */
rmb();
}
static int arm_smmu_prepare_clocks(struct arm_smmu_power_resources *pwr)
{
int i, ret = 0;
for (i = 0; i < pwr->num_clocks; ++i) {
ret = clk_prepare(pwr->clocks[i]);
if (ret) {
dev_err(pwr->dev, "Couldn't prepare clock #%d\n", i);
while (i--)
clk_unprepare(pwr->clocks[i]);
break;
}
}
return ret;
}
static void arm_smmu_unprepare_clocks(struct arm_smmu_power_resources *pwr)
{
int i;
for (i = pwr->num_clocks; i; --i)
clk_unprepare(pwr->clocks[i - 1]);
}
static int arm_smmu_enable_clocks(struct arm_smmu_power_resources *pwr)
{
int i, ret = 0;
for (i = 0; i < pwr->num_clocks; ++i) {
ret = clk_enable(pwr->clocks[i]);
if (ret) {
dev_err(pwr->dev, "Couldn't enable clock #%d\n", i);
while (i--)
clk_disable(pwr->clocks[i]);
break;
}
}
return ret;
}
static void arm_smmu_disable_clocks(struct arm_smmu_power_resources *pwr)
{
int i;
for (i = pwr->num_clocks; i; --i)
clk_disable(pwr->clocks[i - 1]);
}
static int arm_smmu_raise_interconnect_bw(struct arm_smmu_power_resources *pwr)
{
if (!pwr->icc_path)
return 0;
return icc_set_bw(pwr->icc_path, ARM_SMMU_ICC_AVG_BW,
ARM_SMMU_ICC_PEAK_BW_HIGH);
}
static void arm_smmu_lower_interconnect_bw(struct arm_smmu_power_resources *pwr)
{
if (!pwr->icc_path)
return;
WARN_ON(icc_set_bw(pwr->icc_path, ARM_SMMU_ICC_AVG_BW,
ARM_SMMU_ICC_PEAK_BW_LOW));
}
static int arm_smmu_enable_regulators(struct arm_smmu_power_resources *pwr)
{
struct regulator_bulk_data *consumers;
int num_consumers, ret;
int i;
num_consumers = pwr->num_gdscs;
consumers = pwr->gdscs;
for (i = 0; i < num_consumers; i++) {
ret = regulator_enable(consumers[i].consumer);
if (ret)
goto out;
}
return 0;
out:
i -= 1;
for (; i >= 0; i--)
regulator_disable(consumers[i].consumer);
return ret;
}
int arm_smmu_power_on(struct arm_smmu_power_resources *pwr)
{
int ret;
mutex_lock(&pwr->power_lock);
if (pwr->power_count > 0) {
pwr->power_count += 1;
mutex_unlock(&pwr->power_lock);
return 0;
}
ret = arm_smmu_raise_interconnect_bw(pwr);
if (ret)
goto out_unlock;
ret = arm_smmu_enable_regulators(pwr);
if (ret)
goto out_disable_bus;
ret = arm_smmu_prepare_clocks(pwr);
if (ret)
goto out_disable_regulators;
ret = arm_smmu_enable_clocks(pwr);
if (ret)
goto out_unprepare_clocks;
if (pwr->resume) {
ret = pwr->resume(pwr);
if (ret)
goto out_disable_clocks;
}
pwr->power_count = 1;
mutex_unlock(&pwr->power_lock);
return 0;
out_disable_clocks:
arm_smmu_disable_clocks(pwr);
out_unprepare_clocks:
arm_smmu_unprepare_clocks(pwr);
out_disable_regulators:
regulator_bulk_disable(pwr->num_gdscs, pwr->gdscs);
out_disable_bus:
arm_smmu_lower_interconnect_bw(pwr);
out_unlock:
mutex_unlock(&pwr->power_lock);
return ret;
}
/*
* Needing to pass smmu to this api for arm_smmu_arch_write_sync is awkward.
*/
void arm_smmu_power_off(struct arm_smmu_device *smmu,
struct arm_smmu_power_resources *pwr)
{
mutex_lock(&pwr->power_lock);
if (pwr->power_count == 0) {
WARN(1, "%s: Bad power count\n", dev_name(pwr->dev));
mutex_unlock(&pwr->power_lock);
return;
} else if (pwr->power_count > 1) {
pwr->power_count--;
mutex_unlock(&pwr->power_lock);
return;
}
if (pwr->suspend)
pwr->suspend(pwr);
arm_smmu_arch_write_sync(smmu);
arm_smmu_disable_clocks(pwr);
arm_smmu_unprepare_clocks(pwr);
regulator_bulk_disable(pwr->num_gdscs, pwr->gdscs);
arm_smmu_lower_interconnect_bw(pwr);
pwr->power_count = 0;
mutex_unlock(&pwr->power_lock);
}
static int arm_smmu_init_clocks(struct arm_smmu_power_resources *pwr)
{
const char *cname;
struct property *prop;
int i;
struct device *dev = pwr->dev;
pwr->num_clocks =
of_property_count_strings(dev->of_node, "clock-names");
if (pwr->num_clocks < 1) {
pwr->num_clocks = 0;
return 0;
}
pwr->clocks = devm_kzalloc(
dev, sizeof(*pwr->clocks) * pwr->num_clocks,
GFP_KERNEL);
if (!pwr->clocks)
return -ENOMEM;
i = 0;
of_property_for_each_string(dev->of_node, "clock-names",
prop, cname) {
struct clk *c = devm_clk_get(dev, cname);
if (IS_ERR(c)) {
dev_err(dev, "Couldn't get clock: %s\n",
cname);
return PTR_ERR(c);
}
if (clk_get_rate(c) == 0) {
long rate = clk_round_rate(c, 1000);
clk_set_rate(c, rate);
}
pwr->clocks[i] = c;
++i;
}
return 0;
}
static int arm_smmu_init_regulators(struct arm_smmu_power_resources *pwr)
{
const char *cname;
struct property *prop;
int i;
struct device *dev = pwr->dev;
pwr->num_gdscs =
of_property_count_strings(dev->of_node, "qcom,regulator-names");
if (pwr->num_gdscs < 1) {
pwr->num_gdscs = 0;
return 0;
}
pwr->gdscs = devm_kzalloc(
dev, sizeof(*pwr->gdscs) * pwr->num_gdscs, GFP_KERNEL);
if (!pwr->gdscs)
return -ENOMEM;
i = 0;
of_property_for_each_string(dev->of_node, "qcom,regulator-names",
prop, cname)
pwr->gdscs[i++].supply = cname;
return devm_regulator_bulk_get(dev, pwr->num_gdscs, pwr->gdscs);
}
static int arm_smmu_init_interconnect(struct arm_smmu_power_resources *pwr)
{
struct device *dev = pwr->dev;
/* We don't want the interconnect APIs to print an error message */
if (!of_find_property(dev->of_node, "interconnects", NULL)) {
dev_dbg(dev, "No interconnect info\n");
return 0;
}
pwr->icc_path = devm_of_icc_get(dev, NULL);
if (IS_ERR_OR_NULL(pwr->icc_path)) {
if (PTR_ERR(pwr->icc_path) != -EPROBE_DEFER)
dev_err(dev, "Unable to read interconnect path from devicetree rc: %ld\n",
PTR_ERR(pwr->icc_path));
return pwr->icc_path ? PTR_ERR(pwr->icc_path) : -EINVAL;
}
if (of_property_read_bool(dev->of_node, "qcom,active-only"))
icc_set_tag(pwr->icc_path, ARM_SMMU_ICC_ACTIVE_ONLY_TAG);
return 0;
}
/*
* Cleanup done by devm. Any non-devm resources must clean up themselves.
*/
struct arm_smmu_power_resources *arm_smmu_init_power_resources(
struct device *dev)
{
struct arm_smmu_power_resources *pwr;
int ret;
pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL);
if (!pwr)
return ERR_PTR(-ENOMEM);
pwr->dev = dev;
mutex_init(&pwr->power_lock);
ret = arm_smmu_init_clocks(pwr);
if (ret)
return ERR_PTR(ret);
ret = arm_smmu_init_regulators(pwr);
if (ret)
return ERR_PTR(ret);
ret = arm_smmu_init_interconnect(pwr);
if (ret)
return ERR_PTR(ret);
return pwr;
}
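For illustration, a minimal sketch of how a caller might pair the power helpers above around register access; example_read_id() is a hypothetical function, not part of this commit, and assumes smmu->pwr was set up via arm_smmu_init_power_resources():

/* Hypothetical caller: bracket register access with power on/off. */
static int example_read_id(struct arm_smmu_device *smmu)
{
	u32 id;
	int ret;

	ret = arm_smmu_power_on(smmu->pwr);
	if (ret)
		return ret;

	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
	dev_info(smmu->dev, "IDR0 = 0x%x\n", id);

	arm_smmu_power_off(smmu, smmu->pwr);
	return 0;
}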

File diff suppressed because it is too large.


@@ -0,0 +1,200 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019, 2021 The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM arm_smmu
#if !defined(_TRACE_ARM_SMMU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ARM_SMMU_H
#include <linux/types.h>
#include <linux/tracepoint.h>
#include <linux/scatterlist.h>
#include "arm-smmu.h"
struct device;
DECLARE_EVENT_CLASS(iommu_tlbi,
TP_PROTO(struct arm_smmu_domain *domain),
TP_ARGS(domain),
TP_STRUCT__entry(
__string(group_name, dev_name(domain->dev))
),
TP_fast_assign(
__assign_str(group_name, dev_name(domain->dev));
),
TP_printk("group=%s",
__get_str(group_name)
)
);
DEFINE_EVENT(iommu_tlbi, tlbi_start,
TP_PROTO(struct arm_smmu_domain *domain),
TP_ARGS(domain)
);
DEFINE_EVENT(iommu_tlbi, tlbi_end,
TP_PROTO(struct arm_smmu_domain *domain),
TP_ARGS(domain)
);
DECLARE_EVENT_CLASS(iommu_pgtable,
TP_PROTO(struct arm_smmu_domain *domain, unsigned long iova,
unsigned long long ipa, size_t granule),
TP_ARGS(domain, iova, ipa, granule),
TP_STRUCT__entry(
__string(group_name, dev_name(domain->dev))
__field(unsigned long, iova)
__field(unsigned long long, ipa)
__field(size_t, granule)
),
TP_fast_assign(
__assign_str(group_name, dev_name(domain->dev));
__entry->iova = iova;
__entry->ipa = ipa;
__entry->granule = granule;
),
TP_printk("group=%s table_base_iova=%lx table_ipa=%llx table_size=%zx",
__get_str(group_name), __entry->iova,
__entry->ipa, __entry->granule
)
);
DEFINE_EVENT(iommu_pgtable, iommu_pgtable_add,
TP_PROTO(struct arm_smmu_domain *domain, unsigned long iova,
unsigned long long ipa, size_t granule),
TP_ARGS(domain, iova, ipa, granule)
);
DEFINE_EVENT(iommu_pgtable, iommu_pgtable_remove,
TP_PROTO(struct arm_smmu_domain *domain, unsigned long iova,
unsigned long long ipa, size_t granule),
TP_ARGS(domain, iova, ipa, granule)
);
DECLARE_EVENT_CLASS(iommu_map_pages,
TP_PROTO(struct arm_smmu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount),
TP_ARGS(domain, iova, pgsize, pgcount),
TP_STRUCT__entry(
__string(group_name, dev_name(domain->dev))
__field(unsigned long, iova)
__field(size_t, pgsize)
__field(size_t, pgcount)
),
TP_fast_assign(
__assign_str(group_name, dev_name(domain->dev));
__entry->iova = iova;
__entry->pgsize = pgsize;
__entry->pgcount = pgcount;
),
TP_printk("group=%s iova=%lx size=%zx pgsize=%zx pgcount=%zx",
__get_str(group_name), __entry->iova,
__entry->pgsize * __entry->pgcount,
__entry->pgsize, __entry->pgcount
)
);
DEFINE_EVENT(iommu_map_pages, map_pages,
TP_PROTO(struct arm_smmu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount),
TP_ARGS(domain, iova, pgsize, pgcount)
);
DEFINE_EVENT(iommu_map_pages, unmap_pages,
TP_PROTO(struct arm_smmu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount),
TP_ARGS(domain, iova, pgsize, pgcount)
);
/* Refer to samples/ftrace_events */
#ifndef __TRACE_EVENT_ARM_SMMU_HELPER_FUNCTIONS
#define __TRACE_EVENT_ARM_SMMU_HELPER_FUNCTIONS
static inline unsigned long sum_scatterlist_length(struct scatterlist *sgl,
unsigned int nents)
{
int i = 0;
unsigned long sum = 0;
for (i = 0; i < nents; i++, sgl = sg_next(sgl))
sum += sgl->length;
return sum;
}
#endif
TRACE_EVENT(tlbsync_timeout,
TP_PROTO(struct device *dev),
TP_ARGS(dev),
TP_STRUCT__entry(
__string(device, dev_name(dev))
),
TP_fast_assign(
__assign_str(device, dev_name(dev));
),
TP_printk("smmu=%s",
__get_str(device)
)
);
TRACE_EVENT(smmu_init,
TP_PROTO(u64 time),
TP_ARGS(time),
TP_STRUCT__entry(
__field(u64, time)
),
TP_fast_assign(
__entry->time = time;
),
TP_printk("ARM SMMU init latency: %lld us", __entry->time)
);
#endif /* _TRACE_ARM_SMMU_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/iommu/arm/arm-smmu
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE arm-smmu-trace
/* This part must be outside protection */
#include <trace/define_trace.h>
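As a usage illustration, each DEFINE_EVENT()/TRACE_EVENT() above expands to a trace_<event>() helper. A hypothetical call site (not part of this commit) wrapping a TLB invalidation might look like:

/* Hypothetical call site for the tlbi_start/tlbi_end events above. */
static void example_tlb_inv_context(struct arm_smmu_domain *smmu_domain)
{
	trace_tlbi_start(smmu_domain);
	/* ... issue TLBIALL/TLBIASID and wait for TLBSYNC here ... */
	trace_tlbi_end(smmu_domain);
}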

File diff suppressed because it is too large.


@@ -5,6 +5,8 @@
* Copyright (C) 2013 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
*
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ARM_SMMU_H
@@ -22,10 +24,14 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/qcom-iommu-util.h>
#include <linux/qcom-io-pgtable.h>
/* Configuration registers */
#define ARM_SMMU_GR0_sCR0 0x0
#define ARM_SMMU_sCR0_VMID16EN BIT(31)
#define ARM_SMMU_sCR0_SHCFG GENMASK(23, 22)
#define ARM_SMMU_sCR0_SHCFG_NSH 0x3
#define ARM_SMMU_sCR0_BSU GENMASK(15, 14)
#define ARM_SMMU_sCR0_FB BIT(13)
#define ARM_SMMU_sCR0_PTM BIT(12)
@@ -117,6 +123,8 @@ enum arm_smmu_s2cr_type {
S2CR_TYPE_FAULT,
};
#define ARM_SMMU_S2CR_EXIDVALID BIT(10)
#define ARM_SMMU_S2CR_SHCFG GENMASK(9, 8)
#define ARM_SMMU_S2CR_SHCFG_NSH 0x3
#define ARM_SMMU_S2CR_CBNDX GENMASK(7, 0)
/* Context bank attribute registers */
@@ -136,12 +144,23 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_CBAR_VMID GENMASK(7, 0)
#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
#define CBFRSYNRA_SID_MASK (0xffff)
#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
#define ARM_SMMU_CBA2R_VMID16 GENMASK(31, 16)
#define ARM_SMMU_CBA2R_VA64 BIT(0)
#define ARM_SMMU_CB_SCTLR 0x0
#define ARM_SMMU_SCTLR_WACFG GENMASK(27, 26)
#define ARM_SMMU_SCTLR_WACFG_WA 0x2
#define ARM_SMMU_SCTLR_RACFG GENMASK(25, 24)
#define ARM_SMMU_SCTLR_RACFG_RA 0x2
#define ARM_SMMU_SCTLR_SHCFG GENMASK(23, 22)
#define ARM_SMMU_SCTLR_SHCFG_OSH 0x1
#define ARM_SMMU_SCTLR_SHCFG_NSH 0x3
#define ARM_SMMU_SCTLR_MTCFG BIT(20)
#define ARM_SMMU_SCTLR_MEM_ATTR GENMASK(19, 16)
#define ARM_SMMU_SCTLR_MEM_ATTR_OISH_WB_CACHE 0xf
#define ARM_SMMU_SCTLR_S1_ASIDPNE BIT(12)
#define ARM_SMMU_SCTLR_CFCFG BIT(7)
#define ARM_SMMU_SCTLR_HUPCF BIT(8)
@@ -156,6 +175,7 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_RESUME_TERMINATE BIT(0)
#define ARM_SMMU_RESUME_RESUME 0
#define ARM_SMMU_CB_TCR2 0x10
#define ARM_SMMU_TCR2_SEP GENMASK(17, 15)
@@ -219,10 +239,19 @@ enum arm_smmu_cbar_type {
ARM_SMMU_FSR_TF | \
ARM_SMMU_FSR_IGN)
#define ARM_SMMU_CB_FSRRESTORE 0x5c
#define ARM_SMMU_CB_FAR 0x60
#define ARM_SMMU_CB_FSYNR0 0x68
#define ARM_SMMU_FSYNR0_WNR BIT(4)
#define ARM_SMMU_FSYNR0_PNU BIT(5)
#define ARM_SMMU_FSYNR0_IND BIT(6)
#define ARM_SMMU_FSYNR0_NSATTR BIT(8)
#define ARM_SMMU_CB_FSYNR1 0x6c
#define ARM_SMMU_FSYNR1_BID GENMASK(15, 13)
#define ARM_SMMU_FSYNR1_PID GENMASK(12, 8)
#define ARM_SMMU_FSYNR1_MID GENMASK(7, 0)
#define ARM_SMMU_CB_FSYNR1 0x6c
@@ -235,6 +264,24 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_CB_TLBSTATUS 0x7f4
#define ARM_SMMU_CB_ATS1PR 0x800
/* Implementation Defined Register Space 5 registers */
/* Relative to IMPL_DEF5 page */
#define ARM_SMMU_STATS_SYNC_INV_TBU_ACK 0x5dc
#define TBU_SYNC_ACK GENMASK(31, 17)
#define TBU_SYNC_REQ BIT(16)
#define TBU_INV_ACK GENMASK(15, 1)
#define TBU_INV_REQ BIT(0)
#define APPS_SMMU_TBU_REG_ACCESS_REQ_NS 0x5f8
#define APPS_SMMU_TBU_REG_ACCESS_ACK_NS 0x5fc
/* Relative to SMMU_BASE */
#define ARM_SMMU_TBU_PWR_STATUS 0x2204
/* Relative to SMMU_BASE */
#define ARM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR 0x2670
#define TCU_SYNC_IN_PRGSS BIT(20)
#define TCU_INV_IN_PRGSS BIT(16)
#define ARM_SMMU_CB_ATSR 0x8f0
#define ARM_SMMU_ATSR_ACTIVE BIT(0)
@@ -242,8 +289,9 @@ enum arm_smmu_cbar_type {
/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS 128
#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
#define TLB_LOOP_TIMEOUT 500000 /* 500ms */
#define TLB_SPIN_COUNT 10
#define TLB_LOOP_INC_MAX 1000 /* 1ms */
/* Shared driver definitions */
enum arm_smmu_arch_version {
@@ -257,6 +305,33 @@ enum arm_smmu_implementation {
ARM_MMU500,
CAVIUM_SMMUV2,
QCOM_SMMUV2,
QCOM_SMMUV500,
};
/*
* Describes resources required for on/off power operation.
* Separate reference count is provided for atomic/nonatomic
* operations.
* gdscs - on kernel 6.6, power domains are used instead. This
field can be removed once no legacy targets use it.
*/
struct arm_smmu_power_resources {
struct device *dev;
struct clk **clocks;
int num_clocks;
struct regulator_bulk_data *gdscs;
int num_gdscs;
struct icc_path *icc_path;
/* Protects power_count */
struct mutex power_lock;
int power_count;
int (*resume)(struct arm_smmu_power_resources *pwr);
void (*suspend)(struct arm_smmu_power_resources *pwr);
};
struct arm_smmu_s2cr {
@@ -265,6 +340,7 @@ struct arm_smmu_s2cr {
enum arm_smmu_s2cr_type type;
enum arm_smmu_s2cr_privcfg privcfg;
u8 cbndx;
bool pinned;
};
struct arm_smmu_smr {
@@ -272,6 +348,7 @@ struct arm_smmu_smr {
u16 id;
bool valid;
bool pinned;
bool used;
};
struct arm_smmu_device {
@@ -297,6 +374,14 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_EXIDS (1 << 12)
u32 features;
#define ARM_SMMU_OPT_FATAL_ASF (1 << 0)
#define ARM_SMMU_OPT_3LVL_TABLES (1 << 2)
#define ARM_SMMU_OPT_NO_ASID_RETENTION (1 << 3)
#define ARM_SMMU_OPT_DISABLE_ATOS (1 << 4)
#define ARM_SMMU_OPT_CONTEXT_FAULT_RETRY (1 << 5)
#define ARM_SMMU_OPT_MULTI_MATCH_HANDOFF_SMR (1 << 6)
#define ARM_SMMU_OPT_IGNORE_NUMPAGENDXB (1 << 7)
u32 options;
enum arm_smmu_arch_version version;
enum arm_smmu_implementation model;
const struct arm_smmu_impl *impl;
@@ -328,6 +413,17 @@ struct arm_smmu_device {
/* IOMMU core code handle */
struct iommu_device iommu;
/* Specific to QCOM */
struct arm_smmu_impl_def_reg *impl_def_attach_registers;
unsigned int num_impl_def_attach_registers;
struct arm_smmu_power_resources *pwr;
/* used for qsmmuv500 scm_io_readl */
phys_addr_t phys_addr;
unsigned long sync_timed_out;
};
enum arm_smmu_context_fmt {
@@ -344,6 +440,19 @@ struct arm_smmu_cfg {
u16 asid;
u16 vmid;
};
u32 procid;
struct {
u32 wacfg:2;
u32 racfg:2;
u32 shcfg:2;
u32 mtcfg:1;
u32 memattr:4;
u32 hupcf:1;
u32 cfcfg:1;
u32 cfre:1;
u32 m:1;
} sctlr;
enum arm_smmu_cbar_type cbar;
enum arm_smmu_context_fmt fmt;
bool flush_walk_prefer_tlbiasid;
@@ -354,6 +463,7 @@ struct arm_smmu_cb {
u64 ttbr[2];
u32 tcr[2];
u32 mair[2];
u32 sctlr;
struct arm_smmu_cfg *cfg;
};
@@ -364,16 +474,63 @@ enum arm_smmu_domain_stage {
ARM_SMMU_DOMAIN_BYPASS,
};
struct arm_smmu_fault_model {
char non_fatal : 1;
char no_cfre : 1;
char no_stall : 1;
char hupcf : 1;
};
struct arm_smmu_mapping_cfg {
char s1_bypass : 1;
char atomic : 1;
char fast : 1;
};
struct qcom_iommu_fault_param {
struct device *dev;
fault_handler_irq_t handler;
void *token;
};
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
struct device *dev;
struct io_pgtable_ops *pgtbl_ops;
unsigned long pgtbl_quirks;
bool force_coherent_walk;
const struct iommu_flush_ops *flush_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
struct mutex init_mutex; /* Protects smmu pointer */
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
spinlock_t cb_lock; /* Serialises ATS1* ops */
spinlock_t sync_lock; /* Serialises TLB syncs */
struct arm_smmu_fault_model fault_model;
struct arm_smmu_mapping_cfg mapping_cfg;
bool delayed_s1_trans_enable;
u32 secure_vmid;
fault_handler_irq_t fault_handler_irq;
void *handler_irq_token;
struct qcom_iommu_fault_param fault_param;
/*
* Track PMDs which require tlb invalidate prior to being
* freed, or before their iovas can be reused by iommu_map().
*/
spinlock_t iotlb_gather_lock;
struct list_head *freelist;
bool deferred_flush;
struct iommu_domain domain;
/* mapping_cfg.atomic indicates that runtime power management should be disabled. */
bool rpm_always_on;
/* Skip TLB management. */
bool skip_tlb_management;
#ifdef CONFIG_ARM_SMMU_CONTEXT_FAULT_RETRY
u64 prev_fault_address;
u32 fault_retry_counter;
#endif
};
struct arm_smmu_master_cfg {
@@ -420,7 +577,28 @@ static inline u32 arm_smmu_lpae_vtcr(const struct io_pgtable_cfg *cfg)
FIELD_PREP(ARM_SMMU_VTCR_T0SZ, cfg->arm_lpae_s2_cfg.vtcr.tsz);
}
static inline u32 arm_smmu_lpae_sctlr(struct arm_smmu_cfg *cfg)
{
bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
return FIELD_PREP(ARM_SMMU_SCTLR_WACFG, cfg->sctlr.wacfg) |
FIELD_PREP(ARM_SMMU_SCTLR_RACFG, cfg->sctlr.racfg) |
FIELD_PREP(ARM_SMMU_SCTLR_SHCFG, cfg->sctlr.shcfg) |
FIELD_PREP(ARM_SMMU_SCTLR_MTCFG, cfg->sctlr.mtcfg) |
FIELD_PREP(ARM_SMMU_SCTLR_MEM_ATTR, cfg->sctlr.memattr) |
FIELD_PREP(ARM_SMMU_SCTLR_S1_ASIDPNE, stage1) |
FIELD_PREP(ARM_SMMU_SCTLR_HUPCF, cfg->sctlr.hupcf) |
FIELD_PREP(ARM_SMMU_SCTLR_CFCFG, cfg->sctlr.cfcfg) |
ARM_SMMU_SCTLR_CFIE |
FIELD_PREP(ARM_SMMU_SCTLR_CFRE, cfg->sctlr.cfre) |
FIELD_PREP(ARM_SMMU_SCTLR_E, IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) |
ARM_SMMU_SCTLR_AFE |
ARM_SMMU_SCTLR_TRE |
FIELD_PREP(ARM_SMMU_SCTLR_M, cfg->sctlr.m);
}
/* Implementation details, yay! */
struct arm_smmu_impl {
u32 (*read_reg)(struct arm_smmu_device *smmu, int page, int offset);
void (*write_reg)(struct arm_smmu_device *smmu, int page, int offset,
@@ -432,6 +610,13 @@ struct arm_smmu_impl {
int (*reset)(struct arm_smmu_device *smmu);
int (*init_context)(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *cfg, struct device *dev);
void (*init_context_bank)(struct arm_smmu_domain *smmu_domain,
struct device *dev);
phys_addr_t (*iova_to_phys_hard)(struct arm_smmu_domain *smmu_domain,
struct qcom_iommu_atos_txn *txn);
void (*tlb_sync_timeout)(struct arm_smmu_device *smmu);
void (*device_remove)(struct arm_smmu_device *smmu);
int (*device_group)(struct device *dev, struct iommu_group *group);
void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync,
int status);
int (*def_domain_type)(struct device *dev);
@@ -503,6 +688,15 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
#define ARM_SMMU_GR0 0
#define ARM_SMMU_GR1 1
/*
* Implementation defined space starts after SMMU GR space, so IMPL_DEF page n
* is page n + 2 in the SMMU register space.
*/
#define ARM_SMMU_IMPL_DEF0 2
#define ARM_SMMU_IMPL_DEF4 6
#define ARM_SMMU_IMPL_DEF5 7
#define ARM_SMMU_CB(s, n) ((s)->numpage + (n))
#define arm_smmu_gr0_read(s, o) \
@@ -527,8 +721,22 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu);
struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu);
struct arm_smmu_device *qsmmuv500_impl_init(struct arm_smmu_device *smmu);
struct arm_smmu_device *qsmmuv2_impl_init(struct arm_smmu_device *smmu);
struct arm_smmu_device *qcom_adreno_smmu_impl_init(struct arm_smmu_device *smmu);
void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx);
int arm_mmu500_reset(struct arm_smmu_device *smmu);
int arm_smmu_power_on(struct arm_smmu_power_resources *pwr);
void arm_smmu_power_off(struct arm_smmu_device *smmu,
struct arm_smmu_power_resources *pwr);
struct arm_smmu_power_resources *arm_smmu_init_power_resources(
struct device *dev);
extern struct platform_driver qsmmuv500_tbu_driver;
/* Misc. constants */
#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
#endif /* _ARM_SMMU_H */
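To illustrate the implementation-defined register pages added above, a hypothetical helper (assuming the arm_smmu_readl() accessor from this header) could read the TBU sync/invalidate ack register relative to the IMPL_DEF5 page:

/* Hypothetical helper: IMPL_DEF5 maps to SMMU page 7 per the n + 2 rule above. */
static inline u32 example_read_tbu_sync_ack(struct arm_smmu_device *smmu)
{
	return arm_smmu_readl(smmu, ARM_SMMU_IMPL_DEF5,
			      ARM_SMMU_STATS_SYNC_INV_TBU_ACK);
}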

File diff suppressed because it is too large.


@@ -0,0 +1,841 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#define pr_fmt(fmt) "io-pgtable-fast: " fmt
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/io-pgtable.h>
#include <linux/io-pgtable-fast.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/dma-mapping.h>
#include <linux/qcom-iommu-util.h>
#include <linux/qcom-io-pgtable.h>
#define AV8L_FAST_MAX_ADDR_BITS 48
/* Page table bits */
#define AV8L_FAST_PTE_TYPE_SHIFT 0
#define AV8L_FAST_PTE_TYPE_MASK 0x3
#define AV8L_FAST_PTE_TYPE_BLOCK 1
#define AV8L_FAST_PTE_TYPE_TABLE 3
#define AV8L_FAST_PTE_TYPE_PAGE 3
#define AV8L_FAST_PTE_NSTABLE (((av8l_fast_iopte)1) << 63)
#define AV8L_FAST_PTE_XN (((av8l_fast_iopte)3) << 53)
#define AV8L_FAST_PTE_AF (((av8l_fast_iopte)1) << 10)
#define AV8L_FAST_PTE_SH_NS (((av8l_fast_iopte)0) << 8)
#define AV8L_FAST_PTE_SH_OS (((av8l_fast_iopte)2) << 8)
#define AV8L_FAST_PTE_SH_IS (((av8l_fast_iopte)3) << 8)
#define AV8L_FAST_PTE_SH_MASK (((av8l_fast_iopte)3) << 8)
#define AV8L_FAST_PTE_NS (((av8l_fast_iopte)1) << 5)
#define AV8L_FAST_PTE_VALID (((av8l_fast_iopte)1) << 0)
#define AV8L_FAST_PTE_ATTR_LO_MASK (((av8l_fast_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define AV8L_FAST_PTE_ATTR_HI_MASK (((av8l_fast_iopte)6) << 52)
#define AV8L_FAST_PTE_ATTR_MASK (AV8L_FAST_PTE_ATTR_LO_MASK | \
AV8L_FAST_PTE_ATTR_HI_MASK)
#define AV8L_FAST_PTE_ADDR_MASK ((av8l_fast_iopte)0xfffffffff000)
/* Stage-1 PTE */
#define AV8L_FAST_PTE_AP_UNPRIV (((av8l_fast_iopte)1) << 6)
#define AV8L_FAST_PTE_AP_RDONLY (((av8l_fast_iopte)2) << 6)
#define AV8L_FAST_PTE_ATTRINDX_SHIFT 2
#define AV8L_FAST_PTE_ATTRINDX_MASK 0x7
#define AV8L_FAST_PTE_nG (((av8l_fast_iopte)1) << 11)
/* Stage-2 PTE */
#define AV8L_FAST_PTE_HAP_FAULT (((av8l_fast_iopte)0) << 6)
#define AV8L_FAST_PTE_HAP_READ (((av8l_fast_iopte)1) << 6)
#define AV8L_FAST_PTE_HAP_WRITE (((av8l_fast_iopte)2) << 6)
#define AV8L_FAST_PTE_MEMATTR_OIWB (((av8l_fast_iopte)0xf) << 2)
#define AV8L_FAST_PTE_MEMATTR_NC (((av8l_fast_iopte)0x5) << 2)
#define AV8L_FAST_PTE_MEMATTR_DEV (((av8l_fast_iopte)0x1) << 2)
/* Register bits */
#define ARM_32_LPAE_TCR_EAE (1 << 31)
#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31)
#define AV8L_FAST_TCR_TG0_4K (0 << 14)
#define AV8L_FAST_TCR_TG0_64K (1 << 14)
#define AV8L_FAST_TCR_TG0_16K (2 << 14)
#define AV8L_FAST_TCR_SH0_SHIFT 12
#define AV8L_FAST_TCR_SH0_MASK 0x3
#define AV8L_FAST_TCR_SH_NS 0
#define AV8L_FAST_TCR_SH_OS 2
#define AV8L_FAST_TCR_SH_IS 3
#define AV8L_FAST_TCR_ORGN0_SHIFT 10
#define AV8L_FAST_TCR_IRGN0_SHIFT 8
#define AV8L_FAST_TCR_RGN_MASK 0x3
#define AV8L_FAST_TCR_RGN_NC 0
#define AV8L_FAST_TCR_RGN_WBWA 1
#define AV8L_FAST_TCR_RGN_WT 2
#define AV8L_FAST_TCR_RGN_WB 3
#define AV8L_FAST_TCR_SL0_SHIFT 6
#define AV8L_FAST_TCR_SL0_MASK 0x3
#define AV8L_FAST_TCR_T0SZ_SHIFT 0
#define AV8L_FAST_TCR_SZ_MASK 0xf
#define AV8L_FAST_TCR_PS_SHIFT 16
#define AV8L_FAST_TCR_PS_MASK 0x7
#define AV8L_FAST_TCR_IPS_SHIFT 32
#define AV8L_FAST_TCR_IPS_MASK 0x7
#define AV8L_FAST_TCR_PS_32_BIT 0x0ULL
#define AV8L_FAST_TCR_PS_36_BIT 0x1ULL
#define AV8L_FAST_TCR_PS_40_BIT 0x2ULL
#define AV8L_FAST_TCR_PS_42_BIT 0x3ULL
#define AV8L_FAST_TCR_PS_44_BIT 0x4ULL
#define AV8L_FAST_TCR_PS_48_BIT 0x5ULL
#define AV8L_FAST_TCR_EPD1_SHIFT 23
#define AV8L_FAST_TCR_EPD1_FAULT 1
#define AV8L_FAST_MAIR_ATTR_SHIFT(n) ((n) << 3)
#define AV8L_FAST_MAIR_ATTR_MASK 0xff
#define AV8L_FAST_MAIR_ATTR_DEVICE 0x04
#define AV8L_FAST_MAIR_ATTR_NC 0x44
#define AV8L_FAST_MAIR_ATTR_WBRWA 0xff
#define AV8L_FAST_MAIR_ATTR_UPSTREAM 0xf4
#define AV8L_FAST_MAIR_ATTR_IDX_NC 0
#define AV8L_FAST_MAIR_ATTR_IDX_CACHE 1
#define AV8L_FAST_MAIR_ATTR_IDX_DEV 2
#define AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM 3
#define AV8L_FAST_PAGE_SHIFT 12
#define PTE_MAIR_IDX(pte) \
((pte >> AV8L_FAST_PTE_ATTRINDX_SHIFT) & \
AV8L_FAST_PTE_ATTRINDX_MASK)
#define PTE_SH_IDX(pte) (pte & AV8L_FAST_PTE_SH_MASK)
#define iopte_pmd_offset(pmds, base, iova) (pmds + ((iova - base) >> 12))
static inline dma_addr_t av8l_dma_addr(void *addr)
{
if (is_vmalloc_addr(addr))
return page_to_phys(vmalloc_to_page(addr)) +
offset_in_page(addr);
return virt_to_phys(addr);
}
static void __av8l_clean_range(struct device *dev, void *start, void *end)
{
size_t size;
void *region_end;
unsigned long page_end;
if (is_vmalloc_addr(start)) {
while (start < end) {
page_end = round_down((unsigned long)start + PAGE_SIZE,
PAGE_SIZE);
region_end = min_t(void *, end, page_end);
size = region_end - start;
dma_sync_single_for_device(dev, av8l_dma_addr(start),
size, DMA_TO_DEVICE);
start = region_end;
}
} else {
size = end - start;
dma_sync_single_for_device(dev, av8l_dma_addr(start), size,
DMA_TO_DEVICE);
}
}
static void av8l_clean_range(struct io_pgtable_cfg *cfg, av8l_fast_iopte *start,
av8l_fast_iopte *end)
{
if (!cfg->coherent_walk)
__av8l_clean_range(cfg->iommu_dev, start, end);
}
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
#include <linux/notifier.h>
static ATOMIC_NOTIFIER_HEAD(av8l_notifier_list);
void av8l_register_notify(struct notifier_block *nb)
{
atomic_notifier_chain_register(&av8l_notifier_list, nb);
}
EXPORT_SYMBOL(av8l_register_notify);
static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
{
if (unlikely(*ptep)) {
atomic_notifier_call_chain(
&av8l_notifier_list, MAPPED_OVER_STALE_TLB,
(void *) ptep);
pr_err("Tried to map over a non-vacant pte: 0x%llx @ %p\n",
*ptep, ptep);
pr_err("Nearby memory:\n");
print_hex_dump(KERN_ERR, "pgtbl: ", DUMP_PREFIX_ADDRESS,
32, 8, ptep - 16, 32 * sizeof(*ptep), false);
}
}
void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops, u64 base,
u64 end, bool skip_sync)
{
int i;
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
av8l_fast_iopte *pmdp = iopte_pmd_offset(data->pmds, data->base, base);
for (i = base >> AV8L_FAST_PAGE_SHIFT;
i <= (end >> AV8L_FAST_PAGE_SHIFT); ++i) {
if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
*pmdp = 0;
if (!skip_sync)
av8l_clean_range(&iop->cfg, pmdp, pmdp + 1);
}
pmdp++;
}
}
#else
static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
{
}
#endif
static av8l_fast_iopte
av8l_fast_prot_to_pte(struct av8l_fast_io_pgtable *data, int prot)
{
av8l_fast_iopte pte = AV8L_FAST_PTE_TYPE_PAGE
| AV8L_FAST_PTE_AF
| AV8L_FAST_PTE_nG;
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
pte |= AV8L_FAST_PTE_AP_RDONLY;
if (!(prot & IOMMU_PRIV))
pte |= AV8L_FAST_PTE_AP_UNPRIV;
if (prot & IOMMU_MMIO)
pte |= (AV8L_FAST_MAIR_ATTR_IDX_DEV
<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
else if (prot & IOMMU_CACHE)
pte |= (AV8L_FAST_MAIR_ATTR_IDX_CACHE
<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
else if (prot & IOMMU_SYS_CACHE)
pte |= (AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM
<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
if (prot & IOMMU_CACHE)
pte |= AV8L_FAST_PTE_SH_IS;
else
pte |= AV8L_FAST_PTE_SH_OS;
if (prot & IOMMU_NOEXEC)
pte |= AV8L_FAST_PTE_XN;
return pte;
}
static int av8l_fast_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, data->base, iova);
unsigned long i, nptes = size >> AV8L_FAST_PAGE_SHIFT;
av8l_fast_iopte pte;
pte = av8l_fast_prot_to_pte(data, prot);
paddr &= AV8L_FAST_PTE_ADDR_MASK;
for (i = 0; i < nptes; i++, paddr += SZ_4K) {
__av8l_check_for_stale_tlb(ptep + i);
*(ptep + i) = pte | paddr;
}
av8l_clean_range(&iop->cfg, ptep, ptep + nptes);
return 0;
}
int av8l_fast_map_public(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
return av8l_fast_map(ops, iova, paddr, size, prot, GFP_ATOMIC);
}
static int av8l_fast_map_pages(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr,
size_t pgsize, size_t pgcount, int prot, gfp_t gfp,
size_t *mapped)
{
int ret = av8l_fast_map(ops, iova, paddr, pgsize * pgcount, prot, gfp);
if (!ret)
*mapped = pgsize * pgcount;
return ret;
}
static size_t
__av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, bool allow_stale_tlb)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
unsigned long nptes;
av8l_fast_iopte *ptep;
int val = allow_stale_tlb
? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
: 0;
ptep = iopte_pmd_offset(data->pmds, data->base, iova);
nptes = size >> AV8L_FAST_PAGE_SHIFT;
memset(ptep, val, sizeof(*ptep) * nptes);
av8l_clean_range(&iop->cfg, ptep, ptep + nptes);
if (!allow_stale_tlb)
io_pgtable_tlb_flush_all(&data->iop);
return size;
}
/* caller must take care of tlb cache maintenance */
void av8l_fast_unmap_public(struct io_pgtable_ops *ops, unsigned long iova,
size_t size)
{
__av8l_fast_unmap(ops, iova, size, true);
}
static size_t av8l_fast_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova, size_t pgsize,
size_t pgcount, struct iommu_iotlb_gather *gather)
{
return __av8l_fast_unmap(ops, iova, pgsize * pgcount, false);
}
/* TODO: Add this back in android-mainline */
static int __maybe_unused av8l_fast_map_sg(struct io_pgtable_ops *ops,
unsigned long iova, struct scatterlist *sgl,
unsigned int nents, int prot, gfp_t gfp, size_t *mapped)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i) {
av8l_fast_map(ops, iova, sg_phys(sg), sg->length, prot, gfp);
iova += sg->length;
*mapped += sg->length;
}
return 0;
}
int av8l_fast_map_sg_public(struct io_pgtable_ops *ops,
unsigned long iova, struct scatterlist *sgl,
unsigned int nents, int prot, size_t *mapped)
{
return av8l_fast_map_sg(ops, iova, sgl, nents, prot, GFP_ATOMIC, mapped);
}
#if defined(CONFIG_ARM64)
#define FAST_PGDNDX(va) (((va) & 0x7fc0000000) >> 27)
#elif defined(CONFIG_ARM)
#define FAST_PGDNDX(va) (((va) & 0xc0000000) >> 27)
#endif
static phys_addr_t av8l_fast_iova_to_phys(struct io_pgtable_ops *ops,
unsigned long iova)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
av8l_fast_iopte pte, *pgdp, *pudp, *pmdp;
unsigned long pgd;
phys_addr_t phys;
const unsigned long pts = AV8L_FAST_PTE_TYPE_SHIFT;
const unsigned long ptm = AV8L_FAST_PTE_TYPE_MASK;
const unsigned long ptt = AV8L_FAST_PTE_TYPE_TABLE;
const unsigned long ptp = AV8L_FAST_PTE_TYPE_PAGE;
const av8l_fast_iopte am = AV8L_FAST_PTE_ADDR_MASK;
/* TODO: clean up some of these magic numbers... */
pgd = (unsigned long)data->pgd | FAST_PGDNDX(iova);
pgdp = (av8l_fast_iopte *)pgd;
pte = *pgdp;
if (((pte >> pts) & ptm) != ptt)
return 0;
pudp = phys_to_virt((pte & am) | ((iova & 0x3fe00000) >> 18));
pte = *pudp;
if (((pte >> pts) & ptm) != ptt)
return 0;
pmdp = phys_to_virt((pte & am) | ((iova & 0x1ff000) >> 9));
pte = *pmdp;
if (((pte >> pts) & ptm) != ptp)
return 0;
phys = pte & am;
return phys | (iova & 0xfff);
}
phys_addr_t av8l_fast_iova_to_phys_public(struct io_pgtable_ops *ops,
unsigned long iova)
{
return av8l_fast_iova_to_phys(ops, iova);
}
static bool av8l_fast_iova_coherent(struct io_pgtable_ops *ops,
unsigned long iova)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, data->base, iova);
return ((PTE_MAIR_IDX(*ptep) == AV8L_FAST_MAIR_ATTR_IDX_CACHE) &&
((PTE_SH_IDX(*ptep) == AV8L_FAST_PTE_SH_OS) ||
(PTE_SH_IDX(*ptep) == AV8L_FAST_PTE_SH_IS)));
}
bool av8l_fast_iova_coherent_public(struct io_pgtable_ops *ops,
unsigned long iova)
{
return av8l_fast_iova_coherent(ops, iova);
}
static struct av8l_fast_io_pgtable *
av8l_fast_alloc_pgtable_data(struct io_pgtable_cfg *cfg)
{
struct av8l_fast_io_pgtable *data;
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
data->iop.ops = (struct io_pgtable_ops) {
.map_pages = av8l_fast_map_pages,
.unmap_pages = av8l_fast_unmap_pages,
.iova_to_phys = av8l_fast_iova_to_phys,
};
return data;
}
/*
* We need max 1 page for the pgd, 4 pages for puds (1GB VA per pud page) and
* 2048 pages for pmds (each pud page contains 512 table entries, each
* pointing to a pmd).
*/
#define NUM_PGD_PAGES 1
#define NUM_PUD_PAGES 4
#define NUM_PMD_PAGES 2048
#define NUM_PGTBL_PAGES (NUM_PGD_PAGES + NUM_PUD_PAGES + NUM_PMD_PAGES)
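Spelling out the arithmetic behind these limits, assuming the 32-bit VA / 4K page restriction imposed elsewhere in this file:

/*
 * For reference: a 32-bit (4 GB) VA space with 4 KB pages needs
 * 4 GB / 4 KB = 1,048,576 leaf PTEs. At 512 eight-byte entries per
 * 4 KB page that is 2048 last-level (pmd) pages; their 2048 table
 * entries need 4 pud pages (512 entries each); and the 4 pud entries
 * fit in a single pgd page, giving NUM_PGTBL_PAGES = 2053.
 */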
/* Undefine arch-specific definitions which depend on the page table format */
#undef pud_index
#undef pud_mask
#undef pud_next
#undef pmd_index
#undef pmd_mask
#undef pmd_next
#define pud_index(addr) (((addr) >> 30) & 0x3)
#define pud_mask(addr) ((addr) & ~((1UL << 30) - 1))
#define pud_next(addr, end) \
({ unsigned long __boundary = pud_mask(addr + (1UL << 30));\
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
#define pmd_index(addr) (((addr) >> 21) & 0x1ff)
#define pmd_mask(addr) ((addr) & ~((1UL << 21) - 1))
#define pmd_next(addr, end) \
({ unsigned long __boundary = pmd_mask(addr + (1UL << 21));\
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
static int
av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
struct io_pgtable_cfg *cfg, void *cookie)
{
int i, j, pg = 0;
struct page **pages, *page;
struct qcom_io_pgtable_info *pgtbl_info = to_qcom_io_pgtable_info(cfg);
dma_addr_t pud, pmd;
int pmd_pg_index;
dma_addr_t base = pgtbl_info->iova_base;
dma_addr_t end = pgtbl_info->iova_end;
pages = kmalloc(sizeof(*pages) * NUM_PGTBL_PAGES, __GFP_NOWARN |
__GFP_NORETRY);
if (!pages)
pages = vmalloc(sizeof(*pages) * NUM_PGTBL_PAGES);
if (!pages)
return -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_free_pages_arr;
pages[pg++] = page;
data->pgd = page_address(page);
/*
* We need max 2048 entries at level 2 to map 4GB of VA space. A page
* can hold 512 entries, so we need max 4 pages.
*/
for (i = pud_index(base), pud = base; pud < end;
++i, pud = pud_next(pud, end)) {
av8l_fast_iopte pte, *ptep;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_free_pages;
pages[pg++] = page;
data->puds[i] = page_address(page);
pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE;
ptep = ((av8l_fast_iopte *)data->pgd) + i;
*ptep = pte;
}
av8l_clean_range(cfg, data->pgd, data->pgd + 4);
/*
* We have max 4 puds, each of which can point to 512 pmds, so we'll
* have max 2048 pmds, each of which can hold 512 ptes, for a grand
* total of 2048*512=1048576 PTEs.
*/
pmd_pg_index = pg;
for (i = pud_index(base), pud = base; pud < end;
++i, pud = pud_next(pud, end)) {
for (j = pmd_index(pud), pmd = pud; pmd < pud_next(pud, end);
++j, pmd = pmd_next(pmd, end)) {
av8l_fast_iopte pte, *pudp;
void *addr;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_free_pages;
pages[pg++] = page;
addr = page_address(page);
av8l_clean_range(cfg, addr, addr + SZ_4K);
pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE;
pudp = data->puds[i] + j;
*pudp = pte;
}
av8l_clean_range(cfg, data->puds[i], data->puds[i] + 512);
}
/*
* We map the pmds into a virtually contiguous space so that we
* don't have to traverse the first two levels of the page tables
* to find the appropriate pud. Instead, it will be a simple
* offset from the virtual base of the pmds.
*/
data->pmds = vmap(&pages[pmd_pg_index], pg - pmd_pg_index,
VM_IOREMAP, PAGE_KERNEL);
if (!data->pmds)
goto err_free_pages;
data->pages = pages;
data->base = base;
data->end = end;
data->nr_pages = pg;
return 0;
err_free_pages:
for (i = 0; i < pg; ++i)
__free_page(pages[i]);
err_free_pages_arr:
kvfree(pages);
return -ENOMEM;
}
static struct io_pgtable *
av8l_fast_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
u64 reg;
struct av8l_fast_io_pgtable *data =
av8l_fast_alloc_pgtable_data(cfg);
typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
if (!data)
return NULL;
/* restrict according to the fast map requirements */
cfg->ias = 32;
cfg->pgsize_bitmap = SZ_4K;
/* TCR */
if (cfg->coherent_walk) {
tcr->sh = AV8L_FAST_TCR_SH_IS;
tcr->irgn = AV8L_FAST_TCR_RGN_WBWA;
tcr->orgn = AV8L_FAST_TCR_RGN_WBWA;
if (WARN_ON(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
goto out_free_data;
} else {
tcr->sh = AV8L_FAST_TCR_SH_OS;
tcr->irgn = AV8L_FAST_TCR_RGN_NC;
if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
tcr->orgn = AV8L_FAST_TCR_RGN_NC;
else
tcr->orgn = AV8L_FAST_TCR_RGN_WBWA;
}
tcr->tg = AV8L_FAST_TCR_TG0_4K;
switch (cfg->oas) {
case 32:
tcr->ips = AV8L_FAST_TCR_PS_32_BIT;
break;
case 36:
tcr->ips = AV8L_FAST_TCR_PS_36_BIT;
break;
case 40:
tcr->ips = AV8L_FAST_TCR_PS_40_BIT;
break;
case 42:
tcr->ips = AV8L_FAST_TCR_PS_42_BIT;
break;
case 44:
tcr->ips = AV8L_FAST_TCR_PS_44_BIT;
break;
case 48:
tcr->ips = AV8L_FAST_TCR_PS_48_BIT;
break;
default:
goto out_free_data;
}
tcr->tsz = 64ULL - cfg->ias;
/* MAIRs */
reg = (AV8L_FAST_MAIR_ATTR_NC
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_NC)) |
(AV8L_FAST_MAIR_ATTR_WBRWA
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_CACHE)) |
(AV8L_FAST_MAIR_ATTR_DEVICE
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_DEV)) |
(AV8L_FAST_MAIR_ATTR_UPSTREAM
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM));
cfg->arm_lpae_s1_cfg.mair = reg;
/* Allocate all page table memory! */
if (av8l_fast_prepopulate_pgtables(data, cfg, cookie))
goto out_free_data;
/* TTBRs */
cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
return &data->iop;
out_free_data:
kfree(data);
return NULL;
}
static void av8l_fast_free_pgtable(struct io_pgtable *iop)
{
int i;
struct av8l_fast_io_pgtable *data = iof_pgtable_to_data(iop);
vunmap(data->pmds);
for (i = 0; i < data->nr_pages; ++i)
__free_page(data->pages[i]);
kvfree(data->pages);
kfree(data);
}
struct io_pgtable_init_fns io_pgtable_av8l_fast_init_fns = {
.alloc = av8l_fast_alloc_pgtable,
.free = av8l_fast_free_pgtable,
};
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_SELFTEST
#include <linux/dma-map-ops.h>
static struct io_pgtable_cfg *cfg_cookie;
static void dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule, void *cookie)
{
dummy_tlb_flush(iova, granule, granule, cookie);
}
static struct iommu_flush_ops dummy_tlb_ops __initdata = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_add_page = dummy_tlb_add_page,
};
/*
* Returns true if the iova range is successfully mapped to the contiguous
* phys range in ops.
*/
static bool av8l_fast_range_has_specific_mapping(struct io_pgtable_ops *ops,
const unsigned long iova_start,
const phys_addr_t phys_start,
const size_t size)
{
u64 iova = iova_start;
phys_addr_t phys = phys_start;
while (iova < (iova_start + size)) {
/* + 42 just to make sure offsetting is working */
if (ops->iova_to_phys(ops, iova + 42) != (phys + 42))
return false;
iova += SZ_4K;
phys += SZ_4K;
}
return true;
}
static int __init av8l_fast_positive_testing(void)
{
int failed = 0;
u64 iova;
struct io_pgtable_ops *ops;
struct qcom_io_pgtable_info pgtable_info;
struct av8l_fast_io_pgtable *data;
av8l_fast_iopte *pmds;
u64 max = SZ_1G * 4ULL - 1;
u64 base = 0;
pgtable_info.iova_base = base;
pgtable_info.iova_end = max;
pgtable_info.pgtbl_cfg = (struct io_pgtable_cfg) {
.quirks = 0,
.tlb = &dummy_tlb_ops,
.ias = 32,
.oas = 32,
.pgsize_bitmap = SZ_4K,
.coherent_walk = true,
};
cfg_cookie = &pgtable_info.pgtbl_cfg;
ops = alloc_io_pgtable_ops(ARM_V8L_FAST, &pgtable_info.pgtbl_cfg,
&pgtable_info.pgtbl_cfg);
if (WARN_ON(!ops))
return 1;
data = iof_pgtable_ops_to_data(ops);
pmds = data->pmds;
/* map the entire 4GB VA space with 4K map calls */
for (iova = base; iova < max; iova += SZ_4K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_4K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
base, max - base)))
failed++;
/* unmap it all */
for (iova = base; iova < max; iova += SZ_4K) {
if (WARN_ON(ops->unmap(ops, iova, SZ_4K, NULL) != SZ_4K))
failed++;
}
/* sweep up TLB proving PTEs */
av8l_fast_clear_stale_ptes(ops, base, max, false);
/* map the entire 4GB VA space with 8K map calls */
for (iova = base; iova < max; iova += SZ_8K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
base, max - base)))
failed++;
/* unmap it all with 8K unmap calls */
for (iova = base; iova < max; iova += SZ_8K) {
if (WARN_ON(ops->unmap(ops, iova, SZ_8K, NULL) != SZ_8K))
failed++;
}
/* sweep up TLB proving PTEs */
av8l_fast_clear_stale_ptes(ops, base, max, false);
/* map the entire 4GB VA space with 16K map calls */
for (iova = base; iova < max; iova += SZ_16K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
base, max - base)))
failed++;
/* unmap it all */
for (iova = base; iova < max; iova += SZ_16K) {
if (WARN_ON(ops->unmap(ops, iova, SZ_16K, NULL) != SZ_16K))
failed++;
}
/* sweep up TLB proving PTEs */
av8l_fast_clear_stale_ptes(ops, base, max, false);
/* map the entire 4GB VA space with 64K map calls */
for (iova = base; iova < max; iova += SZ_64K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
base, max - base)))
failed++;
/* unmap it all at once */
if (WARN_ON(ops->unmap(ops, base, max - base, NULL) != (max - base)))
failed++;
free_io_pgtable_ops(ops);
return failed;
}
static int __init av8l_fast_do_selftests(void)
{
int failed = 0;
failed += av8l_fast_positive_testing();
pr_err("selftest: completed with %d failures\n", failed);
return 0;
}
subsys_initcall(av8l_fast_do_selftests);
#endif
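A rough sketch of how the *_public helpers above might be exercised by a caller such as a DMA mapping layer; example_fast_map_one() is hypothetical and not part of this commit:

/* Hypothetical caller: map one 4K page, check it, then unmap it. */
static int example_fast_map_one(struct io_pgtable_ops *ops,
				unsigned long iova, phys_addr_t paddr)
{
	int ret;

	ret = av8l_fast_map_public(ops, iova, paddr, SZ_4K,
				   IOMMU_READ | IOMMU_WRITE);
	if (ret)
		return ret;

	WARN_ON(av8l_fast_iova_to_phys_public(ops, iova) != paddr);

	/* Leaves a stale TLB entry; the caller owns TLB maintenance. */
	av8l_fast_unmap_public(ops, iova, SZ_4K);
	return 0;
}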


@@ -0,0 +1,188 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/bitfield.h>
#include <linux/module.h>
#include <linux/iommu.h>
#include <linux/qcom-io-pgtable.h>
#include <linux/slab.h>
#include "iommu-logger.h"
static DEFINE_MUTEX(iommu_debug_attachments_lock);
static LIST_HEAD(iommu_debug_attachments);
static unsigned int iommu_logger_pgtable_levels(struct io_pgtable *iop)
{
unsigned int va_bits, pte_size, bits_per_level, pg_shift;
unsigned long ias = iop->cfg.ias;
switch ((u32)iop->fmt) {
case ARM_32_LPAE_S1:
case ARM_64_LPAE_S1:
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
case ARM_V8L_FAST:
#endif
case QCOM_ARM_64_LPAE_S1:
pte_size = sizeof(u64);
break;
default:
return 0;
}
pg_shift = __ffs(iop->cfg.pgsize_bitmap);
bits_per_level = pg_shift - ilog2(pte_size);
va_bits = ias - pg_shift;
return DIV_ROUND_UP(va_bits, bits_per_level);
}
static enum iommu_logger_pgtable_fmt iommu_logger_pgtable_fmt_lut(
enum io_pgtable_fmt fmt)
{
switch ((u32)fmt) {
case ARM_32_LPAE_S1:
return IOMMU_LOGGER_ARM_32_LPAE_S1;
case ARM_64_LPAE_S1:
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
case ARM_V8L_FAST:
#endif
case QCOM_ARM_64_LPAE_S1:
return IOMMU_LOGGER_ARM_64_LPAE_S1;
default:
return IOMMU_LOGGER_MAX_PGTABLE_FMTS;
}
}
static int iommu_logger_domain_ttbrs(struct io_pgtable *iop, void **ttbr0_ptr,
void **ttbr1_ptr)
{
int ret;
u64 ttbr0;
switch ((u32)iop->fmt) {
case ARM_32_LPAE_S1:
case ARM_64_LPAE_S1:
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
case ARM_V8L_FAST:
#endif
case QCOM_ARM_64_LPAE_S1:
ttbr0 = iop->cfg.arm_lpae_s1_cfg.ttbr;
ret = 0;
break;
default:
ret = -EINVAL;
}
if (!ret) {
*ttbr0_ptr = phys_to_virt(ttbr0);
/*
* FIXME - fix ttbr1 retrieval later. In this kernel version
* struct io_pgtable no longer contains this information.
*/
*ttbr1_ptr = NULL;
}
return ret;
}
static struct iommu_debug_attachment *iommu_logger_init(
struct iommu_domain *domain,
struct device *dev,
struct io_pgtable *iop)
{
struct iommu_debug_attachment *logger;
char *client_name;
struct iommu_group *group;
unsigned int levels = iommu_logger_pgtable_levels(iop);
enum iommu_logger_pgtable_fmt fmt = iommu_logger_pgtable_fmt_lut(
iop->fmt);
void *ttbr0, *ttbr1;
int ret;
if (!levels || fmt == IOMMU_LOGGER_MAX_PGTABLE_FMTS)
return ERR_PTR(-EINVAL);
ret = iommu_logger_domain_ttbrs(iop, &ttbr0, &ttbr1);
if (ret)
return ERR_PTR(ret);
logger = kzalloc(sizeof(*logger), GFP_KERNEL);
if (!logger)
return ERR_PTR(-ENOMEM);
client_name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
if (!client_name) {
kfree(logger);
return ERR_PTR(-ENOMEM);
}
group = iommu_group_get(dev);
iommu_group_put(group);
INIT_LIST_HEAD(&logger->list);
logger->domain = domain;
logger->group = group;
logger->client_name = client_name;
logger->fmt = fmt;
logger->levels = levels;
logger->ttbr0 = ttbr0;
logger->ttbr1 = ttbr1;
logger->dev = dev;
return logger;
}
int iommu_logger_register(struct iommu_domain *domain, struct device *dev,
struct io_pgtable_ops *ops)
{
struct iommu_debug_attachment *logger;
struct io_pgtable *iop;
int ret = 0;
/* qcom,iommu-dma = "disabled" causes ops to be NULL */
if (!ops)
return 0;
if (!domain || !dev)
return -EINVAL;
iop = io_pgtable_ops_to_pgtable(ops);
mutex_lock(&iommu_debug_attachments_lock);
list_for_each_entry(logger, &iommu_debug_attachments, list)
if (logger->dev == dev && logger->domain == domain)
goto out;
logger = iommu_logger_init(domain, dev, iop);
if (IS_ERR(logger)) {
ret = PTR_ERR(logger);
goto out;
}
list_add(&logger->list, &iommu_debug_attachments);
out:
mutex_unlock(&iommu_debug_attachments_lock);
return ret;
}
EXPORT_SYMBOL(iommu_logger_register);
void iommu_logger_unregister(struct device *dev, struct iommu_domain *domain)
{
struct iommu_debug_attachment *logger, *tmp;
mutex_lock(&iommu_debug_attachments_lock);
list_for_each_entry_safe(logger, tmp, &iommu_debug_attachments, list) {
if (logger->dev == dev || logger->domain == domain) {
list_del(&logger->list);
kfree(logger->client_name);
kfree(logger);
}
}
mutex_unlock(&iommu_debug_attachments_lock);
}
EXPORT_SYMBOL(iommu_logger_unregister);
MODULE_DESCRIPTION("QTI IOMMU SUPPORT");
MODULE_LICENSE("GPL");
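To show where the logger hooks in, a hypothetical attach-path snippet (not part of this commit) that records a new attachment for debug tools:

/* Hypothetical attach path: register the attachment with the logger. */
static int example_attach_and_log(struct iommu_domain *domain,
				  struct device *dev,
				  struct io_pgtable_ops *pgtbl_ops)
{
	int ret;

	ret = iommu_attach_device(domain, dev);
	if (ret)
		return ret;

	/* pgtbl_ops may be NULL when qcom,iommu-dma is "disabled". */
	ret = iommu_logger_register(domain, dev, pgtbl_ops);
	if (ret)
		dev_warn(dev, "IOMMU logger registration failed: %d\n", ret);

	return 0;
}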


@@ -0,0 +1,53 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __LINUX_QTI_IOMMU_LOGGER_H
#define __LINUX_QTI_IOMMU_LOGGER_H
#include <linux/io-pgtable.h>
enum iommu_logger_pgtable_fmt {
IOMMU_LOGGER_ARM_32_LPAE_S1,
IOMMU_LOGGER_ARM_64_LPAE_S1,
IOMMU_LOGGER_MAX_PGTABLE_FMTS,
};
/*
* Each group may have more than one domain; but each domain may
* only have one group.
*/
struct iommu_debug_attachment {
struct iommu_domain *domain;
struct iommu_group *group;
char *client_name;
enum iommu_logger_pgtable_fmt fmt;
unsigned int levels;
/*
* Virtual addresses of the top-level page tables are stored here,
* as they are more useful for debug tools than physical addresses.
*/
void *ttbr0;
void *ttbr1;
struct list_head list;
struct device *dev;
};
#if IS_ENABLED(CONFIG_QTI_IOMMU_SUPPORT)
int iommu_logger_register(struct iommu_domain *domain, struct device *dev,
struct io_pgtable_ops *ops);
void iommu_logger_unregister(struct device *dev, struct iommu_domain *domain);
#else
static inline int iommu_logger_register(struct iommu_domain *domain,
struct device *dev,
struct io_pgtable_ops *ops)
{
return 0;
}
static inline void iommu_logger_unregister(struct device *dev, struct iommu_domain *domain) {}
#endif /* CONFIG_QTI_IOMMU_SUPPORT */
#endif /* __LINUX_QTI_IOMMU_LOGGER_H */


@@ -0,0 +1,481 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2015-2019, 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/dma-map-ops.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <asm/barrier.h>
#include <linux/msm_dma_iommu_mapping.h>
#include <linux/qcom-dma-mapping.h>
/**
* struct msm_iommu_map - represents a mapping of an ion buffer to an iommu
* @lnode - list node to exist in the buffer's list of iommu mappings
* @dev - Device this is mapped to. Used as key
* @sgl - The scatterlist for this mapping
* @nents - Number of entries in sgl
* @dir - The direction for the map.
@meta - Backpointer to the meta this mapping belongs to.
* @ref - for reference counting this mapping
* @attrs - dma mapping attributes
* @buf_start_addr - address of start of buffer
*
* Represents a mapping of one dma_buf buffer to a particular device
* and address range. There may exist other mappings of this buffer in
* different devices. All mappings will have the same cacheability and security.
*/
struct msm_iommu_map {
struct list_head lnode;
struct rb_node node;
struct device *dev;
struct scatterlist *sgl;
unsigned int nents;
enum dma_data_direction dir;
struct msm_iommu_meta *meta;
struct kref ref;
unsigned long attrs;
dma_addr_t buf_start_addr;
};
struct msm_iommu_meta {
struct rb_node node;
struct list_head iommu_maps;
struct kref ref;
struct mutex lock;
void *buffer;
};
static struct rb_root iommu_root;
static DEFINE_MUTEX(msm_iommu_map_mutex);
static void msm_iommu_meta_add(struct msm_iommu_meta *meta)
{
struct rb_root *root = &iommu_root;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct msm_iommu_meta *entry;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct msm_iommu_meta, node);
if (meta->buffer < entry->buffer)
p = &(*p)->rb_left;
else if (meta->buffer > entry->buffer)
p = &(*p)->rb_right;
else
pr_err("%s: dma_buf %pK already exists\n", __func__,
entry->buffer);
}
rb_link_node(&meta->node, parent, p);
rb_insert_color(&meta->node, root);
}
static struct msm_iommu_meta *msm_iommu_meta_lookup(void *buffer)
{
struct rb_root *root = &iommu_root;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct msm_iommu_meta *entry = NULL;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct msm_iommu_meta, node);
if (buffer < entry->buffer)
p = &(*p)->rb_left;
else if (buffer > entry->buffer)
p = &(*p)->rb_right;
else
return entry;
}
return NULL;
}
static void msm_iommu_add(struct msm_iommu_meta *meta,
struct msm_iommu_map *iommu)
{
INIT_LIST_HEAD(&iommu->lnode);
list_add(&iommu->lnode, &meta->iommu_maps);
}
static struct msm_iommu_map *msm_iommu_lookup(struct msm_iommu_meta *meta,
struct device *dev)
{
struct msm_iommu_map *entry;
list_for_each_entry(entry, &meta->iommu_maps, lnode) {
if (entry->dev == dev)
return entry;
}
return NULL;
}
static struct msm_iommu_meta *msm_iommu_meta_create(struct dma_buf *dma_buf)
{
struct msm_iommu_meta *meta;
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&meta->iommu_maps);
meta->buffer = dma_buf->priv;
kref_init(&meta->ref);
mutex_init(&meta->lock);
msm_iommu_meta_add(meta);
return meta;
}
static void msm_iommu_meta_put(struct msm_iommu_meta *meta);
static struct scatterlist *clone_sgl(struct scatterlist *sg, int nents)
{
struct scatterlist *next, *s;
int i;
struct sg_table table;
if (sg_alloc_table(&table, nents, GFP_KERNEL))
return NULL;
next = table.sgl;
for_each_sg(sg, s, nents, i) {
*next = *s;
next = sg_next(next);
}
return table.sgl;
}
static inline int __msm_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf,
unsigned long attrs)
{
struct msm_iommu_map *iommu_map;
struct msm_iommu_meta *iommu_meta = NULL;
int ret = 0;
bool extra_meta_ref_taken = false;
int late_unmap = !(attrs & DMA_ATTR_NO_DELAYED_UNMAP);
mutex_lock(&msm_iommu_map_mutex);
iommu_meta = msm_iommu_meta_lookup(dma_buf->priv);
if (!iommu_meta) {
iommu_meta = msm_iommu_meta_create(dma_buf);
if (IS_ERR(iommu_meta)) {
mutex_unlock(&msm_iommu_map_mutex);
ret = PTR_ERR(iommu_meta);
goto out;
}
if (late_unmap) {
kref_get(&iommu_meta->ref);
extra_meta_ref_taken = true;
}
} else {
kref_get(&iommu_meta->ref);
}
mutex_unlock(&msm_iommu_map_mutex);
mutex_lock(&iommu_meta->lock);
iommu_map = msm_iommu_lookup(iommu_meta, dev);
if (!iommu_map) {
iommu_map = kmalloc(sizeof(*iommu_map), GFP_KERNEL);
if (!iommu_map) {
ret = -ENOMEM;
goto out_unlock;
}
ret = dma_map_sg_attrs(dev, sg, nents, dir, attrs);
if (!ret) {
kfree(iommu_map);
goto out_unlock;
}
iommu_map->sgl = clone_sgl(sg, nents);
if (!iommu_map->sgl) {
kfree(iommu_map);
ret = -ENOMEM;
goto out_unlock;
}
iommu_map->nents = nents;
iommu_map->dev = dev;
iommu_map->dir = dir;
iommu_map->attrs = attrs;
iommu_map->buf_start_addr = sg_phys(sg);
kref_init(&iommu_map->ref);
if (late_unmap)
kref_get(&iommu_map->ref);
iommu_map->meta = iommu_meta;
msm_iommu_add(iommu_meta, iommu_map);
} else {
if (nents == iommu_map->nents &&
dir == iommu_map->dir &&
(attrs & ~DMA_ATTR_SKIP_CPU_SYNC) ==
(iommu_map->attrs & ~DMA_ATTR_SKIP_CPU_SYNC) &&
sg_phys(sg) == iommu_map->buf_start_addr) {
struct scatterlist *sg_tmp = sg;
struct scatterlist *map_sg;
int i;
for_each_sg(iommu_map->sgl, map_sg, nents, i) {
sg_dma_address(sg_tmp) = sg_dma_address(map_sg);
sg_dma_len(sg_tmp) = sg_dma_len(map_sg);
if (sg_dma_len(map_sg) == 0)
break;
sg_tmp = sg_next(sg_tmp);
if (sg_tmp == NULL)
break;
}
kref_get(&iommu_map->ref);
if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
dma_sync_sg_for_device(dev, iommu_map->sgl,
iommu_map->nents, iommu_map->dir);
if (dev_is_dma_coherent(dev))
/*
* Ensure all outstanding changes for coherent
* buffers are applied to the cache before any
* DMA occurs.
*/
dmb(ish);
ret = nents;
} else {
bool start_diff = (sg_phys(sg) !=
iommu_map->buf_start_addr);
dev_err(dev, "lazy map request differs:\n"
"req dir:%d, original dir:%d\n"
"req nents:%d, original nents:%d\n"
"req map attrs:%lu, original map attrs:%lu\n"
"req buffer start address differs:%d\n",
dir, iommu_map->dir, nents,
iommu_map->nents, attrs, iommu_map->attrs,
start_diff);
ret = -EINVAL;
}
}
mutex_unlock(&iommu_meta->lock);
return ret;
out_unlock:
mutex_unlock(&iommu_meta->lock);
out:
if (!IS_ERR(iommu_meta)) {
if (extra_meta_ref_taken)
msm_iommu_meta_put(iommu_meta);
msm_iommu_meta_put(iommu_meta);
}
return ret;
}
/*
* We are not taking a reference to the dma_buf here. It is expected that
* clients hold reference to the dma_buf until they are done with mapping and
* unmapping.
*/
int msm_dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, struct dma_buf *dma_buf,
unsigned long attrs)
{
int ret;
if (IS_ERR_OR_NULL(dev)) {
pr_err("%s: dev pointer is invalid\n", __func__);
return -EINVAL;
}
if (IS_ERR_OR_NULL(sg)) {
pr_err("%s: sg table pointer is invalid\n", __func__);
return -EINVAL;
}
if (IS_ERR_OR_NULL(dma_buf)) {
pr_err("%s: dma_buf pointer is invalid\n", __func__);
return -EINVAL;
}
ret = __msm_dma_map_sg(dev, sg, nents, dir, dma_buf, attrs);
return ret;
}
EXPORT_SYMBOL(msm_dma_map_sg_attrs);
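/*
 * Illustrative usage sketch: a typical client pairing of the lazy-mapping
 * entry points in this file. The dma_buf attachment and sg_table are
 * assumed to have been set up by the caller (e.g. via
 * dma_buf_map_attachment()); the "example_" name is hypothetical.
 */
static int example_lazy_map_and_unmap(struct device *dev,
				      struct dma_buf *dmabuf,
				      struct sg_table *sgt)
{
	int nents;

	/* First call per (dev, buffer) maps; later calls reuse the mapping. */
	nents = msm_dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents,
				     DMA_BIDIRECTIONAL, dmabuf, 0);
	if (nents <= 0)
		return nents ? nents : -ENOMEM;

	/* ... program the device with the DMA addresses now in sgt->sgl ... */

	/*
	 * Drops this client's reference. Without DMA_ATTR_NO_DELAYED_UNMAP
	 * the IOMMU mapping itself is kept until msm_dma_buf_freed() runs.
	 */
	msm_dma_unmap_sg_attrs(dev, sgt->sgl, sgt->orig_nents,
			       DMA_BIDIRECTIONAL, dmabuf, 0);
	return 0;
}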
static void msm_iommu_meta_destroy(struct kref *kref)
{
struct msm_iommu_meta *meta = container_of(kref, struct msm_iommu_meta,
ref);
if (!list_empty(&meta->iommu_maps)) {
WARN(1, "%s: DMA Buffer %pK being destroyed with outstanding iommu mappings!\n",
__func__, meta->buffer);
}
rb_erase(&meta->node, &iommu_root);
kfree(meta);
}
static void msm_iommu_meta_put(struct msm_iommu_meta *meta)
{
/*
* Need to lock here to prevent race against map/unmap
*/
mutex_lock(&msm_iommu_map_mutex);
kref_put(&meta->ref, msm_iommu_meta_destroy);
mutex_unlock(&msm_iommu_map_mutex);
}
static void msm_iommu_map_release(struct kref *kref)
{
struct msm_iommu_map *map = container_of(kref, struct msm_iommu_map,
ref);
struct sg_table table;
table.nents = table.orig_nents = map->nents;
table.sgl = map->sgl;
list_del(&map->lnode);
/* Skip an additional cache maintenance on the dma unmap path */
if (!(map->attrs & DMA_ATTR_SKIP_CPU_SYNC))
map->attrs |= DMA_ATTR_SKIP_CPU_SYNC;
dma_unmap_sg_attrs(map->dev, map->sgl, map->nents, map->dir,
map->attrs);
sg_free_table(&table);
kfree(map);
}
void msm_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf, unsigned long attrs)
{
struct msm_iommu_map *iommu_map;
struct msm_iommu_meta *meta;
mutex_lock(&msm_iommu_map_mutex);
meta = msm_iommu_meta_lookup(dma_buf->priv);
if (!meta) {
WARN(1, "%s: (%pK) was never mapped\n", __func__, dma_buf);
mutex_unlock(&msm_iommu_map_mutex);
goto out;
}
mutex_unlock(&msm_iommu_map_mutex);
mutex_lock(&meta->lock);
iommu_map = msm_iommu_lookup(meta, dev);
if (!iommu_map) {
WARN(1, "%s: (%pK) was never mapped for device %p\n", __func__,
dma_buf, dev);
mutex_unlock(&meta->lock);
goto out;
}
if (dir != iommu_map->dir)
WARN(1, "%s: (%pK) dir:%d differs from original dir:%d\n",
__func__, dma_buf, dir, iommu_map->dir);
if (attrs && ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0))
dma_sync_sg_for_cpu(dev, iommu_map->sgl, iommu_map->nents, dir);
iommu_map->attrs = attrs;
kref_put(&iommu_map->ref, msm_iommu_map_release);
mutex_unlock(&meta->lock);
msm_iommu_meta_put(meta);
out:
return;
}
EXPORT_SYMBOL(msm_dma_unmap_sg_attrs);
int msm_dma_unmap_all_for_dev(struct device *dev)
{
int ret = 0;
struct msm_iommu_meta *meta;
struct rb_root *root;
struct rb_node *meta_node;
mutex_lock(&msm_iommu_map_mutex);
root = &iommu_root;
meta_node = rb_first(root);
while (meta_node) {
struct msm_iommu_map *iommu_map;
struct msm_iommu_map *iommu_map_next;
meta = rb_entry(meta_node, struct msm_iommu_meta, node);
mutex_lock(&meta->lock);
list_for_each_entry_safe(iommu_map, iommu_map_next,
&meta->iommu_maps, lnode)
if (iommu_map->dev == dev)
if (!kref_put(&iommu_map->ref,
msm_iommu_map_release))
ret = -EINVAL;
mutex_unlock(&meta->lock);
meta_node = rb_next(meta_node);
}
mutex_unlock(&msm_iommu_map_mutex);
return ret;
}
EXPORT_SYMBOL(msm_dma_unmap_all_for_dev);
/*
* Only to be called by ION code when a buffer is freed
*/
void msm_dma_buf_freed(void *buffer)
{
struct msm_iommu_map *iommu_map;
struct msm_iommu_map *iommu_map_next;
struct msm_iommu_meta *meta;
mutex_lock(&msm_iommu_map_mutex);
meta = msm_iommu_meta_lookup(buffer);
if (!meta) {
/* Already unmapped (assuming no late unmapping) */
mutex_unlock(&msm_iommu_map_mutex);
return;
}
mutex_unlock(&msm_iommu_map_mutex);
mutex_lock(&meta->lock);
list_for_each_entry_safe(iommu_map, iommu_map_next, &meta->iommu_maps,
lnode)
kref_put(&iommu_map->ref, msm_iommu_map_release);
if (!list_empty(&meta->iommu_maps)) {
WARN(1, "%s: DMA buffer %pK destroyed with outstanding iommu mappings\n",
__func__, meta->buffer);
}
INIT_LIST_HEAD(&meta->iommu_maps);
mutex_unlock(&meta->lock);
msm_iommu_meta_put(meta);
}
EXPORT_SYMBOL(msm_dma_buf_freed);
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,815 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2014, 2020-2021, The Linux Foundation. All rights reserved.
* Contiguous Memory Allocator for DMA mapping framework
* Copyright (c) 2010-2011 by Samsung Electronics.
* Written by:
* Marek Szyprowski <m.szyprowski@samsung.com>
* Michal Nazarewicz <mina86@mina86.com>
* Copyright (C) 2012, 2014-2015 ARM Ltd.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/genalloc.h>
#include <linux/dma-direct.h>
#include <linux/cma.h>
#include <linux/iova.h>
#include <linux/dma-map-ops.h>
#include <linux/dma-mapping.h>
#include <linux/qcom-dma-mapping.h>
#include <linux/of_reserved_mem.h>
#include <linux/iommu.h>
#include <linux/qcom-iommu-util.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include "qcom-dma-iommu-generic.h"
static bool probe_finished;
static struct device *qcom_dma_iommu_dev;
static struct cma *qcom_dma_contiguous_default_area;
struct pci_host_bridge *qcom_pci_find_host_bridge(struct pci_bus *bus)
{
while (bus->parent)
bus = bus->parent;
return to_pci_host_bridge(bus->bridge);
}
/*
* This avoids arch-specific assembly, but may be slower since it calls
* back into the dma layer again.
*/
void qcom_arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
{
dma_addr_t dma_addr = phys_to_dma(qcom_dma_iommu_dev, paddr);
dma_sync_single_for_device(qcom_dma_iommu_dev,
dma_addr, size, dir);
}
void qcom_arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
{
dma_addr_t dma_addr = phys_to_dma(qcom_dma_iommu_dev, paddr);
dma_sync_single_for_cpu(qcom_dma_iommu_dev,
dma_addr, size, dir);
}
void qcom_arch_dma_prep_coherent(struct page *page, size_t size)
{
phys_addr_t phys = page_to_phys(page);
dma_addr_t dma_addr = phys_to_dma(qcom_dma_iommu_dev, phys);
dma_sync_single_for_device(qcom_dma_iommu_dev,
dma_addr, size, DMA_TO_DEVICE);
}
static struct cma *qcom_dev_get_cma_area(struct device *dev)
{
if (dev && dev->cma_area)
return dev->cma_area;
return qcom_dma_contiguous_default_area;
}
struct page *qcom_dma_alloc_from_contiguous(struct device *dev, size_t count,
unsigned int align, bool no_warn)
{
if (align > CONFIG_CMA_ALIGNMENT)
align = CONFIG_CMA_ALIGNMENT;
return cma_alloc(qcom_dev_get_cma_area(dev), count, align, no_warn);
}
bool qcom_dma_release_from_contiguous(struct device *dev, struct page *pages,
int count)
{
return cma_release(qcom_dev_get_cma_area(dev), pages, count);
}
static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp)
{
unsigned int align = min(get_order(size), CONFIG_CMA_ALIGNMENT);
return cma_alloc(cma, size >> PAGE_SHIFT, align, gfp & __GFP_NOWARN);
}
struct page *qcom_dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
{
/* CMA can be used only in the context which permits sleeping */
if (!gfpflags_allow_blocking(gfp))
return NULL;
if (dev->cma_area)
return cma_alloc_aligned(dev->cma_area, size, gfp);
if (size <= PAGE_SIZE || !qcom_dma_contiguous_default_area)
return NULL;
return cma_alloc_aligned(qcom_dma_contiguous_default_area, size, gfp);
}
void qcom_dma_free_contiguous(struct device *dev, struct page *page, size_t size)
{
if (!cma_release(qcom_dev_get_cma_area(dev), page,
PAGE_ALIGN(size) >> PAGE_SHIFT))
__free_pages(page, get_order(size));
}
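/*
 * Illustrative usage sketch: allocating and releasing a physically
 * contiguous buffer with the CMA helpers above. "example_" names are
 * hypothetical; dev is assumed to be a valid, probed device.
 */
static struct page *example_alloc_contig(struct device *dev, size_t size)
{
	/*
	 * May return NULL (non-blocking gfp, or small sizes without a
	 * per-device CMA area); callers then fall back to alloc_pages().
	 */
	return qcom_dma_alloc_contiguous(dev, PAGE_ALIGN(size), GFP_KERNEL);
}

static void example_free_contig(struct device *dev, struct page *page,
				size_t size)
{
	/* Falls back to __free_pages() if the page was not from CMA. */
	qcom_dma_free_contiguous(dev, page, PAGE_ALIGN(size));
}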
/*
* find_vm_area is not exported. Some dma apis expect that an array of
* struct pages can be saved in the vm_area, and retrieved at a later time.
*/
struct rb_root _root;
struct rb_root *root = &_root;
DEFINE_MUTEX(rbtree_lock);
struct qcom_iommu_dma_area {
struct rb_node node;
unsigned long addr;
struct page **pages;
};
static void qcom_insert_vm_area(struct qcom_iommu_dma_area *area)
{
struct rb_node **new, *parent;
mutex_lock(&rbtree_lock);
parent = NULL;
new = &root->rb_node;
while (*new) {
struct qcom_iommu_dma_area *entry;
entry = rb_entry(*new,
struct qcom_iommu_dma_area,
node);
parent = *new;
if (area->addr < entry->addr)
new = &((*new)->rb_left);
else if (area->addr > entry->addr)
new = &((*new)->rb_right);
else {
mutex_unlock(&rbtree_lock);
WARN_ON(1);
return;
}
}
rb_link_node(&area->node, parent, new);
rb_insert_color(&area->node, root);
mutex_unlock(&rbtree_lock);
}
static struct qcom_iommu_dma_area *qcom_find_vm_area(const void *cpu_addr)
{
struct rb_node *node;
struct qcom_iommu_dma_area *entry;
unsigned long addr = (unsigned long)cpu_addr;
mutex_lock(&rbtree_lock);
node = root->rb_node;
while (node) {
entry = rb_entry(node,
struct qcom_iommu_dma_area,
node);
if (addr < entry->addr)
node = node->rb_left;
else if (addr > entry->addr)
node = node->rb_right;
else {
mutex_unlock(&rbtree_lock);
return entry;
}
}
mutex_unlock(&rbtree_lock);
return NULL;
}
struct page **qcom_dma_common_find_pages(void *cpu_addr)
{
struct qcom_iommu_dma_area *area = qcom_find_vm_area(cpu_addr);
if (!area)
return NULL;
return area->pages;
}
/*
* Remaps an array of PAGE_SIZE pages into another vm_area.
* Cannot be used in non-sleeping contexts
*/
void *qcom_dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller)
{
struct qcom_iommu_dma_area *area;
void *vaddr;
area = kzalloc(sizeof(*area), GFP_KERNEL);
if (!area)
return NULL;
vaddr = vmap(pages, PAGE_ALIGN(size) >> PAGE_SHIFT,
VM_DMA_COHERENT, prot);
if (!vaddr) {
kfree(area);
return NULL;
}
area->pages = pages;
area->addr = (unsigned long)vaddr;
qcom_insert_vm_area(area);
return vaddr;
}
/*
* Remaps an allocated contiguous region into another vm_area.
* Cannot be used in non-sleeping contexts
*/
void *qcom_dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller)
{
int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page **pages;
void *vaddr;
int i;
pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return NULL;
for (i = 0; i < count; i++)
pages[i] = nth_page(page, i);
vaddr = vmap(pages, count, VM_DMA_COHERENT, prot);
kfree(pages);
return vaddr;
}
/*
* Unmaps a range previously mapped by dma_common_contiguous_remap or
* dma_common_pages_remap. Note that dma_common_contiguous_remap does
* not insert an rb_tree entry since there is no pages array to save.
*/
void qcom_dma_common_free_remap(void *cpu_addr, size_t size)
{
struct qcom_iommu_dma_area *area;
/* qcom_dma_common_contiguous_remap doesn't save the pages array */
area = qcom_find_vm_area(cpu_addr);
if (area) {
mutex_lock(&rbtree_lock);
rb_erase(&area->node, root);
mutex_unlock(&rbtree_lock);
kfree(area);
}
vunmap(cpu_addr);
}
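/*
 * Illustrative usage sketch: remapping a caller-owned array of PAGE_SIZE
 * pages and tearing the mapping down again with the helpers above. The
 * pages array is assumed to hold nr_pages valid pages; "example_" names
 * are hypothetical.
 */
static void *example_remap_pages(struct page **pages, unsigned long nr_pages)
{
	return qcom_dma_common_pages_remap(pages, nr_pages << PAGE_SHIFT,
					   PAGE_KERNEL,
					   __builtin_return_address(0));
}

static void example_unmap_pages(void *vaddr, unsigned long nr_pages)
{
	/* The pages array stashed at remap time stays retrievable until here. */
	WARN_ON(!qcom_dma_common_find_pages(vaddr));
	qcom_dma_common_free_remap(vaddr, nr_pages << PAGE_SHIFT);
}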
static struct gen_pool *atomic_pool __ro_after_init;
static size_t atomic_pool_size;
static unsigned long current_pool_size;
/* Dynamic background expansion when the atomic pool is near capacity */
static struct work_struct atomic_pool_work;
static void dma_atomic_pool_debugfs_init(void)
{
struct dentry *root;
root = debugfs_create_dir("qcom_dma_pools", NULL);
if (IS_ERR_OR_NULL(root))
return;
debugfs_create_ulong("pool_size", 0400, root, &current_pool_size);
}
static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
{
current_pool_size += size;
}
static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
gfp_t gfp)
{
unsigned int order;
struct page *page = NULL;
void *addr;
int ret = -ENOMEM;
/* Cannot allocate larger than MAX_ORDER - 1 */
order = min(get_order(pool_size), MAX_ORDER - 1);
do {
pool_size = 1 << (PAGE_SHIFT + order);
if (qcom_dev_get_cma_area(NULL))
page = qcom_dma_alloc_from_contiguous(NULL, 1 << order,
order, false);
else
page = alloc_pages(gfp, order);
} while (!page && order-- > 0);
if (!page)
goto out;
qcom_arch_dma_prep_coherent(page, pool_size);
addr = qcom_dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
goto free_page;
ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
pool_size, NUMA_NO_NODE);
if (ret)
goto remove_mapping;
dma_atomic_pool_size_add(gfp, pool_size);
return 0;
remove_mapping:
qcom_dma_common_free_remap(addr, pool_size);
free_page:
if (!qcom_dma_release_from_contiguous(NULL, page, 1 << order))
__free_pages(page, order);
out:
return ret;
}
static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
{
if (pool && gen_pool_avail(pool) < atomic_pool_size)
atomic_pool_expand(pool, gen_pool_size(pool), gfp);
}
static void atomic_pool_work_fn(struct work_struct *work)
{
atomic_pool_resize(atomic_pool, GFP_KERNEL);
}
static struct gen_pool *__dma_atomic_pool_init(size_t pool_size, gfp_t gfp)
{
struct gen_pool *pool;
int ret;
pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
if (!pool)
return NULL;
gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);
ret = atomic_pool_expand(pool, pool_size, gfp);
if (ret) {
gen_pool_destroy(pool);
pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
pool_size >> 10, &gfp);
return NULL;
}
pr_info("DMA preallocated %zu KiB %pGg pool for atomic allocations\n",
gen_pool_size(pool) >> 10, &gfp);
return pool;
}
static int dma_atomic_pool_init(struct device *dev)
{
int ret = 0;
unsigned long pages;
/* Default the pool size to 128KB per 1 GB of memory, min 128 KB, max MAX_ORDER - 1. */
pages = totalram_pages() / (SZ_1G / SZ_128K);
pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES);
atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K);
INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
atomic_pool = __dma_atomic_pool_init(atomic_pool_size, GFP_KERNEL);
if (!atomic_pool)
return -ENOMEM;
dma_atomic_pool_debugfs_init();
return ret;
}
/*
* Couldn't implement this via dma_alloc_attrs(qcom_iommu_dma_dev, GFP_ATOMIC)
* due to dma_free_from_pool only passing in cpu_addr & not dma_handle.
*/
void *qcom_dma_alloc_from_pool(struct device *dev, size_t size,
struct page **ret_page, gfp_t flags)
{
unsigned long val;
void *ptr = NULL;
if (!atomic_pool) {
WARN(1, "coherent pool not initialised!\n");
return NULL;
}
val = gen_pool_alloc(atomic_pool, size);
if (val) {
phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
*ret_page = pfn_to_page(__phys_to_pfn(phys));
ptr = (void *)val;
memset(ptr, 0, size);
}
if (gen_pool_avail(atomic_pool) < atomic_pool_size)
schedule_work(&atomic_pool_work);
return ptr;
}
bool qcom_dma_free_from_pool(struct device *dev, void *start, size_t size)
{
if (!atomic_pool || !gen_pool_has_addr(atomic_pool, (unsigned long)start, size))
return false;
gen_pool_free(atomic_pool, (unsigned long)start, size);
return true;
}
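/*
 * Illustrative usage sketch: an atomic-context allocation built on the pool
 * helpers above. The caller is assumed to map the returned page into its
 * own IOMMU domain; "example_" names are hypothetical.
 */
static void *example_atomic_alloc(struct device *dev, size_t size,
				  struct page **ret_page)
{
	/* Never sleeps; schedules background pool expansion when running low. */
	return qcom_dma_alloc_from_pool(dev, PAGE_ALIGN(size), ret_page,
					GFP_ATOMIC);
}

static void example_atomic_free(struct device *dev, void *vaddr, size_t size)
{
	if (!qcom_dma_free_from_pool(dev, vaddr, PAGE_ALIGN(size)))
		WARN_ONCE(1, "%p was not allocated from the atomic pool\n",
			  vaddr);
}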
static void qcom_dma_atomic_pool_exit(struct device *dev)
{
unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
void *addr;
struct page *page;
/*
* Find the starting address. The pool is expected to be unused.
*
* Using the initial size here is safe even though the pool can grow: it
* only grows when an allocation is satisfied from it, and this function is
* only called before the pool has ever been used, so its size is still the
* initial size.
*/
addr = (void *)gen_pool_alloc(atomic_pool, atomic_pool_size);
if (!addr) {
WARN_ON(1);
return;
}
gen_pool_free(atomic_pool, (unsigned long)addr, atomic_pool_size);
gen_pool_destroy(atomic_pool);
page = vmalloc_to_page(addr);
qcom_dma_common_free_remap(addr, atomic_pool_size);
qcom_dma_release_from_contiguous(dev, page, nr_pages);
}
/*
* struct dma_coherent_mem is private, so we can't access it. 0 indicates
* an error condition for dma_mmap_from_dev_coherent.
*/
int qcom_dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
void *vaddr, size_t size, int *ret)
{
return 0;
}
/*
* Return the page attributes used for mapping dma_alloc_* memory, either in
* kernel space if remapping is needed, or to userspace through dma_mmap_*.
*/
pgprot_t qcom_dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
{
if (dev_is_dma_coherent(dev))
return prot;
#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
if (attrs & DMA_ATTR_WRITE_COMBINE)
return pgprot_writecombine(prot);
#endif
return pgprot_dmacoherent(prot);
}
/**
* dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
* page flags.
* @dir: Direction of DMA transfer
* @coherent: Is the DMA master cache-coherent?
* @attrs: DMA attributes for the mapping
*
* Return: corresponding IOMMU API page protection flags
*/
int qcom_dma_info_to_prot(enum dma_data_direction dir, bool coherent,
unsigned long attrs)
{
int prot = coherent ? IOMMU_CACHE : 0;
if (attrs & DMA_ATTR_PRIVILEGED)
prot |= IOMMU_PRIV;
if (attrs & DMA_ATTR_SYS_CACHE)
prot |= IOMMU_SYS_CACHE;
if (attrs & DMA_ATTR_SYS_CACHE_NWA)
prot |= IOMMU_SYS_CACHE_NWA;
switch (dir) {
case DMA_BIDIRECTIONAL:
return prot | IOMMU_READ | IOMMU_WRITE;
case DMA_TO_DEVICE:
return prot | IOMMU_READ;
case DMA_FROM_DEVICE:
return prot | IOMMU_WRITE;
default:
return 0;
}
}
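/*
 * Illustrative sketch: what the translation above yields for a common case.
 * A cache-coherent master doing device-read-only DMA with a privileged
 * mapping gets read, cacheable and privileged IOMMU permissions; the
 * "example_" name is hypothetical.
 */
static void example_check_prot_translation(void)
{
	int prot = qcom_dma_info_to_prot(DMA_TO_DEVICE, true,
					 DMA_ATTR_PRIVILEGED);

	/* DMA_TO_DEVICE is read-only from the device's point of view. */
	WARN_ON(prot != (IOMMU_READ | IOMMU_CACHE | IOMMU_PRIV));
}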
/*
* The DMA API client is passing in a scatterlist which could describe
* any old buffer layout, but the IOMMU API requires everything to be
* aligned to IOMMU pages. Hence the need for this complicated bit of
* impedance-matching, to be able to hand off a suitably-aligned list,
* but still preserve the original offsets and sizes for the caller.
*/
size_t qcom_iommu_dma_prepare_map_sg(struct device *dev, struct iova_domain *iovad,
struct scatterlist *sg, int nents)
{
struct scatterlist *s, *prev = NULL;
size_t iova_len = 0;
unsigned long mask = dma_get_seg_boundary(dev);
int i;
/*
* Work out how much IOVA space we need, and align the segments to
* IOVA granules for the IOMMU driver to handle. With some clever
* trickery we can modify the list in-place, but reversibly, by
* stashing the unaligned parts in the as-yet-unused DMA fields.
*/
for_each_sg(sg, s, nents, i) {
size_t s_iova_off = iova_offset(iovad, s->offset);
size_t s_length = s->length;
size_t pad_len = (mask - iova_len + 1) & mask;
sg_dma_address(s) = s_iova_off;
sg_dma_len(s) = s_length;
s->offset -= s_iova_off;
s_length = iova_align(iovad, s_length + s_iova_off);
s->length = s_length;
/*
* Due to the alignment of our single IOVA allocation, we can
* depend on these assumptions about the segment boundary mask:
* - If mask size >= IOVA size, then the IOVA range cannot
* possibly fall across a boundary, so we don't care.
* - If mask size < IOVA size, then the IOVA range must start
* exactly on a boundary, therefore we can lay things out
* based purely on segment lengths without needing to know
* the actual addresses beforehand.
* - The mask must be a power of 2, so pad_len == 0 if
* iova_len == 0, thus we cannot dereference prev the first
* time through here (i.e. before it has a meaningful value).
*/
if (pad_len && pad_len < s_length - 1) {
prev->length += pad_len;
iova_len += pad_len;
}
iova_len += s_length;
prev = s;
}
return iova_len;
}
/*
* Prepare a successfully-mapped scatterlist to give back to the caller.
*
* At this point the segments are already laid out by iommu_dma_map_sg() to
* avoid individually crossing any boundaries, so we merely need to check a
* segment's start address to avoid concatenating across one.
*/
int qcom_iommu_dma_finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
dma_addr_t dma_addr)
{
struct scatterlist *s, *cur = sg;
unsigned long seg_mask = dma_get_seg_boundary(dev);
unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
int i, count = 0;
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
unsigned int s_length = sg_dma_len(s);
unsigned int s_iova_len = s->length;
s->offset += s_iova_off;
s->length = s_length;
sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
/*
* Now fill in the real DMA data. If...
* - there is a valid output segment to append to
* - and this segment starts on an IOVA page boundary
* - but doesn't fall at a segment boundary
* - and wouldn't make the resulting output segment too long
*/
if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
(max_len - cur_len >= s_length)) {
/* ...then concatenate it with the previous one */
cur_len += s_length;
} else {
/* Otherwise start the next output segment */
if (i > 0)
cur = sg_next(cur);
cur_len = s_length;
count++;
sg_dma_address(cur) = dma_addr + s_iova_off;
}
sg_dma_len(cur) = cur_len;
dma_addr += s_iova_len;
if (s_length + s_iova_off < s_iova_len)
cur_len = 0;
}
return count;
}
/*
* If mapping failed, then just restore the original list,
* but making sure the DMA fields are invalidated.
*/
void qcom_iommu_dma_invalidate_sg(struct scatterlist *sg, int nents)
{
struct scatterlist *s;
int i;
for_each_sg(sg, s, nents, i) {
if (sg_dma_address(s) != DMA_MAPPING_ERROR)
s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
}
}
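/*
 * Illustrative sketch: the calling sequence the three scatterlist helpers
 * above are designed for. The IOVA and IOMMU domains are assumed to be
 * owned by the caller, iommu_map_sg() is assumed to take a gfp_t as on
 * recent kernels, and "example_map_sg" is hypothetical.
 */
static dma_addr_t example_map_sg(struct device *dev,
				 struct iommu_domain *domain,
				 struct iova_domain *iovad,
				 struct scatterlist *sg, int nents, int prot)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long pfn;
	size_t iova_len;
	dma_addr_t iova;
	ssize_t mapped;

	/* Align segments to IOVA granules and compute the total IOVA span. */
	iova_len = qcom_iommu_dma_prepare_map_sg(dev, iovad, sg, nents);

	pfn = alloc_iova_fast(iovad, iova_len >> shift,
			      dma_get_mask(dev) >> shift, true);
	if (!pfn)
		goto restore;
	iova = (dma_addr_t)pfn << shift;

	mapped = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
	if (mapped < (ssize_t)iova_len) {
		free_iova_fast(iovad, pfn, iova_len >> shift);
		goto restore;
	}

	/* Rebuild dma_address/dma_length; the return value is the DMA nents. */
	qcom_iommu_dma_finalise_sg(dev, sg, nents, iova);
	return iova;

restore:
	/* Put the scatterlist back exactly as the caller handed it over. */
	qcom_iommu_dma_invalidate_sg(sg, nents);
	return DMA_MAPPING_ERROR;
}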
/**
* __iommu_dma_mmap - Map a buffer into provided user VMA
* @pages: Array representing buffer from __iommu_dma_alloc()
* @size: Size of buffer in bytes
* @vma: VMA describing requested userspace mapping
*
* Maps the pages of the buffer in @pages into @vma. The caller is responsible
* for verifying the correct size and protection of @vma beforehand.
*/
static int __qcom_iommu_dma_mmap(struct page **pages, size_t size,
struct vm_area_struct *vma)
{
return vm_map_pages(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
}
int qcom_iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long pfn, off = vma->vm_pgoff;
int ret;
vma->vm_page_prot = qcom_dma_pgprot(dev, vma->vm_page_prot, attrs);
if (qcom_dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
return -ENXIO;
if (is_vmalloc_addr(cpu_addr)) {
struct page **pages = qcom_dma_common_find_pages(cpu_addr);
if (pages)
return __qcom_iommu_dma_mmap(pages, size, vma);
pfn = vmalloc_to_pfn(cpu_addr);
} else {
pfn = page_to_pfn(virt_to_page(cpu_addr));
}
return remap_pfn_range(vma, vma->vm_start, pfn + off,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
}
int qcom_iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
struct page *page;
int ret;
if (is_vmalloc_addr(cpu_addr)) {
struct page **pages = qcom_dma_common_find_pages(cpu_addr);
if (pages) {
return sg_alloc_table_from_pages(sgt, pages,
PAGE_ALIGN(size) >> PAGE_SHIFT,
0, size, GFP_KERNEL);
}
page = vmalloc_to_page(cpu_addr);
} else {
page = virt_to_page(cpu_addr);
}
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (!ret)
sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
return ret;
}
static int qcom_dma_iommu_probe(struct platform_device *pdev)
{
int ret;
struct device *dev = &pdev->dev;
qcom_dma_iommu_dev = dev;
if (dev_is_dma_coherent(dev)) {
dev_err(dev, "Cannot be dma-coherent\n");
return -EINVAL;
}
/* Should be connected to linux,cma-default node */
ret = of_reserved_mem_device_init_by_idx(dev, dev->of_node, 0);
if (ret)
return ret;
qcom_dma_contiguous_default_area = dev->cma_area;
if (!qcom_dma_contiguous_default_area) {
dev_err(dev, "Unable to find cma area\n");
return -EINVAL;
}
ret = dma_atomic_pool_init(dev);
if (ret)
goto out_iova_cache;
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret)
goto out_atomic_pool;
probe_finished = true;
return 0;
out_atomic_pool:
qcom_dma_atomic_pool_exit(dev);
out_iova_cache:
return ret;
}
bool qcom_dma_iommu_is_ready(void)
{
if (!probe_finished)
return false;
return true;
}
EXPORT_SYMBOL(qcom_dma_iommu_is_ready);
static int qcom_dma_iommu_remove(struct platform_device *pdev)
{
qcom_dma_atomic_pool_exit(&pdev->dev);
return 0;
}
static const struct of_device_id qcom_dma_iommu_of_match[] = {
{.compatible = "qcom,iommu-dma"},
{}
};
MODULE_DEVICE_TABLE(of, qcom_dma_iommu_of_match);
static struct platform_driver qcom_dma_iommu_driver = {
.probe = qcom_dma_iommu_probe,
.remove = qcom_dma_iommu_remove,
.driver = {
.name = "qcom_dma_iommu",
.of_match_table = qcom_dma_iommu_of_match,
.suppress_bind_attrs = true,
},
};
int __init qcom_dma_iommu_generic_driver_init(void)
{
return platform_driver_register(&qcom_dma_iommu_driver);
}
void qcom_dma_iommu_generic_driver_exit(void)
{
platform_driver_unregister(&qcom_dma_iommu_driver);
}

View File

@@ -0,0 +1,86 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __QCOM_DMA_IOMMU_GENERIC_H
#define __QCOM_DMA_IOMMU_GENERIC_H
#include <linux/device.h>
#include <linux/dma-direction.h>
#include <linux/pci.h>
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
bool qcom_dma_iommu_is_ready(void);
extern int __init qcom_dma_iommu_generic_driver_init(void);
extern void qcom_dma_iommu_generic_driver_exit(void);
struct pci_host_bridge *qcom_pci_find_host_bridge(struct pci_bus *bus);
void qcom_arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
enum dma_data_direction dir);
void qcom_arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
enum dma_data_direction dir);
void qcom_arch_dma_prep_coherent(struct page *page, size_t size);
/* kernel/dma/contiguous.c */
struct page *qcom_dma_alloc_from_contiguous(struct device *dev, size_t count,
unsigned int align, bool no_warn);
bool qcom_dma_release_from_contiguous(struct device *dev, struct page *pages,
int count);
struct page *qcom_dma_alloc_contiguous(struct device *dev, size_t size,
gfp_t gfp);
void qcom_dma_free_contiguous(struct device *dev, struct page *page,
size_t size);
/* kernel/dma/remap.c */
struct page **qcom_dma_common_find_pages(void *cpu_addr);
void *qcom_dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller);
void *qcom_dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller);
void qcom_dma_common_free_remap(void *cpu_addr, size_t size);
void *qcom_dma_alloc_from_pool(struct device *dev, size_t size,
struct page **ret_page, gfp_t flags);
bool qcom_dma_free_from_pool(struct device *dev, void *start, size_t size);
int qcom_dma_mmap_from_dev_coherent(struct device *dev,
struct vm_area_struct *vma, void *vaddr, size_t size, int *ret);
/* kernel/dma/mapping.c */
pgprot_t qcom_dma_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs);
/* DMA-IOMMU utilities */
int qcom_dma_info_to_prot(enum dma_data_direction dir, bool coherent,
unsigned long attrs);
size_t qcom_iommu_dma_prepare_map_sg(struct device *dev, struct iova_domain *iovad,
struct scatterlist *sg, int nents);
int qcom_iommu_dma_finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
dma_addr_t dma_addr);
void qcom_iommu_dma_invalidate_sg(struct scatterlist *sg, int nents);
int qcom_iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
int qcom_iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
#else /*CONFIG_IOMMU_IO_PGTABLE_FAST*/
static inline bool qcom_dma_iommu_is_ready(void)
{
return true;
}
static inline int __init qcom_dma_iommu_generic_driver_init(void)
{
return 0;
}
static inline void qcom_dma_iommu_generic_driver_exit(void) {}
#endif /*CONFIG_IOMMU_IO_PGTABLE_FAST*/
#endif /* __QCOM_DMA_IOMMU_GENERIC_H */

View File

@@ -0,0 +1,323 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/shrinker.h>
#include <linux/slab.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <soc/qcom/secure_buffer.h>
struct io_pgtable_pool {
u32 vmid;
struct kref ref;
spinlock_t pool_lock;
struct list_head page_pool;
};
static DEFINE_MUTEX(page_pool_xa_lock);
static DEFINE_XARRAY(page_pool_xa);
static atomic_long_t page_pool_count = ATOMIC_LONG_INIT(0);
static bool is_secure_vmid(u32 vmid)
{
return !!vmid;
}
static int io_pgtable_hyp_assign_page(u32 vmid, struct page *page)
{
struct qcom_scm_vmperm dst_vmids[] = {{QCOM_SCM_VMID_HLOS,
PERM_READ | PERM_WRITE},
{vmid, PERM_READ}};
u64 src_vmid_list = BIT(QCOM_SCM_VMID_HLOS);
phys_addr_t page_addr = page_to_phys(page);
int ret;
ret = qcom_scm_assign_mem(page_to_phys(page), PAGE_SIZE, &src_vmid_list,
dst_vmids, ARRAY_SIZE(dst_vmids));
if (ret)
pr_err("failed qcom_assign for %pa address of size %zx - subsys VMid %d rc:%d\n",
&page_addr, PAGE_SIZE, vmid, ret);
WARN(ret, "failed to assign memory to VMID: %u rc:%d\n", vmid, ret);
return ret ? -EADDRNOTAVAIL : 0;
}
static int io_pgtable_hyp_unassign_page(u32 vmid, struct page *page)
{
struct qcom_scm_vmperm dst_vmids[] = {{QCOM_SCM_VMID_HLOS,
PERM_READ | PERM_WRITE | PERM_EXEC}};
u64 src_vmid_list = BIT(QCOM_SCM_VMID_HLOS) | BIT(vmid);
phys_addr_t page_addr = page_to_phys(page);
int ret;
ret = qcom_scm_assign_mem(page_to_phys(page), PAGE_SIZE, &src_vmid_list,
dst_vmids, ARRAY_SIZE(dst_vmids));
if (ret)
pr_err("failed qcom_assign for unassigning %pa address of size %zx - subsys VMid %d rc:%d\n",
&page_addr, PAGE_SIZE, vmid, ret);
WARN(ret, "failed to unassign memory from VMID: %u rc: %d\n", vmid, ret);
return ret ? -EADDRNOTAVAIL : 0;
}
static struct page *__alloc_page_from_pool(struct list_head *page_pool)
{
struct page *page;
page = list_first_entry_or_null(page_pool, struct page, lru);
if (page) {
list_del(&page->lru);
atomic_long_dec(&page_pool_count);
dec_node_page_state(page, NR_KERNEL_MISC_RECLAIMABLE);
}
return page;
}
static struct page *alloc_page_from_pool(u32 vmid)
{
struct io_pgtable_pool *pool = xa_load(&page_pool_xa, vmid);
struct page *page;
unsigned long flags;
spin_lock_irqsave(&pool->pool_lock, flags);
page = __alloc_page_from_pool(&pool->page_pool);
spin_unlock_irqrestore(&pool->pool_lock, flags);
return page;
}
static void free_page_to_pool(struct page *page)
{
u32 vmid = page_private(page);
struct io_pgtable_pool *pool = xa_load(&page_pool_xa, vmid);
unsigned long flags;
clear_page(page_address(page));
spin_lock_irqsave(&pool->pool_lock, flags);
list_add(&page->lru, &pool->page_pool);
atomic_long_inc(&page_pool_count);
inc_node_page_state(page, NR_KERNEL_MISC_RECLAIMABLE);
spin_unlock_irqrestore(&pool->pool_lock, flags);
}
/* Assumes that page_pool_xa_lock is held. */
static void io_pgtable_pool_release(struct kref *ref)
{
struct io_pgtable_pool *pool = container_of(ref, struct io_pgtable_pool, ref);
struct page *page;
bool secure_vmid = is_secure_vmid(pool->vmid);
xa_erase(&page_pool_xa, pool->vmid);
/*
* There's no need to take the pool lock, as the pool is no longer accessible to other
* IOMMU clients. There's no possibility for concurrent access either as this
* function is only invoked when the last reference is removed.
*/
page = __alloc_page_from_pool(&pool->page_pool);
while (page) {
if (!secure_vmid || !io_pgtable_hyp_unassign_page(pool->vmid, page))
__free_page(page);
page = __alloc_page_from_pool(&pool->page_pool);
}
kfree(pool);
}
/*
* qcom_io_pgtable_allocator_register: Register with the io-pgtable allocator interface.
*
* @vmid: The VMID that io-pgtable memory needs to be shared with when allocated. If VMID
* is 0, then page table memory will not be shared with any other VMs.
*
* On success, 0 is returned and there will be a reference held for metadata associated with
* @vmid. Otherwise, an error code will be returned.
*/
int qcom_io_pgtable_allocator_register(u32 vmid)
{
struct io_pgtable_pool *pool;
int ret = 0;
mutex_lock(&page_pool_xa_lock);
pool = xa_load(&page_pool_xa, vmid);
if (pool) {
kref_get(&pool->ref);
goto out;
}
pool = kmalloc(sizeof(*pool), GFP_KERNEL);
if (!pool) {
ret = -ENOMEM;
goto out;
}
pool->vmid = vmid;
kref_init(&pool->ref);
spin_lock_init(&pool->pool_lock);
INIT_LIST_HEAD(&pool->page_pool);
ret = xa_err(xa_store(&page_pool_xa, vmid, pool, GFP_KERNEL));
if (ret < 0)
kfree(pool);
out:
mutex_unlock(&page_pool_xa_lock);
return ret;
}
/*
* qcom_io_pgtable_allocator_unregister: Unregister with the io-pgtable allocator interface.
*
* @vmid: The VMID that was used when registering with the interface with
* qcom_io_pgtable_allocator_register().
*
* Decrements the references to allocator metadata for @vmid.
*
* If this call results in references to @vmid dropping to 0, then all metadata and pages
* associated with @vmid are released.
*/
void qcom_io_pgtable_allocator_unregister(u32 vmid)
{
struct io_pgtable_pool *pool;
mutex_lock(&page_pool_xa_lock);
pool = xa_load(&page_pool_xa, vmid);
kref_put(&pool->ref, io_pgtable_pool_release);
mutex_unlock(&page_pool_xa_lock);
}
/*
* qcom_io_pgtable_alloc_page: Allocate page table memory from the io-pgtable allocator.
*
* @vmid: The VMID that the page table memory should be shared with.
* @gfp: The GFP flags to be used for allocating the page table memory.
*
* This function may sleep if memory needs to be shared with other VMs.
*
* On success, a page will be returned. The page will also have been shared with other
* VMs--if any. In case of an error, this function returns NULL.
*/
struct page *qcom_io_pgtable_alloc_page(u32 vmid, gfp_t gfp)
{
struct page *page;
/*
* Mapping memory for secure domains may result in having to assign page table
* memory to another VMID, which can sleep. Atomic and secure domains are
* not a legal combination. We can use the GFP flags to detect atomic domains,
* as they will have GFP_ATOMIC set.
*/
BUG_ON(!gfpflags_allow_blocking(gfp) && is_secure_vmid(vmid));
page = alloc_page_from_pool(vmid);
if (page)
return page;
page = alloc_page(gfp);
if (!page)
return NULL;
/* The page may be inaccessible if this is true, so leak it. */
else if (is_secure_vmid(vmid) && io_pgtable_hyp_assign_page(vmid, page))
return NULL;
set_page_private(page, (unsigned long)vmid);
return page;
}
/*
* qcom_io_pgtable_free_page: Frees page table memory.
*
* @page: The page to be freed.
*
* We cache pages in their respective page pools to improve performance
* for future allocations.
*
* Export this symbol for the IOMMU driver, since it decides when
* page table memory is freed after TLB maintenance.
*/
void qcom_io_pgtable_free_page(struct page *page)
{
free_page_to_pool(page);
}
EXPORT_SYMBOL(qcom_io_pgtable_free_page);
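/*
 * Illustrative usage sketch: the register/alloc/free/unregister cycle the
 * allocator above expects, using the non-secure VMID 0 so no hyp-assign is
 * involved. The "example_" name is hypothetical.
 */
static int example_pgtable_page_cycle(void)
{
	struct page *page;
	int ret;

	ret = qcom_io_pgtable_allocator_register(0);
	if (ret)
		return ret;

	page = qcom_io_pgtable_alloc_page(0, GFP_KERNEL | __GFP_ZERO);
	if (page) {
		/* ... install and use the table, then tear it down ... */
		qcom_io_pgtable_free_page(page);	/* cached in the pool */
	}

	/* Drops the pool reference; cached pages are released on the last put. */
	qcom_io_pgtable_allocator_unregister(0);
	return page ? 0 : -ENOMEM;
}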
static unsigned long io_pgtable_alloc_count_objects(struct shrinker *shrinker,
struct shrink_control *sc)
{
unsigned long count = atomic_long_read(&page_pool_count);
return count ? count : SHRINK_EMPTY;
}
static unsigned long scan_page_pool(struct io_pgtable_pool *pool, struct list_head *freelist,
unsigned long nr_to_scan)
{
struct page *page;
unsigned long count = 0, flags;
spin_lock_irqsave(&pool->pool_lock, flags);
while (count < nr_to_scan) {
page = __alloc_page_from_pool(&pool->page_pool);
if (page) {
list_add(&page->lru, freelist);
count++;
} else {
break;
}
}
spin_unlock_irqrestore(&pool->pool_lock, flags);
return count;
}
static unsigned long io_pgtable_alloc_scan_objects(struct shrinker *shrinker,
struct shrink_control *sc)
{
struct page *page, *tmp;
struct io_pgtable_pool *pool;
unsigned long index;
unsigned long nr_to_scan = sc->nr_to_scan, count = 0;
u32 vmid;
LIST_HEAD(freelist);
mutex_lock(&page_pool_xa_lock);
xa_for_each(&page_pool_xa, index, pool) {
count += scan_page_pool(pool, &freelist, nr_to_scan - count);
if (count >= nr_to_scan)
break;
}
mutex_unlock(&page_pool_xa_lock);
list_for_each_entry_safe(page, tmp, &freelist, lru) {
vmid = page_private(page);
list_del(&page->lru);
if (!is_secure_vmid(vmid) || !io_pgtable_hyp_unassign_page(vmid, page))
__free_page(page);
else
count--;
}
return count;
}
static struct shrinker io_pgtable_alloc_shrinker = {
.count_objects = io_pgtable_alloc_count_objects,
.scan_objects = io_pgtable_alloc_scan_objects,
.seeks = DEFAULT_SEEKS,
};
int qcom_io_pgtable_alloc_init(void)
{
return register_shrinker(&io_pgtable_alloc_shrinker, "io_pgtable_alloc");
}
void qcom_io_pgtable_alloc_exit(void)
{
unregister_shrinker(&io_pgtable_alloc_shrinker);
}

View File

@@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021,2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __QCOM_IO_PGTABLE_ALLOC_H
#define __QCOM_IO_PGTABLE_ALLOC_H
int qcom_io_pgtable_allocator_register(u32 vmid);
void qcom_io_pgtable_allocator_unregister(u32 vmid);
struct page *qcom_io_pgtable_alloc_page(u32 vmid, gfp_t gfp);
void qcom_io_pgtable_free_page(struct page *page);
int qcom_io_pgtable_alloc_init(void);
void qcom_io_pgtable_alloc_exit(void);
#endif /* __QCOM_IO_PGTABLE_ALLOC_H */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,389 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#define pr_fmt(fmt) "iommu-debug: %s: " fmt, __func__
#include <linux/bitfield.h>
#include <linux/debugfs.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/qcom-iommu-util.h>
#include "qcom-iommu-debug.h"
#define USECASE_SWITCH_TIMEOUT_MSECS (500)
static int iommu_debug_nr_iters_set(void *data, u64 val)
{
struct iommu_debug_device *ddev = data;
if (!val)
val = 1;
if (val > 10000)
val = 10000;
ddev->nr_iters = (u32)val;
return 0;
}
static int iommu_debug_nr_iters_get(void *data, u64 *val)
{
struct iommu_debug_device *ddev = data;
*val = ddev->nr_iters;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(iommu_debug_nr_iters_fops,
iommu_debug_nr_iters_get,
iommu_debug_nr_iters_set,
"%llu\n");
int iommu_debug_check_mapping_flags(struct device *dev, dma_addr_t iova, size_t size,
phys_addr_t expected_pa, u32 flags)
{
struct qcom_iommu_atos_txn txn;
struct iommu_fwspec *fwspec;
struct iommu_domain *domain;
domain = iommu_get_domain_for_dev(dev);
if (!domain) {
dev_err(dev, "iommu_get_domain_for_dev() failed\n");
return -EINVAL;
}
fwspec = dev_iommu_fwspec_get(dev);
if (!fwspec) {
dev_err(dev, "dev_iommu_fwspec_get() failed\n");
return -EINVAL;
}
txn.addr = iova;
txn.id = FIELD_GET(ARM_SMMU_SMR_ID, (fwspec->ids[0]));
txn.flags = flags;
size = PAGE_ALIGN(size);
while (size) {
phys_addr_t walk_pa, atos_pa;
atos_pa = qcom_iommu_iova_to_phys_hard(domain, &txn);
walk_pa = iommu_iova_to_phys(domain, iova);
if (expected_pa != atos_pa || expected_pa != walk_pa) {
dev_err_ratelimited(dev,
"Bad translation for %pad! Expected: %pa Got: %pa (ATOS) %pa (Table Walk) sid=%08x\n",
&iova, &expected_pa, &atos_pa, &walk_pa, txn.id);
return -EINVAL;
}
size -= PAGE_SIZE;
iova += PAGE_SIZE;
expected_pa += PAGE_SIZE;
}
return 0;
}
int iommu_debug_check_mapping_sg_flags(struct device *dev, struct scatterlist *sgl,
unsigned int pgoffset, unsigned int dma_nents,
unsigned int nents, u32 flags)
{
int ret;
struct sg_page_iter piter;
struct sg_dma_page_iter diter;
for (__sg_page_iter_start(&piter, sgl, nents, pgoffset),
__sg_page_iter_start(&diter.base, sgl, dma_nents, pgoffset);
__sg_page_iter_next(&piter) && __sg_page_iter_dma_next(&diter);) {
struct page *page = sg_page_iter_page(&piter);
dma_addr_t dma_addr = sg_page_iter_dma_address(&diter);
ret = iommu_debug_check_mapping_flags(dev, dma_addr, PAGE_SIZE,
page_to_phys(page), flags);
if (ret)
return ret;
}
return 0;
}
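/*
 * Illustrative sketch: how a test in this file might use the checker above
 * to verify one streaming mapping on the currently selected usecase device.
 * The "example_" name is hypothetical.
 */
#include <linux/dma-mapping.h>	/* for dma_map_page() in this sketch */

static int example_verify_single_map(struct device *test_dev)
{
	struct page *page;
	dma_addr_t iova;
	int ret;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	iova = dma_map_page(test_dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(test_dev, iova)) {
		__free_page(page);
		return -ENOMEM;
	}

	/* Compares the hardware ATOS result and a table walk against the PA. */
	ret = iommu_debug_check_mapping(test_dev, iova, PAGE_SIZE,
					page_to_phys(page));

	dma_unmap_page(test_dev, iova, PAGE_SIZE, DMA_TO_DEVICE);
	__free_page(page);
	return ret;
}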
static void iommu_debug_destroy_test_dev(struct iommu_debug_device *ddev)
{
if (ddev->test_dev) {
of_platform_device_destroy(ddev->test_dev, NULL);
ddev->test_dev = NULL;
ddev->domain = NULL;
}
}
/*
* Returns struct device corresponding to the new usecase.
* ddev->test_dev will change - caller must not use old value!
* Caller must hold ddev->state_lock
*/
struct device *
iommu_debug_switch_usecase(struct iommu_debug_device *ddev, u32 usecase_nr)
{
struct platform_device *test_pdev;
struct device_node *child;
const char *str;
int child_nr = 0;
int ret;
if (ddev->test_dev)
iommu_debug_destroy_test_dev(ddev);
if (usecase_nr >= of_get_child_count(ddev->self->of_node)) {
dev_err(ddev->self, "Invalid usecase nr requested: %u\n",
usecase_nr);
return NULL;
}
reinit_completion(&ddev->probe_wait);
for_each_child_of_node(ddev->self->of_node, child) {
if (child_nr == usecase_nr)
break;
child_nr++;
}
test_pdev = of_platform_device_create(child, NULL, ddev->self);
if (!test_pdev) {
dev_err(ddev->self, "Creating platform device failed\n");
return NULL;
}
/*
* Wait for child device's probe function to be called.
* It's very unlikely to be asynchronous.
*/
ret = wait_for_completion_interruptible_timeout(&ddev->probe_wait,
msecs_to_jiffies(USECASE_SWITCH_TIMEOUT_MSECS));
if (ret <= 0) {
dev_err(ddev->self, "Timed out waiting for usecase to register\n");
goto out;
}
if (of_property_read_string(child, "qcom,iommu-dma", &str))
str = "default";
ddev->fastmap_usecase = !strcmp(str, "fastmap");
ddev->usecase_nr = usecase_nr;
ddev->test_dev = &test_pdev->dev;
ddev->domain = iommu_get_domain_for_dev(ddev->test_dev);
if (!ddev->domain) {
dev_err(ddev->self, "Oops, usecase not associated with iommu\n");
goto out;
}
return ddev->test_dev;
out:
iommu_debug_destroy_test_dev(ddev);
return NULL;
}
/*
* Caller must hold ddev->state_lock
*/
struct device *iommu_debug_usecase_reset(struct iommu_debug_device *ddev)
{
return iommu_debug_switch_usecase(ddev, ddev->usecase_nr);
}
static int iommu_debug_usecase_register(struct device *dev)
{
struct iommu_debug_device *ddev = dev_get_drvdata(dev->parent);
complete(&ddev->probe_wait);
return 0;
}
static ssize_t iommu_debug_usecase_read(struct file *file, char __user *ubuf,
size_t count, loff_t *offset)
{
struct iommu_debug_device *ddev = file->private_data;
return simple_read_from_buffer(ubuf, count, offset, ddev->buffer,
strnlen(ddev->buffer, PAGE_SIZE));
}
static ssize_t iommu_debug_usecase_write(struct file *file, const char __user *ubuf,
size_t count, loff_t *offset)
{
struct iommu_debug_device *ddev = file->private_data;
unsigned int usecase_nr;
int ret;
ret = kstrtouint_from_user(ubuf, count, 0, &usecase_nr);
if (ret || usecase_nr >= ddev->nr_children)
return -EINVAL;
mutex_lock(&ddev->state_lock);
if (!iommu_debug_switch_usecase(ddev, usecase_nr)) {
mutex_unlock(&ddev->state_lock);
return -EINVAL;
}
mutex_unlock(&ddev->state_lock);
return count;
}
static const struct file_operations iommu_debug_usecase_fops = {
.open = simple_open,
.read = iommu_debug_usecase_read,
.write = iommu_debug_usecase_write,
.llseek = no_llseek,
};
static int iommu_debug_debugfs_setup(struct iommu_debug_device *ddev)
{
struct dentry *dir;
dir = debugfs_create_dir("iommu-test", NULL);
if (IS_ERR(dir))
return -EINVAL;
ddev->root_dir = dir;
debugfs_create_file("usecase", 0600, dir, ddev, &iommu_debug_usecase_fops);
debugfs_create_file("functional_arm_dma_api", 0400, dir, ddev,
&iommu_debug_functional_arm_dma_api_fops);
debugfs_create_file("functional_fast_dma_api", 0400, dir, ddev,
&iommu_debug_functional_fast_dma_api_fops);
debugfs_create_file("atos", 0600, dir, ddev, &iommu_debug_atos_fops);
debugfs_create_file("map", 0200, dir, ddev, &iommu_debug_map_fops);
debugfs_create_file("unmap", 0200, dir, ddev, &iommu_debug_unmap_fops);
debugfs_create_file("dma_map", 0200, dir, ddev, &iommu_debug_dma_map_fops);
debugfs_create_file("dma_unmap", 0200, dir, ddev, &iommu_debug_dma_unmap_fops);
debugfs_create_file("nr_iters", 0600, dir, ddev, &iommu_debug_nr_iters_fops);
debugfs_create_file("test_virt_addr", 0400, dir, ddev, &iommu_debug_test_virt_addr_fops);
debugfs_create_file("profiling", 0400, dir, ddev, &iommu_debug_profiling_fops);
return 0;
}
static int iommu_debug_probe(struct platform_device *pdev)
{
struct iommu_debug_device *ddev;
struct device *dev = &pdev->dev;
struct device_node *child;
int ret;
int offset = 0;
ddev = devm_kzalloc(dev, sizeof(*ddev), GFP_KERNEL);
if (!ddev)
return -ENOMEM;
ddev->self = dev;
ddev->usecase_nr = U32_MAX;
ddev->nr_iters = 1;
mutex_init(&ddev->state_lock);
init_completion(&ddev->probe_wait);
ddev->buffer = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
if (!ddev->buffer) {
ret = -ENOMEM;
goto out;
}
ddev->nr_children = 0;
for_each_child_of_node(dev->of_node, child) {
offset += scnprintf(ddev->buffer + offset, PAGE_SIZE - offset,
"%d: %s\n", ddev->nr_children, child->name);
if (offset + 1 == PAGE_SIZE) {
dev_err(dev, "Too many testcases?\n");
break;
}
ddev->nr_children++;
}
dev_set_drvdata(dev, ddev);
ret = iommu_debug_debugfs_setup(ddev);
if (ret)
goto out;
return 0;
out:
mutex_destroy(&ddev->state_lock);
return ret;
}
static int iommu_debug_remove(struct platform_device *pdev)
{
struct iommu_debug_device *ddev = platform_get_drvdata(pdev);
debugfs_remove_recursive(ddev->root_dir);
if (ddev->test_dev)
of_platform_device_destroy(ddev->test_dev, NULL);
mutex_destroy(&ddev->state_lock);
return 0;
}
static const struct of_device_id iommu_debug_of_match[] = {
{ .compatible = "qcom,iommu-debug-test" },
{ },
};
static struct platform_driver iommu_debug_driver = {
.probe = iommu_debug_probe,
.remove = iommu_debug_remove,
.driver = {
.name = "qcom-iommu-debug",
.of_match_table = iommu_debug_of_match,
},
};
/*
* This isn't really a "driver", we just need something in the device tree
* to hook up to the `iommus' property.
*/
static int iommu_debug_usecase_probe(struct platform_device *pdev)
{
return iommu_debug_usecase_register(&pdev->dev);
}
static const struct of_device_id iommu_debug_usecase_of_match[] = {
{ .compatible = "qcom,iommu-debug-usecase" },
{ },
};
static struct platform_driver iommu_debug_usecase_driver = {
.probe = iommu_debug_usecase_probe,
.driver = {
.name = "qcom-iommu-debug-usecase",
.of_match_table = iommu_debug_usecase_of_match,
},
};
static int iommu_debug_init(void)
{
int ret;
ret = platform_driver_register(&iommu_debug_driver);
if (ret)
return ret;
ret = platform_driver_register(&iommu_debug_usecase_driver);
if (ret)
platform_driver_unregister(&iommu_debug_driver);
return ret;
}
static void iommu_debug_exit(void)
{
platform_driver_unregister(&iommu_debug_usecase_driver);
platform_driver_unregister(&iommu_debug_driver);
}
module_init(iommu_debug_init);
module_exit(iommu_debug_exit);
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2015-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __DRIVERS_IOMMU_QCOM_IOMMU_DEBUG_H__
#define __DRIVERS_IOMMU_QCOM_IOMMU_DEBUG_H__
#include <linux/device.h>
#include <linux/debugfs.h>
#include <linux/iommu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
#define ARM_SMMU_SMR_ID GENMASK(15, 0)
struct iommu_debug_device {
struct device *self;
u32 nr_children;
char *buffer;
struct dentry *root_dir;
/* for usecase under test */
struct device *test_dev;
struct iommu_domain *domain;
u32 usecase_nr;
bool fastmap_usecase;
/* Protects test_dev */
struct mutex state_lock;
/* For waiting for child probe to complete */
struct completion probe_wait;
/* Used for atos */
u64 iova;
/* number of iterations */
u32 nr_iters;
};
struct device *iommu_debug_usecase_reset(struct iommu_debug_device *ddev);
struct device *iommu_debug_switch_usecase(struct iommu_debug_device *ddev, u32 usecase_nr);
int iommu_debug_check_mapping_flags(struct device *dev, dma_addr_t iova, size_t size,
phys_addr_t expected_pa, u32 flags);
#define iommu_debug_check_mapping(d, i, s, p) \
iommu_debug_check_mapping_flags(d, i, s, p, 0)
/* Only checks a single page */
#define iommu_debug_check_mapping_fast(d, i, s, p) \
iommu_debug_check_mapping_flags(d, i, PAGE_SIZE, p, 0)
int iommu_debug_check_mapping_sg_flags(struct device *dev, struct scatterlist *sgl,
unsigned int pgoffset, unsigned int dma_nents,
unsigned int nents, u32 flags);
#define iommu_debug_check_mapping_sg(d, s, o, e1, e2) \
iommu_debug_check_mapping_sg_flags(d, s, o, e1, e2, 0)
/* Only checks the last page of first sgl */
static inline int iommu_debug_check_mapping_sg_fast(struct device *dev, struct scatterlist *sgl,
unsigned int pgoffset, unsigned int dma_nents,
unsigned int nents)
{
pgoffset = PAGE_ALIGN(sgl->offset + sgl->length) >> PAGE_SHIFT;
return iommu_debug_check_mapping_sg_flags(dev, sgl, pgoffset - 1, dma_nents, 1, 0);
}
extern const struct file_operations iommu_debug_functional_arm_dma_api_fops;
extern const struct file_operations iommu_debug_functional_fast_dma_api_fops;
extern const struct file_operations iommu_debug_atos_fops;
extern const struct file_operations iommu_debug_map_fops;
extern const struct file_operations iommu_debug_unmap_fops;
extern const struct file_operations iommu_debug_dma_map_fops;
extern const struct file_operations iommu_debug_dma_unmap_fops;
extern const struct file_operations iommu_debug_test_virt_addr_fops;
extern const struct file_operations iommu_debug_profiling_fops;
#endif

File diff suppressed because it is too large Load Diff