The obvious key to the performance optimization of commit 587e6c10a7ce ("iommu/arm-smmu-v3: Reduce contention during command-queue insertion") is to allow multiple cores to insert commands in parallel after a brief mutex contention.
Obviously, inserting as many commands at a time as possible can reduce the number of times the mutex contention participates, thereby improving the overall performance. At least it reduces the number of calls to function arm_smmu_cmdq_issue_cmdlist(). Therefore, function arm_smmu_cmdq_issue_cmd_with_sync() is added to insert the 'cmd+sync' commands at a time. Signed-off-by: Zhen Lei <thunder.leiz...@huawei.com> --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 2433d3c29b49ff2..a5361153ca1d6a4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -858,11 +858,25 @@ static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false); } -static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +static int __maybe_unused arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) { return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true); } +static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + + if (arm_smmu_cmdq_build_cmd(cmd, ent)) { + dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", + ent->opcode); + return -EINVAL; + } + + return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, true); +} + static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) @@ -928,8 +942,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) .tlbi.asid = asid, }; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, @@ -1210,8 +1223,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) }, }; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, @@ -1823,8 +1835,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) } else { cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); } @@ -3338,18 +3349,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Invalidate any cached configuration */ cmd.opcode = CMDQ_OP_CFGI_ALL; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); /* Invalidate any stale TLB entries */ if (smmu->features & ARM_SMMU_FEAT_HYP) { cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); -- 2.26.0.106.g9fadedd _______________________________________________ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu