Hi Mark,

On Tue, 23 Dec 2025 at 01:21, Mark Brown <[email protected]> wrote:
>
> Currently we enable EL0 and EL1 access to FA64 and ZT0 at boot and leave
> them enabled throughout the runtime of the system. When we add KVM support
> we will need to make this configuration dynamic, these features may be
> disabled for some KVM guests. Since the host kernel saves the floating
> point state for non-protected guests and we wish to avoid KVM having to
> reload the floating point state needlessly on guest reentry let's move the
> configuration of these enables to the floating point state reload.
>
> We provide a helper which does the configuration as part of a
> read/modify/write operation along with the configuration of the task VL,
> then update the floating point state load and SME access trap to use it.
> We also remove the setting of the enable bits from the CPU feature
> identification and resume paths. There will be a small overhead from
> setting the enables one at a time but this should be negligable in the

nit: negligible

> context of the state load or access trap. In order to avoid compiler
> warnings due to unused variables in !CONFIG_ARM64_SME cases we avoid
> storing the vector length in temporary variables.
>
> Signed-off-by: Mark Brown <[email protected]>
> ---
>  arch/arm64/include/asm/fpsimd.h | 14 ++++++++++++
>  arch/arm64/kernel/cpufeature.c  |  2 --
>  arch/arm64/kernel/fpsimd.c      | 47 +++++++++++------------------------------
>  3 files changed, 26 insertions(+), 37 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 1d2e33559bd5..ece65061dea0 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -428,6 +428,18 @@ static inline size_t sme_state_size(struct task_struct const *task)
>         return __sme_state_size(task_get_sme_vl(task));
>  }
>
> +#define sme_cond_update_smcr(vl, fa64, zt0, reg)               \
> +       do {                                                    \
> +               u64 __old = read_sysreg_s((reg));               \
> +               u64 __new = vl;                                 \
> +               if (fa64)                                       \
> +                       __new |= SMCR_ELx_FA64;                 \
> +               if (zt0)                                        \
> +                       __new |= SMCR_ELx_EZT0;                 \
> +               if (__old != __new)                             \
> +                       write_sysreg_s(__new, (reg));           \
> +       } while (0)
> +

Should we cap the VL based on SMCR_ELx_LEN_MASK (as we do in
sve_cond_update_zcr_vq())?

Should we also preserve the remaining old bits from SMCR_EL1, i.e., copy
over the bits that aren't SMCR_ELx_LEN_MASK|SMCR_ELx_FA64|SMCR_ELx_EZT0?
For now they are RES0, but that could change. A rough sketch of what I
mean is at the end of this mail.

Cheers,
/fuad

>  #else
>
>  static inline void sme_user_disable(void) { BUILD_BUG(); }
> @@ -456,6 +468,8 @@ static inline size_t sme_state_size(struct task_struct const *task)
>         return 0;
>  }
>
> +#define sme_cond_update_smcr(val, fa64, zt0, reg) do { } while (0)
> +
>  #endif /* ! CONFIG_ARM64_SME */
>
>  /* For use by EFI runtime services calls only */
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index c840a93b9ef9..ca9e66cc62d8 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2965,7 +2965,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>                 .type = ARM64_CPUCAP_SYSTEM_FEATURE,
>                 .capability = ARM64_SME_FA64,
>                 .matches = has_cpuid_feature,
> -               .cpu_enable = cpu_enable_fa64,
>                 ARM64_CPUID_FIELDS(ID_AA64SMFR0_EL1, FA64, IMP)
>         },
>         {
> @@ -2973,7 +2972,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>                 .type = ARM64_CPUCAP_SYSTEM_FEATURE,
>                 .capability = ARM64_SME2,
>                 .matches = has_cpuid_feature,
> -               .cpu_enable = cpu_enable_sme2,
>                 ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, SME2)
>         },
>  #endif /* CONFIG_ARM64_SME */
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index c154f72634e0..be4499ff6498 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -405,11 +405,15 @@ static void task_fpsimd_load(void)
>
>         /* Restore SME, override SVE register configuration if needed */
>         if (system_supports_sme()) {
> -               unsigned long sme_vl = task_get_sme_vl(current);
> -
> -               /* Ensure VL is set up for restoring data */
> +               /*
> +                * Ensure VL is set up for restoring data. KVM might
> +                * disable subfeatures so we reset them each time.
> +                */
>                 if (test_thread_flag(TIF_SME))
> -                       sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
> +                       sme_cond_update_smcr(sve_vq_from_vl(task_get_sme_vl(current)) - 1,
> +                                            system_supports_fa64(),
> +                                            system_supports_sme2(),
> +                                            SYS_SMCR_EL1);
>
>                 write_sysreg_s(current->thread.svcr, SYS_SVCR);
>
> @@ -1250,26 +1254,6 @@ void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
>         isb();
>  }
>
> -void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
> -{
> -       /* This must be enabled after SME */
> -       BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);
> -
> -       /* Allow use of ZT0 */
> -       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
> -                      SYS_SMCR_EL1);
> -}
> -
> -void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
> -{
> -       /* This must be enabled after SME */
> -       BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);
> -
> -       /* Allow use of FA64 */
> -       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
> -                      SYS_SMCR_EL1);
> -}
> -
>  void __init sme_setup(void)
>  {
>         struct vl_info *info = &vl_info[ARM64_VEC_SME];
> @@ -1314,17 +1298,9 @@ void __init sme_setup(void)
>
>  void sme_suspend_exit(void)
>  {
> -       u64 smcr = 0;
> -
>         if (!system_supports_sme())
>                 return;
>
> -       if (system_supports_fa64())
> -               smcr |= SMCR_ELx_FA64;
> -       if (system_supports_sme2())
> -               smcr |= SMCR_ELx_EZT0;
> -
> -       write_sysreg_s(smcr, SYS_SMCR_EL1);
>         write_sysreg_s(0, SYS_SMPRI_EL1);
>  }
>
> @@ -1439,9 +1415,10 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
>                 WARN_ON(1);
>
>         if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
> -               unsigned long vq_minus_one =
> -                       sve_vq_from_vl(task_get_sme_vl(current)) - 1;
> -               sme_set_vq(vq_minus_one);
> +               sme_cond_update_smcr(sve_vq_from_vl(task_get_sme_vl(current)) - 1,
> +                                    system_supports_fa64(),
> +                                    system_supports_sme2(),
> +                                    SYS_SMCR_EL1);
>
>                 fpsimd_bind_task_to_cpu();
>         } else {
>
> --
> 2.47.3
>
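To be concrete about both questions above, I was thinking of something
roughly like the following. This is completely untested and only meant to
illustrate the idea, following the shape of sve_cond_update_zcr_vq():

#define sme_cond_update_smcr(vl, fa64, zt0, reg)                        \
        do {                                                            \
                u64 __old = read_sysreg_s((reg));                       \
                /* Preserve any bits we don't explicitly manage */      \
                u64 __new = __old & ~(SMCR_ELx_LEN_MASK |               \
                                      SMCR_ELx_FA64 |                   \
                                      SMCR_ELx_EZT0);                   \
                /* Cap the requested VL to the LEN field */             \
                __new |= (vl) & SMCR_ELx_LEN_MASK;                      \
                if (fa64)                                               \
                        __new |= SMCR_ELx_FA64;                         \
                if (zt0)                                                \
                        __new |= SMCR_ELx_EZT0;                         \
                if (__old != __new)                                     \
                        write_sysreg_s(__new, (reg));                   \
        } while (0)

That way an out-of-range vl can't spill into the other fields, and any
future non-RES0 bits in SMCR_EL1 would be left untouched.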
