>-----Original Message----- >From: CLEMENT MATHIEU--DRIF <clement.mathieu--d...@eviden.com> >Subject: Re: [PATCH rfcv2 06/17] intel_iommu: Implement stage-1 >translation > >Hi Zhenzhong, > >I already sent you my comments about this patch earlier (question about >checking pgtt) but here is a style review > >On 22/05/2024 08:23, Zhenzhong Duan wrote: >> Caution: External email. Do not open attachments or click links, unless this >email comes from a known sender and you know the content is safe. >> >> >> From: Yi Liu <yi.l....@intel.com> >> >> This adds stage-1 page table walking to support stage-1 only >> transltion in scalable modern mode. >> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> hw/i386/intel_iommu_internal.h | 17 +++++ >> hw/i386/intel_iommu.c | 128 >+++++++++++++++++++++++++++++++-- >> 2 files changed, 141 insertions(+), 4 deletions(-) >> >> diff --git a/hw/i386/intel_iommu_internal.h >b/hw/i386/intel_iommu_internal.h >> index 0e240d6d54..abfdbd5f65 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest- >address-width */ >> #define VTD_SM_PASID_ENTRY_DID(val) ((val) & >VTD_DOMAIN_ID_MASK) >> >> +#define VTD_SM_PASID_ENTRY_FLPM 3ULL >> +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) >> + >> +/* Paging Structure common */ >> +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7) >> +/* Bits to decide the offset for each level */ >> +#define VTD_FL_LEVEL_BITS 9 >> + >> +/* First Level Paging Structure */ >> +#define VTD_FL_PT_LEVEL 1 >> +#define VTD_FL_PT_ENTRY_NR 512 >> + >> +/* Masks for First Level Paging Entry */ >> +#define VTD_FL_RW_MASK (1ULL << 1) >> +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & >VTD_HAW_MASK(aw)) >> +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing >Disable */ >> + >> /* Second Level Page Translation Pointer*/ >> #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) >> >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 544e8f0e40..cf29809bc1 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -50,6 +50,8 @@ >> /* pe operations */ >> #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & >VTD_SM_PASID_ENTRY_PGTT) >> #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & >VTD_SM_PASID_ENTRY_AW)) >> +#define VTD_PE_GET_FLPT_LEVEL(pe) \ >> + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) >> >> /* >> * PCI bus number (or SID) is not reliable since the device is usaully >> @@ -823,6 +825,11 @@ static int >vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, >> return -VTD_FR_PASID_TABLE_ENTRY_INV; >> } >> >> + if (pgtt == VTD_SM_PASID_ENTRY_FLT && >> + VTD_PE_GET_FLPT_LEVEL(pe) != 4) { >Maybe you could add a function to check if the level is supported. >And it would also be nice to rename vtd_is_level_supported (used just >above these lines) to make it clear that it's only relevant for second >level translations and avoid mistakes
Sure, will do. Thanks Zhenzhong >> + return -VTD_FR_PASID_TABLE_ENTRY_INV; >> + } >> + >> return 0; >> } >> >> @@ -958,7 +965,11 @@ static uint32_t >vtd_get_iova_level(IntelIOMMUState *s, >> >> if (s->root_scalable) { >> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); >> - return VTD_PE_GET_LEVEL(&pe); >> + if (s->scalable_modern) { >> + return VTD_PE_GET_FLPT_LEVEL(&pe); >> + } else { >> + return VTD_PE_GET_LEVEL(&pe); >same, could be renamed >> + } >> } >> >> return vtd_ce_get_level(ce); >> @@ -1045,7 +1056,11 @@ static dma_addr_t >vtd_get_iova_pgtbl_base(IntelIOMMUState *s, >> >> if (s->root_scalable) { >> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); >> - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; >> + if (s->scalable_modern) { >> + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; >> + } else { >> + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; >> + } >> } >> >> return vtd_ce_get_slpt_base(ce); >> @@ -1847,6 +1862,106 @@ out: >> trace_vtd_pt_enable_fast_path(source_id, success); >> } >> >> +/* The shift of an addr for a certain level of paging structure */ >> +static inline uint32_t vtd_flpt_level_shift(uint32_t level) >> +{ >> + assert(level != 0); >> + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS; >> +} >> + >> +/* >> + * Given an iova and the level of paging structure, return the offset >> + * of current level. >> + */ >> +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t >level) >> +{ >> + return (iova >> vtd_flpt_level_shift(level)) & >> + ((1ULL << VTD_FL_LEVEL_BITS) - 1); >> +} >> + >> +/* Get the content of a flpte located in @base_addr[@index] */ >> +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index) >> +{ >> + uint64_t flpte; >> + >> + assert(index < VTD_FL_PT_ENTRY_NR); >> + >> + if (dma_memory_read(&address_space_memory, >> + base_addr + index * sizeof(flpte), &flpte, >> + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) { >> + flpte = (uint64_t)-1; >> + return flpte; >> + } >> + flpte = le64_to_cpu(flpte); >> + return flpte; >> +} >> + >> +static inline bool vtd_flpte_present(uint64_t flpte) >> +{ >> + return !!(flpte & 0x1); >Shouldn't we use a #define instead of hardcoding the mask? >> +} >> + >> +/* Whether the pte indicates the address of the page frame */ >> +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level) >> +{ >> + return level == VTD_FL_PT_LEVEL || (flpte & >VTD_FL_PT_PAGE_SIZE_MASK); >> +} >> + >> +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw) >> +{ >> + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw); >> +} >> + >> +/* >> + * Given the @iova, get relevant @flptep. @flpte_level will be the last >level >> + * of the translation, can be used for deciding the size of large page. >> + */ >> +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, >> + uint64_t iova, bool is_write, >> + uint64_t *flptep, uint32_t *flpte_level, >> + bool *reads, bool *writes, uint8_t aw_bits, >> + uint32_t pasid) >> +{ >> + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); >> + uint32_t level = vtd_get_iova_level(s, ce, pasid); >> + uint32_t offset; >> + uint64_t flpte; >> + >> + while (true) { >> + offset = vtd_iova_fl_level_offset(iova, level); >> + flpte = vtd_get_flpte(addr, offset); >> + if (flpte == (uint64_t)-1) { >> + if (level == vtd_get_iova_level(s, ce, pasid)) { >> + /* Invalid programming of context-entry */ >> + return -VTD_FR_CONTEXT_ENTRY_INV; >> + } else { >> + return -VTD_FR_PAGING_ENTRY_INV; >> + } >> + } >> + >> + if (!vtd_flpte_present(flpte)) { >> + *reads = false; >> + *writes = false; >> + return -VTD_FR_PAGING_ENTRY_INV; >> + } >> + >> + *reads = true; >> + *writes = (*writes) && (flpte & VTD_FL_RW_MASK); >> + if (is_write && !(flpte & VTD_FL_RW_MASK)) { >> + return -VTD_FR_WRITE; >> + } >> + >> + if (vtd_is_last_flpte(flpte, level)) { >> + *flptep = flpte; >> + *flpte_level = level; >> + return 0; >> + } >> + >> + addr = vtd_get_flpte_addr(flpte, aw_bits); >> + level--; >> + } >> +} >> + >> static void vtd_report_fault(IntelIOMMUState *s, >> int err, bool is_fpd_set, >> uint16_t source_id, >> @@ -1995,8 +2110,13 @@ static bool >vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, >> } >> } >> >> - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, >> - &reads, &writes, s->aw_bits, pasid); >> + if (s->scalable_modern) { >> + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, >> + &reads, &writes, s->aw_bits, pasid); >> + } else { >> + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, >> + &reads, &writes, s->aw_bits, pasid); >> + } >> >> if (ret_fr) { >> vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, >> -- >> 2.34.1 >> >#cmd