Hi Zhenzhong, I already sent you my comments about this patch earlier (question about checking pgtt) but here is a style review
On 22/05/2024 08:23, Zhenzhong Duan wrote: > Caution: External email. Do not open attachments or click links, unless this > email comes from a known sender and you know the content is safe. > > > From: Yi Liu <yi.l....@intel.com> > > This adds stage-1 page table walking to support stage-1 only > transltion in scalable modern mode. > > Signed-off-by: Yi Liu <yi.l....@intel.com> > Signed-off-by: Yi Sun <yi.y....@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> > --- > hw/i386/intel_iommu_internal.h | 17 +++++ > hw/i386/intel_iommu.c | 128 +++++++++++++++++++++++++++++++-- > 2 files changed, 141 insertions(+), 4 deletions(-) > > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > index 0e240d6d54..abfdbd5f65 100644 > --- a/hw/i386/intel_iommu_internal.h > +++ b/hw/i386/intel_iommu_internal.h > @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry; > #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width > */ > #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) > > +#define VTD_SM_PASID_ENTRY_FLPM 3ULL > +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) > + > +/* Paging Structure common */ > +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7) > +/* Bits to decide the offset for each level */ > +#define VTD_FL_LEVEL_BITS 9 > + > +/* First Level Paging Structure */ > +#define VTD_FL_PT_LEVEL 1 > +#define VTD_FL_PT_ENTRY_NR 512 > + > +/* Masks for First Level Paging Entry */ > +#define VTD_FL_RW_MASK (1ULL << 1) > +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & > VTD_HAW_MASK(aw)) > +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable > */ > + > /* Second Level Page Translation Pointer*/ > #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 544e8f0e40..cf29809bc1 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -50,6 +50,8 @@ > /* pe operations */ > #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) > #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & > VTD_SM_PASID_ENTRY_AW)) > +#define VTD_PE_GET_FLPT_LEVEL(pe) \ > + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) > > /* > * PCI bus number (or SID) is not reliable since the device is usaully > @@ -823,6 +825,11 @@ static int > vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, > return -VTD_FR_PASID_TABLE_ENTRY_INV; > } > > + if (pgtt == VTD_SM_PASID_ENTRY_FLT && > + VTD_PE_GET_FLPT_LEVEL(pe) != 4) { Maybe you could add a function to check if the level is supported. And it would also be nice to rename vtd_is_level_supported (used just above these lines) to make it clear that it's only relevant for second level translations and avoid mistakes > + return -VTD_FR_PASID_TABLE_ENTRY_INV; > + } > + > return 0; > } > > @@ -958,7 +965,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, > > if (s->root_scalable) { > vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); > - return VTD_PE_GET_LEVEL(&pe); > + if (s->scalable_modern) { > + return VTD_PE_GET_FLPT_LEVEL(&pe); > + } else { > + return VTD_PE_GET_LEVEL(&pe); same, could be renamed > + } > } > > return vtd_ce_get_level(ce); > @@ -1045,7 +1056,11 @@ static dma_addr_t > vtd_get_iova_pgtbl_base(IntelIOMMUState *s, > > if (s->root_scalable) { > vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); > - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; > + if (s->scalable_modern) { > + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; > + } else { > + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; > + } > } > > return vtd_ce_get_slpt_base(ce); > @@ -1847,6 +1862,106 @@ out: > trace_vtd_pt_enable_fast_path(source_id, success); > } > > +/* The shift of an addr for a certain level of paging structure */ > +static inline uint32_t vtd_flpt_level_shift(uint32_t level) > +{ > + assert(level != 0); > + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS; > +} > + > +/* > + * Given an iova and the level of paging structure, return the offset > + * of current level. > + */ > +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t > level) > +{ > + return (iova >> vtd_flpt_level_shift(level)) & > + ((1ULL << VTD_FL_LEVEL_BITS) - 1); > +} > + > +/* Get the content of a flpte located in @base_addr[@index] */ > +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index) > +{ > + uint64_t flpte; > + > + assert(index < VTD_FL_PT_ENTRY_NR); > + > + if (dma_memory_read(&address_space_memory, > + base_addr + index * sizeof(flpte), &flpte, > + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) { > + flpte = (uint64_t)-1; > + return flpte; > + } > + flpte = le64_to_cpu(flpte); > + return flpte; > +} > + > +static inline bool vtd_flpte_present(uint64_t flpte) > +{ > + return !!(flpte & 0x1); Shouldn't we use a #define instead of hardcoding the mask? > +} > + > +/* Whether the pte indicates the address of the page frame */ > +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level) > +{ > + return level == VTD_FL_PT_LEVEL || (flpte & VTD_FL_PT_PAGE_SIZE_MASK); > +} > + > +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw) > +{ > + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw); > +} > + > +/* > + * Given the @iova, get relevant @flptep. @flpte_level will be the last level > + * of the translation, can be used for deciding the size of large page. > + */ > +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, > + uint64_t iova, bool is_write, > + uint64_t *flptep, uint32_t *flpte_level, > + bool *reads, bool *writes, uint8_t aw_bits, > + uint32_t pasid) > +{ > + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); > + uint32_t level = vtd_get_iova_level(s, ce, pasid); > + uint32_t offset; > + uint64_t flpte; > + > + while (true) { > + offset = vtd_iova_fl_level_offset(iova, level); > + flpte = vtd_get_flpte(addr, offset); > + if (flpte == (uint64_t)-1) { > + if (level == vtd_get_iova_level(s, ce, pasid)) { > + /* Invalid programming of context-entry */ > + return -VTD_FR_CONTEXT_ENTRY_INV; > + } else { > + return -VTD_FR_PAGING_ENTRY_INV; > + } > + } > + > + if (!vtd_flpte_present(flpte)) { > + *reads = false; > + *writes = false; > + return -VTD_FR_PAGING_ENTRY_INV; > + } > + > + *reads = true; > + *writes = (*writes) && (flpte & VTD_FL_RW_MASK); > + if (is_write && !(flpte & VTD_FL_RW_MASK)) { > + return -VTD_FR_WRITE; > + } > + > + if (vtd_is_last_flpte(flpte, level)) { > + *flptep = flpte; > + *flpte_level = level; > + return 0; > + } > + > + addr = vtd_get_flpte_addr(flpte, aw_bits); > + level--; > + } > +} > + > static void vtd_report_fault(IntelIOMMUState *s, > int err, bool is_fpd_set, > uint16_t source_id, > @@ -1995,8 +2110,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace > *vtd_as, PCIBus *bus, > } > } > > - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, > - &reads, &writes, s->aw_bits, pasid); > + if (s->scalable_modern) { > + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, > + &reads, &writes, s->aw_bits, pasid); > + } else { > + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, > + &reads, &writes, s->aw_bits, pasid); > + } > > if (ret_fr) { > vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, > -- > 2.34.1 > #cmd