> Date: Fri, 4 Sep 2020 00:50:44 -0500
> From: Jordan Hargrave <jordan_hargr...@hotmail.com>

A few hints below...

> > > +
> > > +/* Device Table Entry per source-id */
> > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > +
> > > +/* Alias mapping */
> > > +#define SID_INVALID 0x80000000L
> > > +static uint32_t sid_flag[65536];
> > 
> > Can we avoid having these large arrays, or at least allocate them
> > dynamically?  That would also avoid the explicit alignment, which is
> > somewhat nasty since it affects the entire kernel.
> 
> OK. But the hwdte does need the 2M area to be all contiguous; that is not
> needed for DMAR/Intel.  You *can* have up to 8 different device table entries
> though to split up the area.

The appropriate interface to use in this context is
bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
to 1, you will get memory that is physically contiguous.

To map the memory into kernel address space you'll need to create a
map using bus_dmamap_create(9) and map it using bus_dmamem_map(9).
Then, instead of using pmap_extract(9), you use bus_dmamap_load_raw(9),
which populates the physical addresses.

Many of the drivers written by dlg@ define convenience functions to do
all these steps, although interestingly enough he tends to use
bus_dmamap_load(9) instead of bus_dmamap_load_raw(9), which is
sub-optimal.
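
Roughly, for the device table case, that would look like the sketch
below.  Untested, written from the man pages; "dmar_dmamem" and the
dv_* names are made up here.  nsegs == 1 plus the 2M size gives you
one physically contiguous segment with whatever alignment you ask
for, and dm_segs[0].ds_addr is the physical base you program into the
hardware, no pmap_extract(9) needed:

struct dmar_dmamem {
        bus_dmamap_t            dv_map;
        bus_dma_segment_t       dv_seg;
        size_t                  dv_size;
        caddr_t                 dv_kva;
};

int
dmar_dmamem_alloc_contig(bus_dma_tag_t dmat, struct dmar_dmamem *dv,
    size_t size, bus_size_t align)
{
        int nsegs;

        dv->dv_size = size;
        /* nsegs = 1 forces a single physically contiguous segment */
        if (bus_dmamem_alloc(dmat, size, align, 0, &dv->dv_seg, 1,
            &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
                return (ENOMEM);
        if (bus_dmamem_map(dmat, &dv->dv_seg, 1, size, &dv->dv_kva,
            BUS_DMA_NOWAIT) != 0)
                goto free;
        if (bus_dmamap_create(dmat, size, 1, size, 0, BUS_DMA_NOWAIT,
            &dv->dv_map) != 0)
                goto unmap;
        /* fills dv_map->dm_segs[0].ds_addr with the physical base */
        if (bus_dmamap_load_raw(dmat, dv->dv_map, &dv->dv_seg, 1, size,
            BUS_DMA_NOWAIT) != 0)
                goto destroy;
        return (0);
destroy:
        bus_dmamap_destroy(dmat, dv->dv_map);
unmap:
        bus_dmamem_unmap(dmat, dv->dv_kva, size);
free:
        bus_dmamem_free(dmat, &dv->dv_seg, 1);
        return (ENOMEM);
}

Then hwdte becomes the dv_kva of a 65536 * sizeof(struct ivhd_dte)
allocation with PAGE_SIZE alignment, and the static array (and its
__aligned(), which bloats the whole kernel image) can go away.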

> > > +
> > > +struct domain_dev {
> > > + int                     sid;
> > > + int                     sec;
> > > + int                     sub;
> > > + TAILQ_ENTRY(domain_dev) link;
> > > +};
> > > +
> > > +struct domain {
> > > + struct iommu_softc      *iommu;
> > > + int                     did;
> > > + int                     gaw;
> > > + struct pte_entry        *pte;
> > > + paddr_t                 ptep;
> > > + struct bus_dma_tag      dmat;
> > > + int                     flag;
> > > +
> > > + struct mutex            exlck;
> > > + char                    exname[32];
> > > + struct extent           *iovamap;
> > > + TAILQ_HEAD(,domain_dev) devices;
> > > + TAILQ_ENTRY(domain)     link;
> > > +};
> > > +
> > > +#define DOM_DEBUG 0x1
> > > +#define DOM_NOMAP 0x2
> > > +
> > > +struct dmar_devlist {
> > > + int                             type;
> > > + int                             bus;
> > > + int                             ndp;
> > > + struct acpidmar_devpath         *dp;
> > > + TAILQ_ENTRY(dmar_devlist)       link;
> > > +};
> > > +
> > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > +
> > > +struct ivhd_devlist {
> > > + int                             start_id;
> > > + int                             end_id;
> > > + int                             cfg;
> > > + TAILQ_ENTRY(ivhd_devlist)       link;
> > > +};
> > > +
> > > +struct rmrr_softc {
> > > + TAILQ_ENTRY(rmrr_softc) link;
> > > + struct devlist_head     devices;
> > > + int                     segment;
> > > + uint64_t                start;
> > > + uint64_t                end;
> > > +};
> > > +
> > > +struct atsr_softc {
> > > + TAILQ_ENTRY(atsr_softc) link;
> > > + struct devlist_head     devices;
> > > + int                     segment;
> > > + int                     flags;
> > > +};
> > > +
> > > +struct iommu_pic {
> > > + struct pic              pic;
> > > + struct iommu_softc      *iommu;
> > > +};
> > > +
> > > +#define IOMMU_FLAGS_CATCHALL             0x1
> > > +#define IOMMU_FLAGS_BAD                  0x2
> > > +#define IOMMU_FLAGS_SUSPEND              0x4
> > > +
> > > +struct iommu_softc {
> > > + TAILQ_ENTRY(iommu_softc)link;
> > > + struct devlist_head     devices;
> > > + int                     id;
> > > + int                     flags;
> > > + int                     segment;
> > > +
> > > + struct mutex            reg_lock;
> > > +
> > > + bus_space_tag_t         iot;
> > > + bus_space_handle_t      ioh;
> > > +
> > > + uint64_t                cap;
> > > + uint64_t                ecap;
> > > + uint32_t                gcmd;
> > > +
> > > + int                     mgaw;
> > > + int                     agaw;
> > > + int                     ndoms;
> > > +
> > > + struct root_entry       *root;
> > > + struct context_entry    *ctx[256];
> > > +
> > > + void                    *intr;
> > > + struct iommu_pic        pic;
> > > + int                     fedata;
> > > + uint64_t                feaddr;
> > > + uint64_t                rtaddr;
> > > +
> > > + // Queued Invalidation
> > > + int                     qi_head;
> > > + int                     qi_tail;
> > > + paddr_t                 qip;
> > > + struct qi_entry         *qi;
> > > +
> > > + struct domain           *unity;
> > > + TAILQ_HEAD(,domain)     domains;
> > > +
> > > + // AMD iommu
> > > + struct ivhd_dte         *dte;
> > > + void                    *cmd_tbl;
> > > + void                    *evt_tbl;
> > > + paddr_t                 cmd_tblp;
> > > + paddr_t                 evt_tblp;
> > > + uint64_t                wv[128] __aligned(4096);
> > 
> > This wv array isn't used as far as I can tell.
> 
> Ah I was doing some testing on the commands.. I keep getting iommu
> command timeouts

Hmm, yes, using the wv variable on the stack as you do below is a bit
suspect.  Using __aligned() for stack variables may not give you the
proper alignment if the alignment of the stack itself is smaller than
what you ask for.
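
Since iommu_alloc_page() in this diff already hands back page-aligned
memory together with its physical address, the wait buffer could be
allocated once at attach time instead.  Sketch only; turning wv into a
pointer and adding a wvp member to struct iommu_softc is my
assumption:

        /* in the softc: uint64_t *wv;  paddr_t wvp; */
        iommu->wv = iommu_alloc_page(iommu, &iommu->wvp);

That sidesteps the stack alignment question entirely and gives you a
stable physical address to hand to the completion-wait command.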

> > > +};
> > > +
> > > +static inline int iommu_bad(struct iommu_softc *sc)
> > > +{
> > > + return (sc->flags & IOMMU_FLAGS_BAD);
> > > +}
> > > +
> > > +static inline int iommu_enabled(struct iommu_softc *sc)
> > > +{
> > > + if (sc->dte) {
> > > +         return 1;
> > > + }
> > > + return (sc->gcmd & GCMD_TE);
> > > +}
> > > +
> > > +struct acpidmar_softc {
> > > + struct device           sc_dev;
> > > +
> > > + pci_chipset_tag_t       sc_pc;
> > > + bus_space_tag_t         sc_memt;
> > > + int                     sc_haw;
> > > + int                     sc_flags;
> > > +
> > > + TAILQ_HEAD(,iommu_softc)sc_drhds;
> > > + TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
> > > + TAILQ_HEAD(,atsr_softc) sc_atsrs;
> > > +};
> > > +
> > > +int              acpidmar_activate(struct device *, int);
> > > +int              acpidmar_match(struct device *, void *, void *);
> > > +void             acpidmar_attach(struct device *, struct device *, void *);
> > > +struct domain   *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
> > > +
> > > +struct cfattach acpidmar_ca = {
> > > + sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
> > > +};
> > > +
> > > +struct cfdriver acpidmar_cd = {
> > > + NULL, "acpidmar", DV_DULL
> > > +};
> > > +
> > > +struct           acpidmar_softc *acpidmar_sc;
> > > +int              acpidmar_intr(void *);
> > > +int              acpiivhd_intr(void *);
> > > +
> > > +#define DID_UNITY 0x1
> > > +
> > > +void _dumppte(struct pte_entry *, int, vaddr_t);
> > > +
> > > +struct domain *domain_create(struct iommu_softc *, int);
> > > +struct domain *domain_lookup(struct acpidmar_softc *, int, int);
> > > +
> > > +void domain_unload_map(struct domain *, bus_dmamap_t);
> > > +void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
> > > +
> > > +void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
> > > +void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
> > > +void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
> > > +void domain_map_pthru(struct domain *, paddr_t, paddr_t);
> > > +
> > > +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
> > > +void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
> > > +    struct devlist_head *);
> > > +int  acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
> > > +
> > > +void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
> > > +void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
> > > +void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
> > > +void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
> > > +void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
> > > +
> > > +void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
> > > +    const char *);
> > > +
> > > +void iommu_writel(struct iommu_softc *, int, uint32_t);
> > > +uint32_t iommu_readl(struct iommu_softc *, int);
> > > +void iommu_writeq(struct iommu_softc *, int, uint64_t);
> > > +uint64_t iommu_readq(struct iommu_softc *, int);
> > > +void iommu_showfault(struct iommu_softc *, int,
> > > +    struct fault_entry *);
> > > +void iommu_showcfg(struct iommu_softc *, int);
> > > +
> > > +int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> > > +    struct acpidmar_drhd *);
> > > +int iommu_enable_translation(struct iommu_softc *, int);
> > > +void iommu_enable_qi(struct iommu_softc *, int);
> > > +void iommu_flush_cache(struct iommu_softc *, void *, size_t);
> > > +void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
> > > +void iommu_flush_write_buffer(struct iommu_softc *);
> > > +void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
> > > +
> > > +void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
> > > +void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
> > > +void iommu_flush_tlb(struct iommu_softc *, int, int);
> > > +void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
> > > +
> > > +void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
> > > +
> > > +const char *dmar_bdf(int);
> > > +
> > > +const char *
> > > +dmar_bdf(int sid)
> > > +{
> > > + static char     bdf[32];
> > > +
> > > + snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
> > > +     sid_bus(sid), sid_dev(sid), sid_fun(sid));
> > > +
> > > + return (bdf);
> > > +}
> > > +
> > > +/* busdma */
> > > +static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
> > > +    bus_size_t, int, bus_dmamap_t *);
> > > +static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
> > > +static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
> > > +    struct proc *, int);
> > > +static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
> > > +    int);
> > > +static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
> > > +static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
> > > +    bus_dma_segment_t *, int, bus_size_t, int);
> > > +static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
> > > +static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
> > > +    bus_size_t, int);
> > > +static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
> > > +    bus_dma_segment_t *, int, int *, int);
> > > +static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
> > > +static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
> > > +    caddr_t *, int);
> > > +static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
> > > +static paddr_t   dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
> > > +    int, int);
> > > +
> > > +static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
> > > +const char *dom_bdf(struct domain *dom);
> > > +void domain_map_check(struct domain *dom);
> > > +
> > > +struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte,
> > > +    vaddr_t va, int shift, uint64_t flags);
> > > +int  ivhd_poll_events(struct iommu_softc *iommu);
> > > +void ivhd_showit(struct iommu_softc *);
> > > +void ivhd_showdte(void);
> > > +void ivhd_showcmd(struct iommu_softc *);
> > > +
> > > +static inline int
> > > +debugme(struct domain *dom)
> > > +{
> > > + return 0;
> > > + return (dom->flag & DOM_DEBUG);
> > > +}
> > > +
> > > +void
> > > +domain_map_check(struct domain *dom)
> > > +{
> > > + struct iommu_softc *iommu;
> > > + struct domain_dev *dd;
> > > + struct context_entry *ctx;
> > > + int v;
> > > +
> > > + iommu = dom->iommu;
> > > + TAILQ_FOREACH(dd, &dom->devices, link) {
> > > +         acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
> > > +
> > > +         if (iommu->dte)
> > > +                 continue;
> > > +
> > > +         /* Check if this is the first time we are mapped */
> > > +         ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
> > > +         v = context_user(ctx);
> > > +         if (v != 0xA) {
> > > +                 printf("  map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> > > +                     iommu->segment,
> > > +                     sid_bus(dd->sid),
> > > +                     sid_dev(dd->sid),
> > > +                     sid_fun(dd->sid),
> > > +                     iommu->id,
> > > +                     dom->did);
> > > +                 context_set_user(ctx, 0xA);
> > > +         }
> > > + }
> > > +}
> > > +
> > > +/* Map a single page as passthrough - used for DRM */
> > > +void
> > > +dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > +
> > > + if (!acpidmar_sc)
> > > +         return;
> > > + domain_map_check(dom);
> > > + domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
> > > +}
> > > +
> > > +/* Map a range of pages 1:1 */
> > > +void
> > > +domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
> > > +{
> > > + domain_map_check(dom);
> > > + while (start < end) {
> > > +         domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
> > > +         start += VTD_PAGE_SIZE;
> > > + }
> > > +}
> > > +
> > > +/* Map a single paddr to IOMMU paddr */
> > > +void
> > > +domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa,
> > > +    uint64_t flags)
> > > +{
> > > + paddr_t paddr;
> > > + struct pte_entry *pte, *npte;
> > > + int lvl, idx;
> > > + struct iommu_softc *iommu;
> > > +
> > > + iommu = dom->iommu;
> > > + /* Insert physical address into virtual address map
> > > +  * XXX: could we use private pmap here?
> > > +  * essentially doing a pmap_enter(map, va, pa, prot);
> > > +  */
> > > +
> > > + /* Only handle 4k pages for now */
> > > + npte = dom->pte;
> > > + for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl >= VTD_LEVEL0;
> > > +     lvl -= VTD_STRIDE_SIZE) {
> > > +         idx = (va >> lvl) & VTD_STRIDE_MASK;
> > > +         pte = &npte[idx];
> > > +         if (lvl == VTD_LEVEL0) {
> > > +                 /* Level 1: Page Table - add physical address */
> > > +                 pte->val = pa | flags;
> > > +                 iommu_flush_cache(iommu, pte, sizeof(*pte));
> > > +                 break;
> > > +         } else if (!(pte->val & PTE_P)) {
> > > +                 /* Level N: Point to lower level table */
> > > +                 iommu_alloc_page(iommu, &paddr);
> > > +                 pte->val = paddr | PTE_P | PTE_R | PTE_W;
> > > +                 iommu_flush_cache(iommu, pte, sizeof(*pte));
> > > +         }
> > > +         npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
> > > + }
> > > +}
> > > +
> > > +/* Map a single paddr to IOMMU paddr: AMD
> > > + * physical address breakdown into levels:
> > > + * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
> > > + *        5.55555555.44444444.43333333.33222222.22211111.1111----.--------
> > > + * mode:
> > > + *  000 = none   shift
> > > + *  001 = 1 [21].12
> > > + *  010 = 2 [30].21
> > > + *  011 = 3 [39].30
> > > + *  100 = 4 [48].39
> > > + *  101 = 5 [57]
> > > + *  110 = 6
> > > + *  111 = reserved
> > > + */
> > > +struct pte_entry *
> > > +pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
> > > +    int shift, uint64_t flags)
> > > +{
> > > + paddr_t paddr;
> > > + int idx;
> > > + 
> > > + idx = (va >> shift) & VTD_STRIDE_MASK;
> > > + if (!(pte[idx].val & PTE_P)) {
> > > +         /* Page Table entry is not present... create a new page entry */
> > > +         iommu_alloc_page(iommu, &paddr);
> > > +         pte[idx].val = paddr | flags;
> > > +         iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
> > > + }
> > > + return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
> > > +}
> > > + 
> > > +void
> > > +domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa,
> > > +    uint64_t flags)
> > > +{
> > > + struct pte_entry *pte;
> > > + struct iommu_softc *iommu;
> > > + int idx;
> > > +
> > > + iommu = dom->iommu;
> > > + /* Insert physical address into virtual address map
> > > +  * XXX: could we use private pmap here?
> > > +  * essentially doing a pmap_enter(map, va, pa, prot);
> > > +  */
> > > +
> > > + /* Always assume AMD levels=4                           */
> > > + /*        39        30        21        12              */
> > > + /* ---------|---------|---------|---------|------------ */
> > > + pte = dom->pte;
> > > + //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
> > > + pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
> > > + pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
> > > + //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
> > > +
> > > + if (flags)
> > > +         flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
> > > +
> > > + /* Level 1: Page Table - add physical address */
> > > + idx = (va >> 12) & 0x1FF;
> > > + pte[idx].val = pa | flags;
> > > +
> > > + iommu_flush_cache(iommu, pte, sizeof(*pte));
> > > +}
> > > +
> > > +static void
> > > +dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
> > > +    const char *lbl)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int i;
> > > +
> > > + return;
> > > + if (!debugme(dom))
> > > +         return;
> > > + printf("%s: %s\n", lbl, dom_bdf(dom));
> > > + for (i = 0; i < nseg; i++) {
> > > +         printf("  %.16llx %.8x\n",
> > > +             (uint64_t)segs[i].ds_addr,
> > > +             (uint32_t)segs[i].ds_len);
> > > + }
> > > +}
> > > +
> > > +/* Unload mapping */
> > > +void
> > > +domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
> > > +{
> > > + bus_dma_segment_t       *seg;
> > > + paddr_t                 base, end, idx;
> > > + psize_t                 alen;
> > > + int                     i;
> > > +
> > > + if (iommu_bad(dom->iommu)) {
> > > +         printf("unload map no iommu\n");
> > > +         return;
> > > + }
> > > +
> > > + //acpidmar_intr(dom->iommu);
> > > + for (i = 0; i < dmam->dm_nsegs; i++) {
> > > +         seg  = &dmam->dm_segs[i];
> > > +
> > > +         base = trunc_page(seg->ds_addr);
> > > +         end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> > > +         alen = end - base;
> > > +
> > > +         if (debugme(dom)) {
> > > +                 printf("  va:%.16llx len:%x\n",
> > > +                     (uint64_t)base, (uint32_t)alen);
> > > +         }
> > > +
> > > +         /* Clear PTE */
> > > +         for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
> > > +                 domain_map_page(dom, base + idx, 0, 0);
> > > +
> > > +         if (dom->flag & DOM_NOMAP) {
> > > +                 //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
> > > +                 continue;
> > > +         }               
> > > +
> > > +         mtx_enter(&dom->exlck);
> > > +         if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
> > > +                 panic("domain_unload_map: extent_free");
> > > +         }
> > > +         mtx_leave(&dom->exlck);
> > > + }
> > > +}
> > > +
> > > +/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
> > > +void
> > > +domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag,
> > > +    const char *fn)
> > > +{
> > > + bus_dma_segment_t       *seg;
> > > + struct iommu_softc      *iommu;
> > > + paddr_t                 base, end, idx;
> > > + psize_t                 alen;
> > > + u_long                  res;
> > > + int                     i;
> > > +
> > > + iommu = dom->iommu;
> > > + if (!iommu_enabled(iommu)) {
> > > +         /* Lazy enable translation when required */
> > > +         if (iommu_enable_translation(iommu, 1)) {
> > > +                 return;
> > > +         }
> > > + }
> > > + domain_map_check(dom);
> > > + //acpidmar_intr(iommu);
> > > + for (i = 0; i < map->dm_nsegs; i++) {
> > > +         seg = &map->dm_segs[i];
> > > +
> > > +         base = trunc_page(seg->ds_addr);
> > > +         end  = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
> > > +         alen = end - base;
> > > +         res  = base;
> > > +
> > > +         if (dom->flag & DOM_NOMAP) {
> > > +                 goto nomap;
> > > +         }
> > > +
> > > +         /* Allocate DMA Virtual Address */
> > > +         mtx_enter(&dom->exlck);
> > > +         if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
> > > +             map->_dm_boundary, EX_NOWAIT, &res)) {
> > > +                 panic("domain_load_map: extent_alloc");
> > > +         }
> > > +         if (res == -1) {
> > > +                 panic("got -1 address\n");
> > > +         }
> > > +         mtx_leave(&dom->exlck);
> > > +
> > > +         /* Reassign DMA address */
> > > +         seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
> > > +nomap:
> > > +         if (debugme(dom)) {
> > > +                 printf("  LOADMAP: %.16llx %x => %.16llx\n",
> > > +                     (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
> > > +                     (uint64_t)res);
> > > +         }
> > > +         for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
> > > +                 domain_map_page(dom, res + idx, base + idx,
> > > +                     PTE_P | pteflag);
> > > +         }
> > > + }
> > > + if ((iommu->cap & CAP_CM) || force_cm) {
> > > +         iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
> > > + } else {
> > > +         iommu_flush_write_buffer(iommu);
> > > + }
> > > +}
> > > +
> > > +const char *
> > > +dom_bdf(struct domain *dom)
> > > +{
> > > + struct domain_dev *dd;
> > > + static char             mmm[48];
> > > +
> > > + dd = TAILQ_FIRST(&dom->devices);
> > > + snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
> > > +     dmar_bdf(dd->sid), dom->iommu->id, dom->did,
> > > +     dom->did == DID_UNITY ? " [unity]" : "");
> > > + return (mmm);
> > > +}
> > > +
> > > +/* Bus DMA Map functions */
> > > +static int
> > > +dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
> > > +    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
> > > +{
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
> > > +     flags, dmamp);
> > > + if (!rc) {
> > > +         dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
> > > +             __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
> > > +{
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > > + _bus_dmamap_destroy(tag, dmam);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
> > > +    bus_size_t buflen, struct proc *p, int flags)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int             rc;
> > > +
> > > + rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
> > > + if (!rc) {
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > +         domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
> > > +    int flags)
> > > +{
> > > + struct domain   *dom = tag->_cookie;
> > > + int             rc;
> > > +
> > > + rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
> > > + if (!rc) {
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > +         domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
> > > +    int flags)
> > > +{
> > > + struct domain   *dom = tag->_cookie;
> > > + int             rc;
> > > +
> > > + rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
> > > + if (!rc) {
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > +         domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
> > > +    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > + int rc;
> > > +
> > > + rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
> > > + if (!rc) {
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > +         domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
> > > +         dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
> > > +             __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
> > > +{
> > > + struct domain *dom = tag->_cookie;
> > > +
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > > + domain_unload_map(dom, dmam);
> > > + _bus_dmamap_unload(tag, dmam);
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
> > > +    bus_size_t len, int ops)
> > > +{
> > > +#if 0
> > > + struct domain *dom = tag->_cookie;
> > > + //int           flag;
> > > +
> > > + flag = PTE_P;
> > > + //acpidmar_intr(dom->iommu);
> > > + if (ops == BUS_DMASYNC_PREREAD) {
> > > +         /* make readable */
> > > +         flag |= PTE_R;
> > > + }
> > > + else if (ops == BUS_DMASYNC_PREWRITE) {
> > > +         /* make writeable */
> > > +         flag |= PTE_W;
> > > + }
> > > + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
> > > +#endif
> > > + _bus_dmamap_sync(tag, dmam, offset, len, ops);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
> > > +    bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
> > > +    int flags)
> > > +{
> > > + int rc;
> > > +
> > > + rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
> > > +     rsegs, flags);
> > > + if (!rc) {
> > > +         dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
> > > + }
> > > + return (rc);
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
> > > +{
> > > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > > + _bus_dmamem_free(tag, segs, nsegs);
> > > +}
> > > +
> > > +static int
> > > +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> > > +    size_t size, caddr_t *kvap, int flags)
> > > +{
> > > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > > + return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
> > > +}
> > > +
> > > +static void
> > > +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
> > > +{
> > > + struct domain   *dom = tag->_cookie;
> > > +
> > > + if (debugme(dom)) {
> > > +         printf("dmamap_unmap: %s\n", dom_bdf(dom));
> > > + }
> > > + _bus_dmamem_unmap(tag, kva, size);
> > > +}
> > > +
> > > +static paddr_t
> > > +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
> > > +    off_t off, int prot, int flags)
> > > +{
> > > + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
> > > + return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
> > > +}
> > > +
> > > +/*===================================
> > > + * IOMMU code
> > > + *===================================*/
> > > +
> > > +/* Intel: Set Context Root Address */
> > > +void
> > > +iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
> > > +{
> > > + int i, sts;
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > + iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
> > > + for (i = 0; i < 5; i++) {
> > > +         sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > +         if (sts & GSTS_RTPS)
> > > +                 break;
> > > + }
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > + if (i == 5) {
> > > +         printf("set_rtaddr fails\n");
> > > + }
> > > +}
> > > +
> > > +/* COMMON: Allocate a new memory page */
> > > +void *
> > > +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
> > > +{
> > > + void    *va;
> > > +
> > > + *paddr = 0;
> > > + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
> > > + if (va == NULL) {
> > > +         panic("can't allocate page\n");
> > > + }
> > > + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
> > > + return (va);
> > > +}
> > > +
> > > +
> > > +/* Intel: Issue command via queued invalidation */
> > > +void
> > > +iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
> > > +{
> > > +#if 0
> > > + struct qi_entry *pi, *pw;
> > > +
> > > + idx = iommu->qi_head;
> > > + pi = &iommu->qi[idx];
> > > + pw = &iommu->qi[(idx+1) % MAXQ];
> > > + iommu->qi_head = (idx+2) % MAXQ;
> > > +
> > > + memcpy(pw, &qi, sizeof(qi));
> > > + issue command;
> > > + while (pw->xxx)
> > > +         ;
> > > +#endif
> > > +}
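
FWIW, the usual shape of this on Intel is: copy the descriptor into
the ring at the tail, append a wait descriptor that makes the hardware
write a status word back to memory, advance DMAR_IQT_REG, and poll
that word.  Very rough sketch, untested; QI_IWD/QI_IWD_SW and the
qi_status/qi_status_pa softc members (a volatile uint32_t * into
DMA-visible memory, plus its physical address) are my invention based
on my reading of the VT-d spec, the rest reuses names from your diff:

void
iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
{
        struct qi_entry *pd, *pw;
        int idx;

        idx = iommu->qi_tail;
        pd = &iommu->qi[idx];                   /* the command itself */
        pw = &iommu->qi[(idx + 1) % MAXQ];      /* trailing wait descriptor */
        iommu->qi_tail = (idx + 2) % MAXQ;

        memcpy(pd, qi, sizeof(*qi));

        /* hardware writes 1 to the status word when it reaches pw */
        iommu->qi_status[0] = 0;
        pw->lo = QI_IWD | QI_IWD_SW | (1ULL << 32);
        pw->hi = iommu->qi_status_pa;

        /* descriptors are 16 bytes; IQT takes a byte offset */
        iommu_writeq(iommu, DMAR_IQT_REG, iommu->qi_tail << 4);
        while (iommu->qi_status[0] == 0)
                ;       /* XXX needs a timeout */
}
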
> > > +
> > > +/* Intel: Flush TLB entries, Queued Invalidation mode */
> > > +void
> > > +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
> > > +{
> > > + struct qi_entry qi;
> > > +
> > > + /* Use queued invalidation */
> > > + qi.hi = 0;
> > > + switch (mode) {
> > > + case IOTLB_GLOBAL:
> > > +         qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
> > > +         break;
> > > + case IOTLB_DOMAIN:
> > > +         qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
> > > +             QI_IOTLB_DID(did);
> > > +         break;
> > > + case IOTLB_PAGE:
> > > +         qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
> > > +         qi.hi = 0;
> > > +         break;
> > > + }
> > > + if (iommu->cap & CAP_DRD)
> > > +         qi.lo |= QI_IOTLB_DR;
> > > + if (iommu->cap & CAP_DWD)
> > > +         qi.lo |= QI_IOTLB_DW;
> > > + iommu_issue_qi(iommu, &qi);
> > > +}
> > > +
> > > +/* Intel: Flush Context entries, Queued Invalidation mode */
> > > +void
> > > +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
> > > +    int sid, int fm)
> > > +{
> > > + struct qi_entry qi;
> > > +
> > > + /* Use queued invalidation */
> > > + qi.hi = 0;
> > > + switch (mode) {
> > > + case CTX_GLOBAL:
> > > +         qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
> > > +         break;
> > > + case CTX_DOMAIN:
> > > +         qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
> > > +         break;
> > > + case CTX_DEVICE:
> > > +         qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
> > > +             QI_CTX_SID(sid) | QI_CTX_FM(fm);
> > > +         break;
> > > + }
> > > + iommu_issue_qi(iommu, &qi);
> > > +}
> > > +
> > > +/* Intel: Flush write buffers */
> > > +void
> > > +iommu_flush_write_buffer(struct iommu_softc *iommu)
> > > +{
> > > + int i, sts;
> > > +
> > > + if (iommu->dte)
> > > +         return;
> > > + if (!(iommu->cap & CAP_RWBF))
> > > +         return;
> > > + printf("writebuf\n");
> > > + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
> > > + for (i = 0; i < 5; i++) {
> > > +         sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > +         if (sts & GSTS_WBFS)
> > > +                 break;
> > > +         delay(10000);
> > > + }
> > > + if (i == 5) {
> > > +         printf("write buffer flush fails\n");
> > > + }
> > > +}
> > > +
> > > +void
> > > +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
> > > +{
> > > + if (iommu->dte) {
> > > +         pmap_flush_cache((vaddr_t)addr, size);
> > > +         return;
> > > + }
> > > + if (!(iommu->ecap & ECAP_C))
> > > +         pmap_flush_cache((vaddr_t)addr, size);
> > > +}
> > > +
> > > +/*
> > > + * Intel: Flush IOMMU TLB Entries
> > > + * Flushing can occur globally, per domain or per page 
> > > + */
> > > +void
> > > +iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
> > > +{
> > > + int             n;
> > > + uint64_t        val;
> > > +
> > > + /* Call AMD */
> > > + if (iommu->dte) {
> > > +         ivhd_invalidate_domain(iommu, did);
> > > +         //ivhd_poll_events(iommu);
> > > +         return;
> > > + }
> > > + val = IOTLB_IVT;
> > > + switch (mode) {
> > > + case IOTLB_GLOBAL:
> > > +         val |= IIG_GLOBAL;
> > > +         break;
> > > + case IOTLB_DOMAIN:
> > > +         val |= IIG_DOMAIN | IOTLB_DID(did);
> > > +         break;
> > > + case IOTLB_PAGE:
> > > +         val |= IIG_PAGE | IOTLB_DID(did);
> > > +         break;
> > > + }
> > > +
> > > + /* Check for Read/Write Drain */
> > > + if (iommu->cap & CAP_DRD)
> > > +         val |= IOTLB_DR;
> > > + if (iommu->cap & CAP_DWD)
> > > +         val |= IOTLB_DW;
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
> > > + n = 0;
> > > + do {
> > > +         val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
> > > + } while (n++ < 5 && val & IOTLB_IVT);
> > > +
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > +#ifdef DEBUG
> > > + {
> > > +         static int rg;
> > > +         int a, r;
> > > +
> > > +         if (!rg) {
> > > +                 a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
> > > +                 r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
> > > +                 if (a != r) {
> > > +                         printf("TLB Requested:%d Actual:%d\n", r, a);
> > > +                         rg = 1;
> > > +                 }
> > > +         }
> > > + }
> > > +#endif
> > > +}
> > > +
> > > +/* Intel: Flush IOMMU settings
> > > + * Flushes can occur globally, per domain, or per device
> > > + */
> > > +void
> > > +iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
> > > +{
> > > + uint64_t        val;
> > > + int             n;
> > > +
> > > + if (iommu->dte)
> > > +         return;
> > > + val = CCMD_ICC;
> > > + switch (mode) {
> > > + case CTX_GLOBAL:
> > > +         val |= CIG_GLOBAL;
> > > +         break;
> > > + case CTX_DOMAIN:
> > > +         val |= CIG_DOMAIN | CCMD_DID(did);
> > > +         break;
> > > + case CTX_DEVICE:
> > > +         val |= CIG_DEVICE | CCMD_DID(did) |
> > > +             CCMD_SID(sid) | CCMD_FM(fm);
> > > +         break;
> > > + }
> > > +
> > > + mtx_enter(&iommu->reg_lock);
> > > +
> > > + n = 0;
> > > + iommu_writeq(iommu, DMAR_CCMD_REG, val);
> > > + do {
> > > +         val = iommu_readq(iommu, DMAR_CCMD_REG);
> > > + } while (n++ < 5 && val & CCMD_ICC);
> > > +
> > > + mtx_leave(&iommu->reg_lock);
> > > +
> > > +#ifdef DEBUG
> > > + {
> > > +         static int rg;
> > > +         int a, r;
> > > +
> > > +         if (!rg) {
> > > +                 a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
> > > +                 r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
> > > +                 if (a != r) {
> > > +                         printf("CTX Requested:%d Actual:%d\n", r, a);
> > > +                         rg = 1;
> > > +                 }
> > > +         }
> > > + }
> > > +#endif
> > > +}
> > > +
> > > +/* Intel: Enable Queued Invalidation */
> > > +void
> > > +iommu_enable_qi(struct iommu_softc *iommu, int enable)
> > > +{
> > > + int     n = 0;
> > > + int     sts;
> > > +
> > > + if (!(iommu->ecap & ECAP_QI))
> > > +         return;
> > > +
> > > + if (enable) {
> > > +         iommu->gcmd |= GCMD_QIE;
> > > +
> > > +         mtx_enter(&iommu->reg_lock);
> > > +
> > > +         iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > +         do {
> > > +                 sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > +         } while (n++ < 5 && !(sts & GSTS_QIES));
> > > +
> > > +         mtx_leave(&iommu->reg_lock);
> > > +
> > > +         printf("set.qie: %d\n", n);
> > > + } else {
> > > +         iommu->gcmd &= ~GCMD_QIE;
> > > +
> > > +         mtx_enter(&iommu->reg_lock);
> > > +
> > > +         iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > +         do {
> > > +                 sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > +         } while (n++ < 5 && sts & GSTS_QIES);
> > > +
> > > +         mtx_leave(&iommu->reg_lock);
> > > +
> > > +         printf("clr.qie: %d\n", n);
> > > + }
> > > +}
> > > +
> > > +/* Intel: Enable IOMMU translation */
> > > +int
> > > +iommu_enable_translation(struct iommu_softc *iommu, int enable)
> > > +{
> > > + uint32_t        sts;
> > > + uint64_t        reg;
> > > + int             n = 0;
> > > +
> > > + if (iommu->dte)
> > > +         return (0);
> > > + reg = 0;
> > > + if (enable) {
> > > +         printf("enable iommu %d\n", iommu->id);
> > > +         iommu_showcfg(iommu, -1);
> > > +
> > > +         iommu->gcmd |= GCMD_TE;
> > > +
> > > +         /* Enable translation */
> > > +         printf(" pre tes: ");
> > > +
> > > +         mtx_enter(&iommu->reg_lock);
> > > +         iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > +         printf("xxx");
> > > +         do {
> > > +                 printf("yyy");
> > > +                 sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > +                 delay(n * 10000);
> > > +         } while (n++ < 5 && !(sts & GSTS_TES));
> > > +         mtx_leave(&iommu->reg_lock);
> > > +
> > > +         printf(" set.tes: %d\n", n);
> > > +
> > > +         if (n >= 5) {
> > > +                 printf("error.. unable to initialize iommu %d\n",
> > > +                     iommu->id);
> > > +                 iommu->flags |= IOMMU_FLAGS_BAD;
> > > +
> > > +                 /* Disable IOMMU */
> > > +                 iommu->gcmd &= ~GCMD_TE;
> > > +                 mtx_enter(&iommu->reg_lock);
> > > +                 iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > +                 mtx_leave(&iommu->reg_lock);
> > > +
> > > +                 return (1);
> > > +         }
> > > +
> > > +         iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
> > > +         iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> > > + } else {
> > > +         iommu->gcmd &= ~GCMD_TE;
> > > +
> > > +         mtx_enter(&iommu->reg_lock);
> > > +
> > > +         iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
> > > +         do {
> > > +                 sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > +         } while (n++ < 5 && sts & GSTS_TES);
> > > +         mtx_leave(&iommu->reg_lock);
> > > +
> > > +         printf(" clr.tes: %d\n", n);
> > > + }
> > > +
> > > + return (0);
> > > +}
> > > +
> > > +/* Intel: Initialize IOMMU */
> > > +int
> > > +iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
> > > +    struct acpidmar_drhd *dh)
> > > +{
> > > + static int      niommu;
> > > + int             len = VTD_PAGE_SIZE;
> > > + int             i, gaw;
> > > + uint32_t        sts;
> > > + paddr_t         paddr;
> > > +
> > > + if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
> > > +         return (-1);
> > > + }
> > > +
> > > + TAILQ_INIT(&iommu->domains);
> > > + iommu->id = ++niommu;
> > > + iommu->flags = dh->flags;
> > > + iommu->segment = dh->segment;
> > > + iommu->iot = sc->sc_memt;
> > > +
> > > + iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
> > > + iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
> > > + iommu->ndoms = cap_nd(iommu->cap);
> > > +
> > > + printf("  caps: %s%s%s%s%s%s%s%s%s%s%s\n",
> > > +     iommu->cap & CAP_AFL ? "afl " : "",         // adv fault
> > > +     iommu->cap & CAP_RWBF ? "rwbf " : "",       // write-buffer flush
> > > +     iommu->cap & CAP_PLMR ? "plmr " : "",       // protected lo region
> > > +     iommu->cap & CAP_PHMR ? "phmr " : "",       // protected hi region
> > > +     iommu->cap & CAP_CM ? "cm " : "",           // caching mode
> > > +     iommu->cap & CAP_ZLR ? "zlr " : "",         // zero-length read
> > > +     iommu->cap & CAP_PSI ? "psi " : "",         // page invalidate
> > > +     iommu->cap & CAP_DWD ? "dwd " : "",         // write drain
> > > +     iommu->cap & CAP_DRD ? "drd " : "",         // read drain
> > > +     iommu->cap & CAP_FL1GP ? "Gb " : "",        // 1Gb pages
> > > +     iommu->cap & CAP_PI ? "pi " : "");          // posted interrupts
> > > + printf("  ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
> > > +     iommu->ecap & ECAP_C ? "c " : "",           // coherent
> > > +     iommu->ecap & ECAP_QI ? "qi " : "",         // queued invalidate
> > > +     iommu->ecap & ECAP_DT ? "dt " : "",         // device iotlb
> > > +     iommu->ecap & ECAP_IR ? "ir " : "",         // intr remap
> > > +     iommu->ecap & ECAP_EIM ? "eim " : "",       // x2apic
> > > +     iommu->ecap & ECAP_PT ? "pt " : "",         // passthrough
> > > +     iommu->ecap & ECAP_SC ? "sc " : "",         // snoop control
> > > +     iommu->ecap & ECAP_ECS ? "ecs " : "",       // extended context
> > > +     iommu->ecap & ECAP_MTS ? "mts " : "",       // memory type
> > > +     iommu->ecap & ECAP_NEST ? "nest " : "",     // nested translations
> > > +     iommu->ecap & ECAP_DIS ? "dis " : "",       // deferred invalidation
> > > +     iommu->ecap & ECAP_PASID ? "pas " : "",     // pasid
> > > +     iommu->ecap & ECAP_PRS ? "prs " : "",       // page request
> > > +     iommu->ecap & ECAP_ERS ? "ers " : "",       // execute request
> > > +     iommu->ecap & ECAP_SRS ? "srs " : "",       // supervisor request
> > > +     iommu->ecap & ECAP_NWFS ? "nwfs " : "",     // no write flag
> > > +     iommu->ecap & ECAP_EAFS ? "eafs " : "");    // extended accessed flag
> > > +
> > > + mtx_init(&iommu->reg_lock, IPL_HIGH);
> > > +
> > > + /* Clear Interrupt Masking */
> > > + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
> > > +
> > > + iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
> > > +     acpidmar_intr, iommu, "dmarintr");
> > > +
> > > + /* Enable interrupts */
> > > + sts = iommu_readl(iommu, DMAR_FECTL_REG);
> > > + iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
> > > +
> > > + /* Allocate root pointer */
> > > + iommu->root = iommu_alloc_page(iommu, &paddr);
> > > +#ifdef DEBUG
> > > + printf("Allocated root pointer: pa:%.16llx va:%p\n",
> > > +     (uint64_t)paddr, iommu->root);
> > > +#endif
> > > + iommu->rtaddr = paddr;
> > > + iommu_flush_write_buffer(iommu);
> > > + iommu_set_rtaddr(iommu, paddr);
> > > +
> > > +#if 0
> > > + if (iommu->ecap & ECAP_QI) {
> > > +         /* Queued Invalidation support */
> > > +         iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
> > > +         iommu_writeq(iommu, DMAR_IQT_REG, 0);
> > > +         iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
> > > + }
> > > + if (iommu->ecap & ECAP_IR) {
> > > +         /* Interrupt remapping support */
> > > +         iommu_writeq(iommu, DMAR_IRTA_REG, 0);
> > > + }
> > > +#endif
> > > +
> > > + /* Calculate guest address width and supported guest widths */
> > > + gaw = -1;
> > > + iommu->mgaw = cap_mgaw(iommu->cap);
> > > + printf("gaw: %d { ", iommu->mgaw);
> > > + for (i = 0; i < 5; i++) {
> > > +         if (cap_sagaw(iommu->cap) & (1L << i)) {
> > > +                 gaw = VTD_LEVELTOAW(i);
> > > +                 printf("%d ", gaw);
> > > +                 iommu->agaw = gaw;
> > > +         }
> > > + }
> > > + printf("}\n");
> > > +
> > > + /* Cache current status register bits */
> > > + sts = iommu_readl(iommu, DMAR_GSTS_REG);
> > > + if (sts & GSTS_TES)
> > > +         iommu->gcmd |= GCMD_TE;
> > > + if (sts & GSTS_QIES)
> > > +         iommu->gcmd |= GCMD_QIE;
> > > + if (sts & GSTS_IRES)
> > > +         iommu->gcmd |= GCMD_IRE;
> > > + if (iommu->gcmd) {
> > > +         printf("gcmd: %x preset\n", iommu->gcmd);
> > > + }
> > > + acpidmar_intr(iommu);
> > > + return (0);
> > > +}
> > > +
> > > +const char *dmar_rn(int reg);
> > > +
> > > +const char *
> > > +dmar_rn(int reg)
> > > +{
> > > + switch (reg) {
> > > + case EVT_HEAD_REG: return "evthead";
> > > + case EVT_TAIL_REG: return "evttail";
> > > + case CMD_HEAD_REG: return "cmdhead";
> > > + case CMD_TAIL_REG: return "cmdtail";
> > > + case CMD_BASE_REG: return "cmdbase";
> > > + case EVT_BASE_REG: return "evtbase";
> > > + case DEV_TAB_BASE_REG: return "devtblbase";
> > > + case IOMMUCTL_REG: return "iommuctl";
> > > +#if 0
> > > + case DMAR_VER_REG: return "ver";
> > > + case DMAR_CAP_REG: return "cap";
> > > + case DMAR_ECAP_REG: return "ecap";
> > > + case DMAR_GSTS_REG: return "gsts";
> > > + case DMAR_GCMD_REG: return "gcmd";
> > > + case DMAR_FSTS_REG: return "fsts";
> > > + case DMAR_FECTL_REG: return "fectl";
> > > + case DMAR_RTADDR_REG: return "rtaddr";
> > > + case DMAR_FEDATA_REG: return "fedata";
> > > + case DMAR_FEADDR_REG: return "feaddr";
> > > + case DMAR_FEUADDR_REG: return "feuaddr";
> > > + case DMAR_PMEN_REG: return "pmen";
> > > + case DMAR_IEDATA_REG: return "iedata";
> > > + case DMAR_IEADDR_REG: return "ieaddr";
> > > + case DMAR_IEUADDR_REG: return "ieuaddr";
> > > + case DMAR_IRTA_REG: return "irta";
> > > + case DMAR_CCMD_REG: return "ccmd";
> > > + case DMAR_IQH_REG: return "iqh";
> > > + case DMAR_IQT_REG: return "iqt";
> > > + case DMAR_IQA_REG: return "iqa";
> > > +#endif
> > > + }
> > > + return "unknown";
> > > +}
> > > +
> > > +/* Read/Write IOMMU register */
> > > +uint32_t
> > > +iommu_readl(struct iommu_softc *iommu, int reg)
> > > +{
> > > + uint32_t        v;
> > > +
> > > + v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
> > > + if (reg < 00) {
> > > +         printf("iommu%d: read %x %.8lx [%s]\n",
> > > +             iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > > + }
> > > +
> > > + return (v);
> > > +}
> > > +
> > > +
> > > +#define dbprintf(x...)
> > > +
> > > +void
> > > +iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
> > > +{
> > > + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> > > +     iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > > + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
> > > +}
> > > +
> > > +uint64_t
> > > +iommu_readq(struct iommu_softc *iommu, int reg)
> > > +{
> > > + uint64_t        v;
> > > +
> > > + v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
> > > + if (reg < 00) {
> > > +         printf("iommu%d: read %x %.8lx [%s]\n",
> > > +             iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > > + }
> > > +
> > > + return (v);
> > > +}
> > > +
> > > +void
> > > +iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
> > > +{
> > > + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
> > > +             iommu->id, reg, (unsigned long)v, dmar_rn(reg));
> > > + bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
> > > +}
> > > +
> > > +/* Check if a device is within a device scope */
> > > +int
> > > +acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
> > > +    int sid)
> > > +{
> > > + struct dmar_devlist     *ds;
> > > + int                     sub, sec, i;
> > > + int                     bus, dev, fun, sbus;
> > > + pcireg_t                reg;
> > > + pcitag_t                tag;
> > > +
> > > + sbus = sid_bus(sid);
> > > + TAILQ_FOREACH(ds, devlist, link) {
> > > +         bus = ds->bus;
> > > +         dev = ds->dp[0].device;
> > > +         fun = ds->dp[0].function;
> > > +         /* Walk PCI bridges in path */
> > > +         for (i = 1; i < ds->ndp; i++) {
> > > +                 tag = pci_make_tag(pc, bus, dev, fun);
> > > +                 reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> > > +                 bus = PPB_BUSINFO_SECONDARY(reg);
> > > +                 dev = ds->dp[i].device;
> > > +                 fun = ds->dp[i].function;
> > > +         }
> > > +
> > > +         /* Check for device exact match */
> > > +         if (sid == mksid(bus, dev, fun)) {
> > > +                 return DMAR_ENDPOINT;
> > > +         }
> > > +
> > > +         /* Check for device subtree match */
> > > +         if (ds->type == DMAR_BRIDGE) {
> > > +                 tag = pci_make_tag(pc, bus, dev, fun);
> > > +                 reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
> > > +                 sec = PPB_BUSINFO_SECONDARY(reg);
> > > +                 sub = PPB_BUSINFO_SUBORDINATE(reg);
> > > +                 if (sec <= sbus && sbus <= sub) {
> > > +                         return DMAR_BRIDGE;
> > > +                 }
> > > +         }
> > > + }
> > > +
> > > + return (0);
> > > +}
> > > +
> > > +struct domain *
> > > +domain_create(struct iommu_softc *iommu, int did)
> > > +{
> > > + struct domain   *dom;
> > > + int gaw;
> > > +
> > > + printf("iommu%d: create domain: %.4x\n", iommu->id, did);
> > > + dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + dom->did = did;
> > > + dom->iommu = iommu;
> > > + dom->pte = iommu_alloc_page(iommu, &dom->ptep);
> > > + TAILQ_INIT(&dom->devices);
> > > +
> > > + /* Setup DMA */
> > > + dom->dmat._cookie = dom;
> > > + dom->dmat._dmamap_create    = dmar_dmamap_create;       // nop
> > > + dom->dmat._dmamap_destroy   = dmar_dmamap_destroy;      // nop
> > > + dom->dmat._dmamap_load      = dmar_dmamap_load;         // lm
> > > + dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf;    // lm
> > > + dom->dmat._dmamap_load_uio  = dmar_dmamap_load_uio;     // lm
> > > + dom->dmat._dmamap_load_raw  = dmar_dmamap_load_raw;     // lm
> > > + dom->dmat._dmamap_unload    = dmar_dmamap_unload;       // um
> > > + dom->dmat._dmamap_sync      = dmar_dmamap_sync;         // lm
> > > + dom->dmat._dmamem_alloc     = dmar_dmamem_alloc;        // nop
> > > + dom->dmat._dmamem_free      = dmar_dmamem_free;         // nop
> > > + dom->dmat._dmamem_map       = dmar_dmamem_map;          // nop
> > > + dom->dmat._dmamem_unmap     = dmar_dmamem_unmap;        // nop
> > > + dom->dmat._dmamem_mmap      = dmar_dmamem_mmap;
> > > +
> > > + snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
> > > +     iommu->id, dom->did);
> > > +
> > > + /* Setup IOMMU address map */
> > > + gaw = min(iommu->agaw, iommu->mgaw);
> > > + dom->iovamap = extent_create(dom->exname, 1024*1024*16,
> > > +     (1LL << gaw)-1,
> > > +     M_DEVBUF, NULL, 0,
> > > +     EX_WAITOK|EX_NOCOALESCE);
> > > +
> > > + /* Zero out Interrupt region */
> > > + extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
> > > +     EX_WAITOK);
> > > + mtx_init(&dom->exlck, IPL_HIGH);
> > > +
> > > + TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
> > > +
> > > + return dom;
> > > +}
> > > +
> > > +void domain_add_device(struct domain *dom, int sid)
> > > +{
> > > + struct domain_dev *ddev;
> > > +
> > > + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
> > > + ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + ddev->sid = sid;
> > > + TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
> > > +
> > > + /* Should set context entry here?? */
> > > +}
> > > +
> > > +void domain_remove_device(struct domain *dom, int sid)
> > > +{
> > > + struct domain_dev *ddev, *tmp;
> > > +
> > > + TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
> > > +         if (ddev->sid == sid) {
> > > +                 TAILQ_REMOVE(&dom->devices, ddev, link);
> > > +                 free(ddev, sizeof(*ddev), M_DEVBUF);
> > > +         }
> > > + }
> > > +}
> > > +
> > > +/* Lookup domain by segment & source id (bus.device.function) */
> > > +struct domain *
> > > +domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
> > > +{
> > > + struct iommu_softc      *iommu;
> > > + struct domain_dev       *ddev;
> > > + struct domain           *dom;
> > > + int                     rc;
> > > +
> > > + if (sc == NULL) {
> > > +         return NULL;
> > > + }
> > > +
> > > + /* Lookup IOMMU for this device */
> > > + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> > > +         if (iommu->segment != segment)
> > > +                 continue;
> > > +         /* Check for devscope match or catchall iommu */
> > > +         rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
> > > +         if (rc != 0 || iommu->flags) {
> > > +                 break;
> > > +         }
> > > + }
> > > + if (!iommu) {
> > > +         printf("%s: no iommu found\n", dmar_bdf(sid));
> > > +         return NULL;
> > > + }
> > > +
> > > + //acpidmar_intr(iommu);
> > > +
> > > + /* Search domain devices */
> > > + TAILQ_FOREACH(dom, &iommu->domains, link) {
> > > +         TAILQ_FOREACH(ddev, &dom->devices, link) {
> > > +                 /* XXX: match all functions? */
> > > +                 if (ddev->sid == sid) {
> > > +                         return dom;
> > > +                 }
> > > +         }
> > > + }
> > > + if (iommu->ndoms <= 2) {
> > > +         /* Running out of domains.. create catchall domain */
> > > +         if (!iommu->unity) {
> > > +                 iommu->unity = domain_create(iommu, 1);
> > > +         }
> > > +         dom = iommu->unity;
> > > + } else {
> > > +         dom = domain_create(iommu, --iommu->ndoms);
> > > + }
> > > + if (!dom) {
> > > +         printf("no domain here\n");
> > > +         return NULL;
> > > + }
> > > +
> > > + /* Add device to domain */
> > > + domain_add_device(dom, sid);
> > > +
> > > + return dom;
> > > +}
> > > +
> > > +/* Map Guest Pages into IOMMU */
> > > +void  _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
> > > +{
> > > + bus_size_t i;
> > > + paddr_t hpa;
> > > +
> > > + if (dom == NULL) {
> > > +         return;
> > > + }
> > > + printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
> > > + for (i = 0; i < len; i += PAGE_SIZE) {
> > > +         hpa = 0;
> > > +         pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
> > > +         domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
> > > +         gpa += PAGE_SIZE;
> > > +         va  += PAGE_SIZE;
> > > + }
> > > +}
> > > +
> > > +/* Find IOMMU for a given PCI device */
> > > +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
> > > +{
> > > + struct domain *dom;
> > > +
> > > + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
> > > + if (dom) {
> > > +         *id = dom->did;
> > > + }
> > > + return dom;
> > > +}
> > > +
> > > +void domain_map_device(struct domain *dom, int sid);
> > > +
> > > +void
> > > +domain_map_device(struct domain *dom, int sid)
> > > +{
> > > + struct iommu_softc      *iommu;
> > > + struct context_entry    *ctx;
> > > + paddr_t                 paddr;
> > > + int                     bus, devfn;
> > > + int                     tt, lvl;
> > > +
> > > + iommu = dom->iommu;
> > > +
> > > + bus = sid_bus(sid);
> > > + devfn = sid_devfn(sid);
> > > + /* AMD attach device */
> > > + if (iommu->dte) {
> > > +         struct ivhd_dte *dte = &iommu->dte[sid];
> > > +         if (!dte->dw0) {
> > > +                 /* Setup Device Table Entry: bus.devfn */
> > > +                 printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, 
> > > dmar_bdf(sid), dom->did);
> > > +                 dte_set_host_page_table_root_ptr(dte, dom->ptep);
> > > +                 dte_set_domain(dte, dom->did);
> > > +                 dte_set_mode(dte, 3);  // Set 4 level PTE
> > > +                 dte_set_tv(dte);
> > > +                 dte_set_valid(dte);
> > > +                 ivhd_flush_devtab(iommu, dom->did);
> > > +                 //ivhd_showit(iommu);
> > > +                 ivhd_showdte();
> > > +         }
> > > +         //ivhd_poll_events(iommu);
> > > +         return;
> > > + }
> > > +
> > > + /* Create Bus mapping */
> > > + if (!root_entry_is_valid(&iommu->root[bus])) {
> > > +         iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
> > > +         iommu->root[bus].lo = paddr | ROOT_P;
> > > +         iommu_flush_cache(iommu, &iommu->root[bus],
> > > +             sizeof(struct root_entry));
> > > +         dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx 
> > > va:%p\n",
> > > +             iommu->id, bus, (uint64_t)paddr,
> > > +             iommu->ctx[bus]);
> > > + }
> > > +
> > > + /* Create DevFn mapping */
> > > + ctx = iommu->ctx[bus] + devfn;
> > > + if (!context_entry_is_valid(ctx)) {
> > > +         tt = CTX_T_MULTI;
> > > +         lvl = VTD_AWTOLEVEL(iommu->agaw);
> > > +
> > > +         /* Initialize context */
> > > +         context_set_slpte(ctx, dom->ptep);
> > > +         context_set_translation_type(ctx, tt);
> > > +         context_set_domain_id(ctx, dom->did);
> > > +         context_set_address_width(ctx, lvl);
> > > +         context_set_present(ctx);
> > > +
> > > +         /* Flush it */
> > > +         iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
> > > +         if ((iommu->cap & CAP_CM) || force_cm) {
> > > +                 iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
> > > +                 iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
> > > +         } else {
> > > +                 iommu_flush_write_buffer(iommu);
> > > +         }
> > > +         dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
> > > +             iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
> > > +             dom->did, tt);
> > > + }
> > > +}
> > > +
> > > +struct domain *
> > > +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
> > > +{
> > > + static struct domain    *dom;
> > > +
> > > + dom = domain_lookup(sc, segment, sid);
> > > + if (!dom) {
> > > +         printf("no domain: %s\n", dmar_bdf(sid));
> > > +         return NULL;
> > > + }
> > > +
> > > + if (mapctx) {
> > > +         domain_map_device(dom, sid);
> > > + }
> > > +
> > > + return dom;
> > > +}
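
The static on dom in acpidmar_pci_attach() looks like a debugging
leftover; a plain automatic variable avoids surprises if two attaches
ever overlap:

        struct domain   *dom;
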
> > > +
> > > +int
> > > +ismap(int bus, int dev, int fun)
> > > +{
> > > + return (1);
> > > +}
> > > +
> > > +void
> > > +acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> > > +{
> > > + int             bus, dev, fun, sid;
> > > + struct domain   *dom;
> > > + pcireg_t        reg;
> > > +
> > > + if (!acpidmar_sc) {
> > > +         /* No DMAR, ignore */
> > > +         return;
> > > + }
> > > +
> > > + /* Add device to our list */
> > > + pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
> > > + sid = mksid(bus, dev, fun);
> > > + if (sid_flag[sid] & SID_INVALID)
> > > +         return;
> > > +
> > > + reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
> > > +#if 0
> > > + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> > > +     PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> > > +         printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
> > > +             pa->pa_domain, bus, dev, fun);
> > > +         return;
> > > + }
> > > +#endif
> > > + /* Add device to domain */
> > > + dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain, sid, 0);
> > > + if (dom == NULL)
> > > +         return;
> > > +
> > > + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
> > > +     PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
> > > +         dom->flag = DOM_NOMAP;
> > > + }
> > > + if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
> > > +     PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
> > > +         /* For ISA Bridges, map 0-16Mb as 1:1 */
> > > +         printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
> > > +             pa->pa_domain, bus, dev, fun);
> > > +         domain_map_pthru(dom, 0x00, 16*1024*1024);
> > > + }
> > > +
> > > + /* Change DMA tag */
> > > + pa->pa_dmat = &dom->dmat;
> > > +}
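
Small thing in acpidmar_pci_hook(): the VGA case assigns the flag
instead of OR-ing it in, so it wipes any bit that was set earlier
(DOM_DEBUG for instance):

        dom->flag |= DOM_NOMAP;
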
> > > +
> > > +/* Create list of device scope entries from ACPI table */
> > > +void
> > > +acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
> > > +    struct devlist_head *devlist)
> > > +{
> > > + struct acpidmar_devscope        *ds;
> > > + struct dmar_devlist             *d;
> > > + int                             dplen, i;
> > > +
> > > + TAILQ_INIT(devlist);
> > > + while (off < de->length) {
> > > +         ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
> > > +         off += ds->length;
> > > +
> > > +         /* We only care about bridges and endpoints */
> > > +         if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
> > > +                 continue;
> > > +
> > > +         dplen = ds->length - sizeof(*ds);
> > > +         d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
> > > +         d->bus  = ds->bus;
> > > +         d->type = ds->type;
> > > +         d->ndp  = dplen / 2;
> > > +         d->dp   = (void *)&d[1];
> > > +         memcpy(d->dp, &ds[1], dplen);
> > > +         TAILQ_INSERT_TAIL(devlist, d, link);
> > > +
> > > +         printf("  %8s  %.4x:%.2x.%.2x.%x {",
> > > +             ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
> > > +             segment, ds->bus,
> > > +             d->dp[0].device,
> > > +             d->dp[0].function);
> > > +
> > > +         for (i = 1; i < d->ndp; i++) {
> > > +                 printf(" %2x.%x ",
> > > +                     d->dp[i].device,
> > > +                     d->dp[i].function);
> > > +         }
> > > +         printf("}\n");
> > > + }
> > > +}
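
The dplen / 2 above only works because struct acpidmar_devpath happens
to be two bytes; spelling it with sizeof documents the intent and
survives a struct change:

        d->ndp  = dplen / sizeof(struct acpidmar_devpath);
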
> > > +
> > > +/* DMA Remapping Hardware Unit */
> > > +void
> > > +acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
> > > +{
> > > + struct iommu_softc      *iommu;
> > > +
> > > + printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
> > > +     de->drhd.segment,
> > > +     de->drhd.address,
> > > +     de->drhd.flags);
> > > + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
> > > +     &iommu->devices);
> > > + iommu_init(sc, iommu, &de->drhd);
> > > +
> > > + if (de->drhd.flags) {
> > > +         /* Catchall IOMMU goes at end of list */
> > > +         TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
> > > + } else {
> > > +         TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
> > > + }
> > > +}
> > > +
> > > +/* Reserved Memory Region Reporting */
> > > +void
> > > +acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> > > +{
> > > + struct rmrr_softc       *rmrr;
> > > + bios_memmap_t           *im, *jm;
> > > + uint64_t                start, end;
> > > +
> > > + printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
> > > +     de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
> > > + if (de->rmrr.limit <= de->rmrr.base) {
> > > +         printf("  buggy BIOS\n");
> > > +         return;
> > > + }
> > > +
> > > + rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + rmrr->start = trunc_page(de->rmrr.base);
> > > + rmrr->end = round_page(de->rmrr.limit);
> > > + rmrr->segment = de->rmrr.segment;
> > > + acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
> > > +     &rmrr->devices);
> > > +
> > > + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
> > > +         if (im->type != BIOS_MAP_RES)
> > > +                 continue;
> > > +         /* Search for adjacent reserved regions */
> > > +         start = im->addr;
> > > +         end   = im->addr+im->size;
> > > +         for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
> > > +             jm++) {
> > > +                 end = jm->addr+jm->size;
> > > +         }
> > > +         printf("e820: %.16llx - %.16llx\n", start, end);
> > > +         if (start <= rmrr->start && rmrr->end <= end) {
> > > +                 /* Bah.. some buggy BIOS stomp outside RMRR */
> > > +                 printf("  ** inside E820 Reserved %.16llx %.16llx\n",
> > > +                     start, end);
> > > +                 rmrr->start = trunc_page(start);
> > > +                 rmrr->end   = round_page(end);
> > > +                 break;
> > > +         }
> > > + }
> > > + TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
> > > +}
> > > +
> > > +/* Root Port ATS Reporting */
> > > +void
> > > +acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
> > > +{
> > > + struct atsr_softc *atsr;
> > > +
> > > + printf("ATSR: segment:%.4x flags:%x\n",
> > > +     de->atsr.segment,
> > > +     de->atsr.flags);
> > > +
> > > + atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
> > > + atsr->flags = de->atsr.flags;
> > > + atsr->segment = de->atsr.segment;
> > > + acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
> > > +     &atsr->devices);
> > > +
> > > + TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
> > > +}
> > > +
> > > +void
> > > +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
> > > +{
> > > + struct rmrr_softc       *rmrr;
> > > + struct iommu_softc      *iommu;
> > > + struct domain           *dom;
> > > + struct dmar_devlist     *dl;
> > > + union acpidmar_entry    *de;
> > > + int                     off, sid, rc;
> > > +
> > > + domain_map_page = domain_map_page_intel;
> > > + printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
> > > +     dmar->haw+1,
> > > +     !!(dmar->flags & 0x1),
> > > +     !!(dmar->flags & 0x2));
> > > + sc->sc_haw = dmar->haw+1;
> > > + sc->sc_flags = dmar->flags;
> > > +
> > > + TAILQ_INIT(&sc->sc_drhds);
> > > + TAILQ_INIT(&sc->sc_rmrrs);
> > > + TAILQ_INIT(&sc->sc_atsrs);
> > > +
> > > + off = sizeof(*dmar);
> > > + while (off < dmar->hdr.length) {
> > > +         de = (union acpidmar_entry *)((unsigned char *)dmar + off);
> > > +         switch (de->type) {
> > > +         case DMAR_DRHD:
> > > +                 acpidmar_drhd(sc, de);
> > > +                 break;
> > > +         case DMAR_RMRR:
> > > +                 acpidmar_rmrr(sc, de);
> > > +                 break;
> > > +         case DMAR_ATSR:
> > > +                 acpidmar_atsr(sc, de);
> > > +                 break;
> > > +         default:
> > > +                 printf("DMAR: unknown %x\n", de->type);
> > > +                 break;
> > > +         }
> > > +         off += de->length;
> > > + }
> > > +
> > > + /* Pre-create domains for iommu devices */
> > > + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
> > > +         TAILQ_FOREACH(dl, &iommu->devices, link) {
> > > +                 sid = mksid(dl->bus, dl->dp[0].device,
> > > +                     dl->dp[0].function);
> > > +                 dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
> > > +                 if (dom != NULL) {
> > > +                         printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
> > > +                             iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
> > > +                             iommu->id, dom->did);
> > > +                 }
> > > +         }
> > > + }
> > > + /* Map passthrough pages for RMRR */
> > > + TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
> > > +         TAILQ_FOREACH(dl, &rmrr->devices, link) {
> > > +                 sid = mksid(dl->bus, dl->dp[0].device,
> > > +                     dl->dp[0].function);
> > > +                 dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
> > > +                 if (dom != NULL) {
> > > +                         printf("%s map ident: %.16llx %.16llx\n",
> > > +                             dom_bdf(dom), rmrr->start, rmrr->end);
> > > +                         domain_map_pthru(dom, rmrr->start, rmrr->end);
> > > +                         rc = extent_alloc_region(dom->iovamap,
> > > +                             rmrr->start, rmrr->end, EX_WAITOK);
> > > +                 }
> > > +         }
> > > + }
> > > +}
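
The result of extent_alloc_region(9) is stored in rc and then ignored,
and if I read extent(9) right its third argument is a size, not an end
address, so the call above reserves far too much.  Sketch:

        rc = extent_alloc_region(dom->iovamap, rmrr->start,
            rmrr->end - rmrr->start, EX_WAITOK);
        if (rc != 0)
                printf("%s: can't reserve RMRR region\n", dom_bdf(dom));
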
> > > +
> > > +
> > > +/*=====================================================
> > > + * AMD Vi
> > > + *=====================================================*/
> > > +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
> > > +int acpiivrs_iommu_match(struct pci_attach_args *);
> > > +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
> > > + struct acpi_ivhd *);
> > > +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
> > > +int _ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *);
> > > +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
> > > +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
> > > +int ivhd_invalidate_domain(struct iommu_softc *, int);
> > > +void ivhd_intr_map(struct iommu_softc *, int);
> > > +
> > > +int
> > > +acpiivhd_intr(void *ctx)
> > > +{
> > > + struct iommu_softc *iommu = ctx;
> > > +
> > > + if (!iommu->dte)
> > > +         return (0);
> > > + ivhd_poll_events(iommu);
> > > + return (1);
> > > +}
> > > +
> > > +/* Setup interrupt for AMD */
> > > +void
> > > +ivhd_intr_map(struct iommu_softc *iommu, int devid)
> > > +{
> > > + pci_intr_handle_t ih;
> > > +
> > > + if (iommu->intr)
> > > +         return;
> > > + ih.tag = pci_make_tag(NULL, sid_bus(devid), sid_dev(devid), sid_fun(devid));
> > > + ih.line = APIC_INT_VIA_MSG;
> > > + ih.pin = 0;
> > > + iommu->intr = pci_intr_establish(NULL, ih, IPL_NET | IPL_MPSAFE,
> > > +                         acpiivhd_intr, iommu, "amd_iommu");
> > > + printf("amd iommu intr: %p\n", iommu->intr);
> > > +}
> > > +
> > > +void
> > > +_dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
> > > +{
> > > + char *pfx[] = { "    ", "   ", "  ", " ", "" };
> > > + uint64_t i, sh;
> > > + struct pte_entry *npte;
> > > +  
> > > + for (i = 0; i < 512; i++) {
> > > +         sh = (i << (((lvl-1) * 9) + 12));
> > > +         if (pte[i].val & PTE_P) {
> > > +                 if (lvl > 1) {
> > > +                         npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
> > > +                         printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl,
> > > +                             pte[i].val, (pte[i].val >> 9) & 7);
> > > +                         _dumppte(npte, lvl-1, va | sh);
> > > +                 }
> > > +                 else { 
> > > +                         printf("%slvl%d: %.16llx <- %.16llx\n", pfx[lvl], lvl,
> > > +                             pte[i].val, va | sh);
> > > +                 }
> > > +         }
> > > + }
> > > +}
> > > +
> > > +void
> > > +showpage(int sid, paddr_t paddr)
> > > +{
> > > + struct domain *dom;
> > > + static int show = 0;
> > > +
> > > + if (show > 10)
> > > +         return;
> > > + show++;
> > > + dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
> > > + if (!dom)
> > > +         return;
> > > + printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
> > > +         hwdte[sid].dw0,
> > > +         hwdte[sid].dw1,
> > > +         hwdte[sid].dw2,
> > > +         hwdte[sid].dw3,
> > > +         hwdte[sid].dw4,
> > > +         hwdte[sid].dw5,
> > > +         hwdte[sid].dw6,
> > > +         hwdte[sid].dw7);
> > > + _dumppte(dom->pte, 3, 0);
> > > +}
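
showpage() still reaches for the global hwdte[] directly; once that
table is allocated with bus_dmamem_alloc(9) as suggested above, this
will have to go through the per-iommu pointer anyway, the way
domain_map_device() already does.  Sketch:

        struct iommu_softc *iommu = dom->iommu;
        struct ivhd_dte *dte;

        if (iommu->dte == NULL)
                return;
        dte = &iommu->dte[sid];
        printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
            dte->dw0, dte->dw1, dte->dw2, dte->dw3,
            dte->dw4, dte->dw5, dte->dw6, dte->dw7);
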
> > > +
> > > +/* Display AMD IOMMU Error */
> > > +void
> > > +ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
> > > +{
> > > + int type, sid, did, flag;
> > > + uint64_t address;
> > > +
> > > + /* Get Device, Domain, Address and Type of event */     
> > > + sid  = __EXTRACT(evt->dw0, EVT_SID);
> > > + type = __EXTRACT(evt->dw1, EVT_TYPE);
> > > + did  = __EXTRACT(evt->dw1, EVT_DID);
> > > + flag = __EXTRACT(evt->dw1, EVT_FLAG);
> > > + address = _get64(&evt->dw2);
> > > +
> > > + printf("=== IOMMU Error[%.4x]: ", head);
> > > + switch (type) {
> > > + case ILLEGAL_DEV_TABLE_ENTRY: // ok
> > > +         printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
> > > +            dmar_bdf(sid), address,
> > > +            evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +            evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
> > > +            evt->dw1 & EVT_RW ? "write" : "read",
> > > +            evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > +         ivhd_showdte();
> > > +         break;
> > > + case IO_PAGE_FAULT: // ok
> > > +         printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
> > > +            dmar_bdf(sid), did, address,
> > > +            evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +            evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
> > > +            evt->dw1 & EVT_PE ? "no perm" : "perm",
> > > +            evt->dw1 & EVT_RW ? "write" : "read",
> > > +            evt->dw1 & EVT_PR ? "present" : "not present",
> > > +            evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > +         ivhd_showdte();
> > > +         showpage(sid, address);
> > > +         break;
> > > + case DEV_TAB_HARDWARE_ERROR: // ok
> > > +         printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
> > > +             dmar_bdf(sid), address,
> > > +            evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +            evt->dw1 & EVT_RW ? "write" : "read",
> > > +            evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > +         ivhd_showdte();
> > > +         break;
> > > + case PAGE_TAB_HARDWARE_ERROR:
> > > +         printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
> > > +            dmar_bdf(sid), address,
> > > +            evt->dw1 & EVT_TR ? "translation" : "transaction",
> > > +            evt->dw1 & EVT_RW ? "write" : "read",
> > > +            evt->dw1 & EVT_I  ? "interrupt" : "memory");
> > > +         ivhd_showdte();
> > > +         break;
> > > + case ILLEGAL_COMMAND_ERROR: // ok
> > > +         printf("illegal command addr=0x%.16llx\n", address);
> > > +         ivhd_showcmd(iommu);
> > > +         break;
> > > + case COMMAND_HARDWARE_ERROR:
> > > +         printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
> > > +            address, flag);
> > > +         ivhd_showcmd(iommu);
> > > +         break;
> > > + case IOTLB_INV_TIMEOUT:
> > > +         printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
> > > +            dmar_bdf(sid), address);
> > > +         break;
> > > + case INVALID_DEVICE_REQUEST:
> > > +         printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
> > > +            dmar_bdf(sid), address, flag);
> > > +         break;
> > > + default:
> > > +         printf("unknown type=0x%.2x\n", type);
> > > +         break;
> > > + }
> > > + //ivhd_showdte();
> > > + /* Clear old event */
> > > + evt->dw0 = 0;
> > > + evt->dw1 = 0;
> > > + evt->dw2 = 0;
> > > + evt->dw3 = 0;
> > > +}
> > > +
> > > +/* AMD: Process IOMMU error from hardware */
> > > +int
> > > +ivhd_poll_events(struct iommu_softc *iommu)
> > > +{
> > > + uint32_t head, tail;
> > > + int sz;
> > > +
> > > + sz = sizeof(struct ivhd_event);
> > > + head = iommu_readl(iommu, EVT_HEAD_REG);
> > > + tail = iommu_readl(iommu, EVT_TAIL_REG);
> > > + if (head == tail) {
> > > +         /* No pending events */
> > > +         return (0);
> > > + }
> > > + while (head != tail) {
> > > +         ivhd_show_event(iommu, iommu->evt_tbl + head, head);
> > > +         head = (head + sz) % EVT_TBL_SIZE;
> > > + }
> > > + iommu_writel(iommu, EVT_HEAD_REG, head);
> > > + return (0);
> > > +}
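
The early return in ivhd_poll_events() just duplicates the while
condition and can go.  Also, if evt_tbl is declared as a struct
ivhd_event *, the pointer arithmetic above scales by the struct size
while the head/tail registers hold byte offsets; indexing by head / sz
sidesteps that (sketch, assuming that declaration):

        while (head != tail) {
                /* hardware head/tail registers hold byte offsets */
                ivhd_show_event(iommu, &iommu->evt_tbl[head / sz], head);
                head = (head + sz) % EVT_TBL_SIZE;
        }
        iommu_writel(iommu, EVT_HEAD_REG, head);
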
> > > +
> > > +/* AMD: Issue command to IOMMU queue */
> > > +int
> > > +_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
> > > +{
> > > + u_long rf;
> > > + uint32_t head, tail, next;
> > > + int sz;
> > > + 
> > > + head = iommu_readl(iommu, CMD_HEAD_REG);
> > > + sz = sizeof(*cmd);
> > > + rf = intr_disable();
> > > + tail = iommu_readl(iommu, CMD_TAIL_REG);
> > > + next = (tail + sz) % CMD_TBL_SIZE;
> > > + if (next == head) {
> > > +         printf("FULL\n");
> > > +         /* Queue is full */
> > > +         intr_restore(rf);
> > > +         return -EBUSY;
> > > + }
> > > + memcpy(iommu->cmd_tbl + tail, cmd, sz);
> > > + iommu_writel(iommu, CMD_TAIL_REG, next);
> > > + intr_restore(rf);
> > > + return (tail / sz);
> > > +}
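
_ivhd_issue_command() returns -EBUSY Linux-style on a full ring while
success returns a non-negative slot index; the tree doesn't use
negative errnos, so a plain -1 would be less surprising, and the bare
printf could say which iommu is full:

        if (next == head) {
                /* Queue is full */
                intr_restore(rf);
                printf("iommu%d: command queue full\n", iommu->id);
                return (-1);
        }
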
> > > +
> > > +#define IVHD_MAXDELAY 8
> > > +
> > > +int
> > > +ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
> > > +{
> > > + struct ivhd_command wq = { 0 };
> > > + volatile uint64_t wv __aligned(16) = 0LL;
> > > + paddr_t paddr;
> > > + int rc, i;
> > > + static int mi;
> > > +