Despite three days of mucking about with it, I can't make the PYXIS
iommu stable.  There is a pile of known bugs in the chip that are
semi-documented in the NetBSD source, but this doesn't seem to be
one of them, or at least not a manifestation I would have expected.
So I turn it off.

Change the way ptes are allocated: always search forward for
a new allocation, and if we reach the end of the arena, flush the
whole TLB and restart the search from the beginning.  This works
best for the bulk of the core logics, which only have a tbia
(invalidate-all).  It probably works best for tsunami too, since
we now do less synchronization with the pchips.
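
For reference, here is a minimal user-space sketch of that allocation
strategy.  The real code is the iommu_arena_alloc hunk in pci_iommu.c
below; the struct layout, the helper names (search, arena_alloc,
fake_flush), and the flush_tlb hook are simplified stand-ins for
illustration, not the kernel's actual types:

#include <stdio.h>

struct arena {
        unsigned long ptes[16];  /* 0 = free, nonzero = in use */
        long nents;              /* total number of pte slots */
        long next_entry;         /* where the forward search resumes */
        void (*flush_tlb)(void); /* stand-in for alpha_mv.mv_pci_tbi(hose, 0, -1) */
};

/* Find n consecutive free slots at or after 'start'; -1 if none.  */
static long search(unsigned long *beg, long nents, long start, long n)
{
        unsigned long *p = beg + start, *end = beg + nents;
        long i = 0;
        while (i < n && p < end)
                i = (*p++ == 0 ? i + 1 : 0);
        return i < n ? -1 : (p - beg) - n;
}

static long arena_alloc(struct arena *a, long n)
{
        long i, ofs;

        ofs = search(a->ptes, a->nents, a->next_entry, n);
        if (ofs < 0) {
                /* Reached the end: flush the whole IO TLB once and
                   retry from the beginning.  This replaces the
                   per-unmap flushes the old code performed.  */
                a->flush_tlb();
                ofs = search(a->ptes, a->nents, 0, n);
                if (ofs < 0)
                        return -1;
        }
        for (i = 0; i < n; ++i)
                a->ptes[ofs + i] = ~1UL;  /* reserve; real ptes filled in later */
        a->next_entry = ofs + n;
        return ofs;
}

static void fake_flush(void) { puts("tbia"); }

int main(void)
{
        /* Start the search pointer near the end so the first request
           wraps, forcing one flush before reuse.  */
        struct arena a = { .nents = 16, .next_entry = 14,
                           .flush_tlb = fake_flush };
        printf("got %ld\n", arena_alloc(&a, 4));
        printf("got %ld\n", arena_alloc(&a, 4));
        return 0;
}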


r~



diff -rup linux/arch/alpha/kernel/core_apecs.c 2.3.49-2/arch/alpha/kernel/core_apecs.c
--- linux/arch/alpha/kernel/core_apecs.c        Mon Feb 21 02:49:21 2000
+++ 2.3.49-2/arch/alpha/kernel/core_apecs.c     Tue Feb 29 19:26:00 2000
@@ -385,7 +385,7 @@ apecs_init_arch(void)
         * Window 1 is direct access 1GB at 1GB
         * Window 2 is scatter-gather 8MB at 8MB (for isa)
         */
-       hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, PAGE_SIZE);
+       hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
        hose->sg_pci = NULL;
        __direct_map_base = 0x40000000;
        __direct_map_size = 0x40000000;
diff -rup linux/arch/alpha/kernel/core_cia.c 2.3.49-2/arch/alpha/kernel/core_cia.c
--- linux/arch/alpha/kernel/core_cia.c  Mon Feb 21 02:49:21 2000
+++ 2.3.49-2/arch/alpha/kernel/core_cia.c       Tue Feb 29 19:26:39 2000
@@ -405,10 +405,12 @@ cia_init_arch(void)
         * ??? We ought to scale window 1 with memory.
         */
 
-       /* NetBSD hints that page tables must be aligned to 32K due
-          to a hardware bug.  No description of what models affected.  */
-       hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, 32768);
-       hose->sg_pci = iommu_arena_new(0x40000000, 0x08000000, 32768);
+       /* ??? NetBSD hints that page tables must be aligned to 32K,
+          possibly due to a hardware bug.  This is over-aligned
+          from the 8K alignment one would expect for an 8MB window. 
+          No description of which CIA revisions are affected.  */
+       hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0x8000);
+       hose->sg_pci = iommu_arena_new(hose, 0x40000000, 0x08000000, 0);
        __direct_map_base = 0x80000000;
        __direct_map_size = 0x80000000;
 
diff -rup linux/arch/alpha/kernel/core_lca.c 2.3.49-2/arch/alpha/kernel/core_lca.c
--- linux/arch/alpha/kernel/core_lca.c  Mon Feb 21 02:49:21 2000
+++ 2.3.49-2/arch/alpha/kernel/core_lca.c       Tue Feb 29 19:26:44 2000
@@ -307,7 +307,7 @@ lca_init_arch(void)
         * Window 0 is direct access 1GB at 1GB
         * Window 1 is scatter-gather 8MB at 8MB (for isa)
         */
-       hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, PAGE_SIZE);
+       hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
        hose->sg_pci = NULL;
        __direct_map_base = 0x40000000;
        __direct_map_size = 0x40000000;
diff -rup linux/arch/alpha/kernel/core_mcpcia.c 2.3.49-2/arch/alpha/kernel/core_mcpcia.c
--- linux/arch/alpha/kernel/core_mcpcia.c       Mon Feb 21 02:49:42 2000
+++ 2.3.49-2/arch/alpha/kernel/core_mcpcia.c    Tue Feb 29 19:26:49 2000
@@ -404,8 +404,8 @@ mcpcia_startup_hose(struct pci_controler
         * ??? We ought to scale window 1 with memory.
         */
 
-       hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, PAGE_SIZE);
-       hose->sg_pci = iommu_arena_new(0x40000000, 0x08000000, PAGE_SIZE);
+       hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+       hose->sg_pci = iommu_arena_new(hose, 0x40000000, 0x08000000, 0);
        __direct_map_base = 0x80000000;
        __direct_map_size = 0x80000000;
 
diff -rup linux/arch/alpha/kernel/core_pyxis.c 2.3.49-2/arch/alpha/kernel/core_pyxis.c
--- linux/arch/alpha/kernel/core_pyxis.c        Tue Feb 29 21:00:53 2000
+++ 2.3.49-2/arch/alpha/kernel/core_pyxis.c     Mon Mar 19 00:05:07 2068
@@ -36,7 +36,6 @@
  */
 
 #define DEBUG_CONFIG 0
-
 #if DEBUG_CONFIG
 # define DBG_CNF(args) printk args
 #else
@@ -434,6 +433,8 @@ pyxis_broken_pci_tbi(struct pci_controle
        ctrl = *(vuip)PYXIS_CTRL;
        *(vuip)PYXIS_CTRL = ctrl | 4;
        mb();
+       *(vuip)PYXIS_CTRL;
+       mb();
 
        /* Read from PCI dense memory space at TBI_ADDR, skipping 64k
           on each read.  This forces SG TLB misses.  It appears that
@@ -448,6 +449,8 @@ pyxis_broken_pci_tbi(struct pci_controle
        mb();
        *(vuip)PYXIS_CTRL = ctrl;
        mb();
+       *(vuip)PYXIS_CTRL;
+       mb();
 
        __restore_flags(flags);
 }
@@ -480,31 +483,31 @@ pyxis_init_arch(void)
        struct pci_controler *hose;
        unsigned int temp;
 
-#if 0
-       printk("pyxis_init: PYXIS_ERR_MASK 0x%x\n", *(vuip)PYXIS_ERR_MASK);
-       printk("pyxis_init: PYXIS_ERR 0x%x\n", *(vuip)PYXIS_ERR);
-       printk("pyxis_init: PYXIS_INT_REQ 0x%lx\n", *(vulp)PYXIS_INT_REQ);
-       printk("pyxis_init: PYXIS_INT_MASK 0x%lx\n", *(vulp)PYXIS_INT_MASK);
-       printk("pyxis_init: PYXIS_INT_ROUTE 0x%lx\n", *(vulp)PYXIS_INT_ROUTE);
-       printk("pyxis_init: PYXIS_INT_HILO 0x%lx\n", *(vulp)PYXIS_INT_HILO);
-       printk("pyxis_init: PYXIS_INT_CNFG 0x%x\n", *(vuip)PYXIS_INT_CNFG);
-       printk("pyxis_init: PYXIS_RT_COUNT 0x%lx\n", *(vulp)PYXIS_RT_COUNT);
-#endif
-
-       /* 
-        * Set up error reporting. Make sure CPU_PE is OFF in the mask.
-        */
+       /* Set up error reporting. Make sure CPU_PE is OFF in the mask.  */
        temp = *(vuip)PYXIS_ERR_MASK;
-       temp &= ~4;   
-       *(vuip)PYXIS_ERR_MASK = temp;
-       mb();
-       *(vuip)PYXIS_ERR_MASK;  /* re-read to force write */
+       *(vuip)PYXIS_ERR_MASK = temp & ~4;
 
+       /* Enable master/target abort.  */
        temp = *(vuip)PYXIS_ERR;
-       temp |= 0x180;          /* master/target abort */
-       *(vuip)PYXIS_ERR = temp;
+       *(vuip)PYXIS_ERR = temp | 0x180;
+
+       /* Clear the PYXIS_CFG register, which gets used for PCI Config
+          Space accesses.  That is the way we want to use it, and we do
+          not want to depend on what ARC or SRM might have left behind.  */
+       *(vuip)PYXIS_CFG = 0;
+ 
+       /* Zero the HAEs.  */
+       *(vuip)PYXIS_HAE_MEM = 0;
+       *(vuip)PYXIS_HAE_IO = 0;
+
+       /* Finally, check that the PYXIS_CTRL1 has IOA_BEN set for
+          enabling byte/word PCI bus space(s) access.  */
+       temp = *(vuip)PYXIS_CTRL1;
+       *(vuip)PYXIS_CTRL1 = temp | 1;
+
+       /* Synchronize with all previous changes.  */
        mb();
-       *(vuip)PYXIS_ERR;       /* re-read to force write */
+       *(vuip)PYXIS_REV;
 
        /*
         * Create our single hose.
@@ -531,10 +534,41 @@ pyxis_init_arch(void)
         * address range.
         */
 
-       /* NetBSD hints that page tables must be aligned to 32K due
-          to a hardware bug.  No description of what models affected.  */
-       hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, 32768);
-       hose->sg_pci = iommu_arena_new(0xc0000000, 0x08000000, 32768);
+#if 1
+       /* ??? There's some bit of synchronization wrt writing new tlb
+          entries that's missing.  Sometimes it works, sometimes invalid
+          tlb machine checks, sometimes hard lockup.  And this just within
+          the boot sequence.
+
+          I've tried extra memory barriers, extra alignment, pyxis
+          register reads, tlb flushes, and loopback tlb accesses.
+
+          I guess the pyxis revision in the sx164 is just too buggy...  */
+
+       hose->sg_isa = hose->sg_pci = NULL;
+       __direct_map_base = 0x40000000;
+       __direct_map_size = 0x80000000;
+
+       *(vuip)PYXIS_W0_BASE = 0x40000000 | 1;
+       *(vuip)PYXIS_W0_MASK = (0x40000000 - 1) & 0xfff00000;
+       *(vuip)PYXIS_T0_BASE = 0;
+
+       *(vuip)PYXIS_W1_BASE = 0x80000000 | 1;
+       *(vuip)PYXIS_W1_MASK = (0x40000000 - 1) & 0xfff00000;
+       *(vuip)PYXIS_T1_BASE = 0;
+
+       *(vuip)PYXIS_W2_BASE = 0;
+       *(vuip)PYXIS_W3_BASE = 0;
+
+       alpha_mv.mv_pci_tbi = NULL;
+       mb();
+#else
+       /* ??? NetBSD hints that page tables must be aligned to 32K,
+          possibly due to a hardware bug.  This is over-aligned
+          from the 8K alignment one would expect for an 8MB window. 
+          No description of which CIA revisions are affected.  */
+       hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0x08000);
+       hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0x20000);
        __direct_map_base = 0x40000000;
        __direct_map_size = 0x80000000;
 
@@ -560,37 +594,7 @@ pyxis_init_arch(void)
                pyxis_enable_broken_tbi(hose->sg_pci);
 
        alpha_mv.mv_pci_tbi(hose, 0, -1);
-alpha_mv.mv_pci_tbi = 0;
-
-       /*
-        * Next, clear the PYXIS_CFG register, which gets used
-        *  for PCI Config Space accesses. That is the way
-        *  we want to use it, and we do not want to depend on
-        *  what ARC or SRM might have left behind...
-        */
-       temp = *(vuip)PYXIS_CFG;
-       if (temp != 0) {
-               *(vuip)PYXIS_CFG = 0;
-               mb();
-               *(vuip)PYXIS_CFG; /* re-read to force write */
-       }
- 
-       /* Zero the HAE.  */
-       *(vuip)PYXIS_HAE_MEM = 0U; mb();
-       *(vuip)PYXIS_HAE_MEM;   /* re-read to force write */
-       *(vuip)PYXIS_HAE_IO = 0; mb();
-       *(vuip)PYXIS_HAE_IO;    /* re-read to force write */
-
-       /*
-        * Finally, check that the PYXIS_CTRL1 has IOA_BEN set for
-        * enabling byte/word PCI bus space(s) access.
-        */
-       temp = *(vuip) PYXIS_CTRL1;
-       if (!(temp & 1)) {
-               *(vuip)PYXIS_CTRL1 = temp | 1;
-               mb();
-               *(vuip)PYXIS_CTRL1; /* re-read */
-       }
+#endif
 }
 
 static inline void
diff -rup linux/arch/alpha/kernel/core_tsunami.c 2.3.49-2/arch/alpha/kernel/core_tsunami.c
--- linux/arch/alpha/kernel/core_tsunami.c      Tue Feb 29 20:56:09 2000
+++ 2.3.49-2/arch/alpha/kernel/core_tsunami.c   Tue Feb 29 19:28:39 2000
@@ -343,13 +343,9 @@ tsunami_init_one_pchip(tsunami_pchip *pc
         * because of an idiot-syncrasy of the CYPRESS chip.  It may
         * respond to a PCI bus address in the last 1MB of the 4GB
         * address range.
-        *
-        * Note that the TLB lookup logic uses bitwise concatenation,
-        * not addition, so the required arena alignment is based on
-        * the size of the window.
         */
-       hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, 0x00800000>>10);
-       hose->sg_pci = iommu_arena_new(0xc0000000, 0x08000000, 0x08000000>>10);
+       hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+       hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0);
        __direct_map_base = 0x40000000;
        __direct_map_size = 0x80000000;
 
diff -rup linux/arch/alpha/kernel/pci_impl.h 2.3.49-2/arch/alpha/kernel/pci_impl.h
--- linux/arch/alpha/kernel/pci_impl.h  Mon Feb 21 02:49:21 2000
+++ 2.3.49-2/arch/alpha/kernel/pci_impl.h       Tue Feb 29 20:13:00 2000
@@ -123,6 +123,24 @@ static inline u8 bridge_swizzle(u8 pin, 
    _ctl_; })
 
 
+/* A PCI IOMMU allocation arena.  There are typically two of these
+   regions per bus.  */
+/* ??? The 8400 has a 32-byte pte entry, and the entire table apparently
+   lives directly on the host bridge (no tlb?).  We don't support this
+   machine, but if we ever did, we'd need to parameterize all this quite
+   a bit further.  Probably with per-bus operation tables.  */
+
+struct pci_iommu_arena
+{
+       spinlock_t lock;
+       struct pci_controler *hose;
+       unsigned long *ptes;
+       dma_addr_t dma_base;
+       unsigned int size;
+       unsigned int next_entry;
+};
+
+
 /* The hose list.  */
 extern struct pci_controler *hose_head, **hose_tail;
 extern struct pci_controler *pci_isa_hose;
@@ -132,8 +150,9 @@ extern u8 common_swizzle(struct pci_dev 
 extern struct pci_controler *alloc_pci_controler(void);
 extern struct resource *alloc_resource(void);
 
-extern struct pci_iommu_arena *iommu_arena_new(dma_addr_t, unsigned long,
-                                          unsigned long);
+extern struct pci_iommu_arena *iommu_arena_new(struct pci_controler *,
+                                              dma_addr_t, unsigned long,
+                                              unsigned long);
 extern long iommu_arena_alloc(struct pci_iommu_arena *arena, long n);
 
 extern const char *const pci_io_names[];
diff -rup linux/arch/alpha/kernel/pci_iommu.c 2.3.49-2/arch/alpha/kernel/pci_iommu.c
--- linux/arch/alpha/kernel/pci_iommu.c Tue Feb 29 20:56:09 2000
+++ 2.3.49-2/arch/alpha/kernel/pci_iommu.c      Tue Feb 29 19:54:04 2000
@@ -27,6 +27,8 @@
 # define DBGA2(args...)
 #endif
 
+#define DEBUG_NODIRECT 0
+
 
 static inline unsigned long
 mk_iommu_pte(unsigned long paddr)
@@ -41,23 +43,29 @@ calc_npages(long bytes)
 }
 
 struct pci_iommu_arena *
-iommu_arena_new(dma_addr_t base, unsigned long window_size,
-               unsigned long align)
+iommu_arena_new(struct pci_controler *hose, dma_addr_t base,
+               unsigned long window_size, unsigned long align)
 {
-       unsigned long entries, mem_size, mem_pages;
+       unsigned long mem_size;
        struct pci_iommu_arena *arena;
 
-       entries = window_size >> PAGE_SHIFT;
-       mem_size = entries * sizeof(unsigned long);
-       mem_pages = calc_npages(mem_size);
+       mem_size = window_size / (PAGE_SIZE / sizeof(unsigned long));
+
+       /* Note that the TLB lookup logic uses bitwise concatenation,
+          not addition, so the required arena alignment is based on
+          the size of the window.  Retain the align parameter so that
+          particular systems can over-align the arena.  */
+       if (align < mem_size)
+               align = mem_size;
 
        arena = alloc_bootmem(sizeof(*arena));
-       arena->ptes = __alloc_bootmem(mem_pages * PAGE_SIZE, align, 0);
+       arena->ptes = __alloc_bootmem(mem_size, align, 0);
 
        spin_lock_init(&arena->lock);
+       arena->hose = hose;
        arena->dma_base = base;
        arena->size = window_size;
-       arena->alloc_hint = 0;
+       arena->next_entry = 0;
 
        return arena;
 }
@@ -74,20 +82,22 @@ iommu_arena_alloc(struct pci_iommu_arena
        /* Search forward for the first sequence of N empty ptes.  */
        beg = arena->ptes;
        end = beg + (arena->size >> PAGE_SHIFT);
-       p = beg + arena->alloc_hint;
+       p = beg + arena->next_entry;
        i = 0;
        while (i < n && p < end)
                i = (*p++ == 0 ? i + 1 : 0);
 
-       if (p >= end) {
-               /* Failure.  Assume the hint was wrong and go back to
+       if (i < n) {
+               /* Reached the end.  Flush the TLB and restart the
                   search from the beginning.  */
+               alpha_mv.mv_pci_tbi(arena->hose, 0, -1);
+
                p = beg;
                i = 0;
                while (i < n && p < end)
                        i = (*p++ == 0 ? i + 1 : 0);
 
-               if (p >= end) {
+               if (i < n) {
                        spin_unlock_irqrestore(&arena->lock, flags);
                        return -1;
                }
@@ -100,7 +110,7 @@ iommu_arena_alloc(struct pci_iommu_arena
        for (p = p - n, i = 0; i < n; ++i)
                p[i] = ~1UL;
 
-       arena->alloc_hint = p - beg + n;
+       arena->next_entry = p - beg + n;
        spin_unlock_irqrestore(&arena->lock, flags);
 
        return p - beg;
@@ -115,7 +125,6 @@ iommu_arena_free(struct pci_iommu_arena 
        p = arena->ptes + ofs;
        for (i = 0; i < n; ++i)
                p[i] = 0;
-       arena->alloc_hint = ofs;
 }
 
 /* Map a single buffer of the indicate size for PCI DMA in streaming
@@ -138,6 +147,7 @@ pci_map_single(struct pci_dev *pdev, voi
 
        paddr = virt_to_phys(cpu_addr);
 
+#if !DEBUG_NODIRECT
        /* First check to see if we can use the direct map window.  */
        if (paddr + size + __direct_map_base - 1 <= max_dma
            && paddr + size <= __direct_map_size) {
@@ -148,6 +158,7 @@ pci_map_single(struct pci_dev *pdev, voi
 
                return ret;
        }
+#endif
 
        /* If the machine doesn't define a pci_tbi routine, we have to
           assume it doesn't support sg mapping.  */
@@ -199,6 +210,7 @@ pci_unmap_single(struct pci_dev *pdev, d
        if (direction == PCI_DMA_NONE)
                BUG();
 
+#if !DEBUG_NODIRECT
        if (dma_addr >= __direct_map_base
            && dma_addr < __direct_map_base + __direct_map_size) {
                /* Nothing to do.  */
@@ -208,6 +220,7 @@ pci_unmap_single(struct pci_dev *pdev, d
 
                return;
        }
+#endif
 
        arena = hose->sg_pci;
        if (!arena || dma_addr < arena->dma_base)
@@ -224,10 +237,9 @@ pci_unmap_single(struct pci_dev *pdev, d
 
        npages = calc_npages((dma_addr & ~PAGE_MASK) + size);
        iommu_arena_free(arena, dma_ofs, npages);
-       alpha_mv.mv_pci_tbi(hose, dma_addr, dma_addr + size - 1);
 
-       DBGA2("pci_unmap_single: sg [%x,%lx] np %ld from %p\n",
-             dma_addr, size, npages, __builtin_return_address(0));
+       DBGA("pci_unmap_single: sg [%x,%lx] np %ld from %p\n",
+            dma_addr, size, npages, __builtin_return_address(0));
 }
 
 
@@ -347,6 +359,7 @@ sg_fill(struct scatterlist *leader, stru
        unsigned long *ptes;
        long npages, dma_ofs, i;
 
+#if !DEBUG_NODIRECT
        /* If everything is physically contiguous, and the addresses
           fall into the direct-map window, use it.  */
        if (leader->dma_address == 0
@@ -360,6 +373,7 @@ sg_fill(struct scatterlist *leader, stru
 
                return 0;
        }
+#endif
 
        /* Otherwise, we'll use the iommu to make the pages virtually
           contiguous.  */
@@ -376,56 +390,38 @@ sg_fill(struct scatterlist *leader, stru
        DBGA("    sg_fill: [%p,%lx] -> sg %x np %ld\n",
             leader->address, size, out->dma_address, npages);
 
+       /* All virtually contiguous.  We need to find the length of each
+          physically contiguous subsegment to fill in the ptes.  */
        ptes = &arena->ptes[dma_ofs];
        sg = leader;
-       if (0 && leader->dma_address == 0) {
-               /* All physically contiguous.  We already have the
-                  length, all we need is to fill in the ptes.  */
+       do {
+               struct scatterlist *last_sg = sg;
+
+               size = sg->length;
+               paddr = virt_to_phys(sg->address);
 
-               paddr = virt_to_phys(sg->address) & PAGE_MASK;
+               while (sg+1 < end && (int) sg[1].dma_address == -1) {
+                       size += sg[1].length;
+                       sg++;
+               }
+
+               npages = calc_npages((paddr & ~PAGE_MASK) + size);
+
+               paddr &= PAGE_MASK;
                for (i = 0; i < npages; ++i, paddr += PAGE_SIZE)
                        *ptes++ = mk_iommu_pte(paddr);
 
 #if DEBUG_ALLOC > 0
-               DBGA("    (0) [%p,%x] np %ld\n",
-                    sg->address, sg->length, npages);
-               for (++sg; sg < end && (int) sg->dma_address < 0; ++sg)
+               DBGA("    (%ld) [%p,%x] np %ld\n",
+                    last_sg - leader, last_sg->address,
+                    last_sg->length, npages);
+               while (++last_sg <= sg) {
                        DBGA("        (%ld) [%p,%x] cont\n",
-                            sg - leader, sg->address, sg->length);
-#endif
-       } else {
-               /* All virtually contiguous.  We need to find the
-                  length of each physically contiguous subsegment
-                  to fill in the ptes.  */
-               do {
-                       struct scatterlist *last_sg = sg;
-
-                       size = sg->length;
-                       paddr = virt_to_phys(sg->address);
-
-                       while (sg+1 < end && (int) sg[1].dma_address == -1) {
-                               size += sg[1].length;
-                               sg++;
-                       }
-
-                       npages = calc_npages((paddr & ~PAGE_MASK) + size);
-
-                       paddr &= PAGE_MASK;
-                       for (i = 0; i < npages; ++i, paddr += PAGE_SIZE)
-                               *ptes++ = mk_iommu_pte(paddr);
-
-#if DEBUG_ALLOC > 0
-                       DBGA("    (%ld) [%p,%x] np %ld\n",
                             last_sg - leader, last_sg->address,
-                            last_sg->length, npages);
-                       while (++last_sg <= sg) {
-                               DBGA("        (%ld) [%p,%x] cont\n",
-                                    last_sg - leader, last_sg->address,
-                                    last_sg->length);
-                       }
+                            last_sg->length);
+               }
 #endif
-               } while (++sg < end && (int) sg->dma_address < 0);
-       }
+       } while (++sg < end && (int) sg->dma_address < 0);
 
        return 1;
 }
@@ -472,13 +468,9 @@ pci_map_sg(struct pci_dev *pdev, struct 
        /* Third, iterate over the scatterlist leaders and allocate
           dma space as needed.  */
        for (out = sg; sg < end; ++sg) {
-               int ret;
-
                if ((int) sg->dma_address < 0)
                        continue;
-
-               ret = sg_fill(sg, end, out, arena, max_dma);
-               if (ret < 0)
+               if (sg_fill(sg, end, out, arena, max_dma) < 0)
                        goto error;
                out++;
        }
@@ -517,7 +509,6 @@ pci_unmap_sg(struct pci_dev *pdev, struc
        struct pci_iommu_arena *arena;
        struct scatterlist *end;
        dma_addr_t max_dma;
-       dma_addr_t fstart, fend;
 
        if (direction == PCI_DMA_NONE)
                BUG();
@@ -531,42 +522,32 @@ pci_unmap_sg(struct pci_dev *pdev, struc
        if (!arena || arena->dma_base + arena->size > max_dma)
                arena = hose->sg_isa;
 
-       fstart = -1;
-       fend = 0;
        for (end = sg + nents; sg < end; ++sg) {
                unsigned long addr, size;
+               long npages, ofs;
 
                addr = sg->dma_address;
                size = sg->dma_length;
-
                if (!size)
                        break;
 
+#if !DEBUG_NODIRECT
                if (addr >= __direct_map_base
                    && addr < __direct_map_base + __direct_map_size) {
                        /* Nothing to do.  */
                        DBGA("    (%ld) direct [%lx,%lx]\n",
                              sg - end + nents, addr, size);
-               } else {
-                       long npages, ofs;
-                       dma_addr_t tend;
+                       continue;
+               }
+#endif
 
-                       DBGA("    (%ld) sg [%lx,%lx]\n",
-                             sg - end + nents, addr, size);
+               DBGA("    (%ld) sg [%lx,%lx]\n",
+                    sg - end + nents, addr, size);
 
-                       npages = calc_npages((addr & ~PAGE_MASK) + size);
-                       ofs = (addr - arena->dma_base) >> PAGE_SHIFT;
-                       iommu_arena_free(arena, ofs, npages);
-
-                       tend = addr + size - 1;
-                       if (fstart > addr)
-                               fstart = addr;
-                       if (fend < tend)
-                               fend = tend;
-               }
+               npages = calc_npages((addr & ~PAGE_MASK) + size);
+               ofs = (addr - arena->dma_base) >> PAGE_SHIFT;
+               iommu_arena_free(arena, ofs, npages);
        }
-       if (fend)
-               alpha_mv.mv_pci_tbi(hose, fstart, fend);
 
        DBGA("pci_unmap_sg: %d entries\n", nents - (end - sg));
 }
@@ -580,6 +561,7 @@ pci_dma_supported(struct pci_dev *pdev, 
        struct pci_controler *hose;
        struct pci_iommu_arena *arena;
 
+#if !DEBUG_NODIRECT
        /* If there exists a direct map, and the mask fits either
           MAX_DMA_ADDRESS defined such that GFP_DMA does something
           useful, or the total system memory as shifted by the
@@ -588,6 +570,7 @@ pci_dma_supported(struct pci_dev *pdev, 
            && (__direct_map_base + MAX_DMA_ADDRESS-IDENT_ADDR-1 <= mask
                || __direct_map_base + (max_low_pfn<<PAGE_SHIFT)-1 <= mask))
                return 1;
+#endif
 
        /* Check that we have a scatter-gather arena that fits.  */
        hose = pdev ? pdev->sysdata : pci_isa_hose;
diff -rup linux/include/asm-alpha/pci.h 2.3.49-2/include/asm-alpha/pci.h
--- linux/include/asm-alpha/pci.h       Tue Feb 29 20:56:13 2000
+++ 2.3.49-2/include/asm-alpha/pci.h    Tue Feb 29 19:55:08 2000
@@ -12,22 +12,7 @@
 struct pci_dev;
 struct pci_bus;
 struct resource;
-
-/* A PCI IOMMU allocation arena.  There are typically two of these
-   regions per bus.  */
-/* ??? The 8400 has a 32-byte pte entry, and the entire table apparently
-   lives directly on the host bridge (no tlb?).  We don't support this
-   machine, but if we ever did, we'd need to parameterize all this quite
-   a bit further.  Probably with per-bus operation tables.  */
-
-struct pci_iommu_arena
-{
-       spinlock_t lock;
-       unsigned long *ptes;
-       dma_addr_t dma_base;
-       unsigned int size;
-       unsigned int alloc_hint;
-};
+struct pci_iommu_arena;
 
 /* A controler.  Used to manage multiple PCI busses.  */
 
