Module Name: src Committed By: skrll Date: Wed Apr 27 07:53:24 UTC 2022
Modified Files:
	src/sys/arch/arm/apple: apple_dart.c

Log Message:
Sync with OpenBSD
- Use subpage feature
- Catch up with new device tree bindings
- Skip locked DARTs
- Use bypass mode for USB3 DARTs

Other changes from me
- Sprinkle BITS(3)
- Improve interrupt error handling

To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/arm/apple/apple_dart.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/arch/arm/apple/apple_dart.c diff -u src/sys/arch/arm/apple/apple_dart.c:1.3 src/sys/arch/arm/apple/apple_dart.c:1.4 --- src/sys/arch/arm/apple/apple_dart.c:1.3 Mon Sep 6 14:03:17 2021 +++ src/sys/arch/arm/apple/apple_dart.c Wed Apr 27 07:53:24 2022 @@ -1,4 +1,5 @@ -/* $NetBSD: apple_dart.c,v 1.3 2021/09/06 14:03:17 jmcneill Exp $ */ +/* $NetBSD: apple_dart.c,v 1.4 2022/04/27 07:53:24 skrll Exp $ */ +/* $OpenBSD: apldart.c,v 1.10 2022/02/27 17:36:52 kettenis Exp $ */ /*- * Copyright (c) 2021 Mark Kettenis <kette...@openbsd.org> @@ -20,7 +21,7 @@ //#define APPLE_DART_DEBUG #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: apple_dart.c,v 1.3 2021/09/06 14:03:17 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: apple_dart.c,v 1.4 2022/04/27 07:53:24 skrll Exp $"); #include <sys/param.h> #include <sys/bus.h> @@ -34,37 +35,78 @@ __KERNEL_RCSID(0, "$NetBSD: apple_dart.c #include <arm/cpufunc.h> #include <dev/fdt/fdtvar.h> +/* + * This driver largely ignores stream IDs and simply uses a single + * translation table for all the devices that it serves. This is good + * enough for the PCIe host bridge that serves the on-board devices on + * the current generation Apple Silicon Macs as these only have a + * single PCIe device behind each DART. 
+ */ /* * DART registers */ +#define DART_PARAMS2 0x0004 +#define DART_PARAMS2_BYPASS_SUPPORT __BIT(0) #define DART_TLB_OP 0x0020 -#define DART_TLB_OP_FLUSH __BIT(20) -#define DART_TLB_OP_BUSY __BIT(2) +#define DART_TLB_OP_BUSY __BIT(2) +#define DART_TLB_OP_FLUSH __BIT(20) #define DART_TLB_OP_SIDMASK 0x0034 #define DART_ERR_STATUS 0x0040 +#define DART_ERR_FLAG __BIT(31) +#define DART_ERR_STREAM_MASK __BITS(27, 24) +#define DART_ERR_CODE_MASK __BITS(11, 0) +#define DART_ERR_READ_FAULT __BIT(4) +#define DART_ERR_WRITE_FAULT __BIT(3) +#define DART_ERR_NOPTE __BIT(2) +#define DART_ERR_NOPMD __BIT(1) +#define DART_ERR_NOTTBR __BIT(0) #define DART_ERR_ADDRL 0x0050 #define DART_ERR_ADDRH 0x0054 -#define DART_CONFIG(sid) (0x0100 + (sid) * 0x4) -#define DART_CONFIG_TXEN __BIT(7) +#define DART_CONFIG 0x0060 +#define DART_CONFIG_LOCK __BIT(15) +#define DART_TCR(sid) (0x0100 + (sid) * 0x4) +#define DART_TCR_TRANSLATE_ENABLE __BIT(7) +#define DART_TCR_BYPASS_DART __BIT(8) +#define DART_TCR_BYPASS_DAPF __BIT(12) #define DART_TTBR(sid, idx) (0x0200 + (sid) * 0x10 + (idx) * 0x4) -#define DART_TTBR_VALID __BIT(31) -#define DART_TTBR_SHIFT 12 +#define DART_TTBR_VALID __BIT(31) +#define DART_TTBR_SHIFT 12 + +#define DART_NUM_STREAMS 16 +#define DART_ALL_STREAMS ((1 << DART_NUM_STREAMS) - 1) #define DART_APERTURE_START 0x00100000 #define DART_APERTURE_SIZE 0x3fe00000 #define DART_PAGE_SIZE 16384 #define DART_PAGE_MASK (DART_PAGE_SIZE - 1) -#define DART_L1_TABLE 0xb +/* + * Some hardware (e.g. bge(4)) will always use (aligned) 64-bit memory + * access. To make sure this doesn't fault, round the subpage limits + * down and up accordingly. 
+ */ +#define DART_OFFSET_MASK 7 + +#define DART_L1_TABLE 0x3 #define DART_L2_INVAL 0x0 -#define DART_L2_PAGE 0x3 +#define DART_L2_VALID __BIT(0) +#define DART_L2_FULL_PAGE __BIT(1) + +#define DART_L2_START_MASK __BITS(63, 52) +#define DART_L2_END_MASK __BITS(51, 40) +#define DART_L2_SUBPAGE(addr) __SHIFTOUT((addr), __BITS(13, 2)) +#define DART_L2_START(addr) __SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_START_MASK) +#define DART_L2_END(addr) __SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_END_MASK) #define DART_ROUND_PAGE(pa) (((pa) + DART_PAGE_MASK) & ~DART_PAGE_MASK) #define DART_TRUNC_PAGE(pa) ((pa) & ~DART_PAGE_MASK) +#define DART_ROUND_OFFSET(pa) (((pa) + DART_OFFSET_MASK) & ~DART_OFFSET_MASK) +#define DART_TRUNC_OFFSET(pa) ((pa) & ~DART_OFFSET_MASK) static const struct device_compatible_entry compat_data[] = { { .compat = "apple,dart-m1", .value = 16 }, + { .compat = "apple,t8103-dart", .value = 16 }, DEVICE_COMPAT_EOL }; @@ -188,14 +230,42 @@ apple_dart_intr(void *priv) uint32_t status; status = DART_READ(sc, DART_ERR_STATUS); - addr = DART_READ(sc, DART_ERR_ADDRL); - addr |= (uint64_t)DART_READ(sc, DART_ERR_ADDRH) << 32; + addr = __SHIFTIN(DART_READ(sc, DART_ERR_ADDRL), __BITS(31, 0)); + addr |= __SHIFTIN(DART_READ(sc, DART_ERR_ADDRH), __BITS(63, 32)); DART_WRITE(sc, DART_ERR_STATUS, status); + if ((status & DART_ERR_FLAG) == 0) + return 1; + +#ifdef APPLE_DART_DEBUG + printf("%s: status %#"PRIx32"\n", __func__, status); + printf("%s: addrl %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRL)); + printf("%s: addrh %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRH)); +#endif + + const char *reason = NULL; + int32_t code = __SHIFTOUT(status, DART_ERR_CODE_MASK); + switch (code) { + case DART_ERR_NOTTBR: + reason = "no ttbr for address"; + break; + case DART_ERR_NOPMD: + reason = "no pmd for address"; + break; + case DART_ERR_NOPTE: + reason = "no pte for address"; + break; + case DART_ERR_WRITE_FAULT: + reason = "write fault"; + break; + case DART_ERR_READ_FAULT: 
+ reason = "read fault"; + break; + } fdtbus_get_path(sc->sc_phandle, fdt_path, sizeof(fdt_path)); - printf("%s (%s): error addr 0x%016lx status 0x%08x\n", - device_xname(sc->sc_dev), fdt_path, addr, status); + printf("%s (%s): error addr 0x%016lx status 0x%08x: %s\n", + device_xname(sc->sc_dev), fdt_path, addr, status, reason); return 1; } @@ -206,9 +276,8 @@ apple_dart_lookup_tte(struct apple_dart_ int idx = dva / DART_PAGE_SIZE; int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t)); int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t)); - volatile uint64_t *l2; + volatile uint64_t *l2 = DART_DMA_KVA(sc->sc_l2[l2_idx]); - l2 = DART_DMA_KVA(sc->sc_l2[l2_idx]); return &l2[tte_idx]; } @@ -288,13 +357,22 @@ apple_dart_load_map(struct apple_dart_so map->dm_segs[seg].ds_addr = dva + off; pa = DART_TRUNC_PAGE(pa); + paddr_t start = DART_TRUNC_OFFSET(off); + paddr_t end = DART_PAGE_MASK; while (len > 0) { tte = apple_dart_lookup_tte(sc, dva); - *tte = pa | DART_L2_PAGE; + if (len < DART_PAGE_SIZE) + end = DART_ROUND_OFFSET(len) - 1; + *tte = pa | DART_L2_VALID | + DART_L2_START(start) | DART_L2_END(end); +#ifdef APPLE_DART_DEBUG + printf("tte %p = %"PRIx64"\n", tte, *tte); +#endif pa += DART_PAGE_SIZE; dva += DART_PAGE_SIZE; len -= DART_PAGE_SIZE; + start = 0; } } @@ -459,8 +537,6 @@ apple_dart_attach(device_t parent, devic struct apple_dart_softc * const sc = device_private(self); struct fdt_attach_args * const faa = aux; const int phandle = faa->faa_phandle; - uint64_t sidmask64; - uint32_t sidmask32; char intrstr[128]; volatile uint64_t *l1; bus_addr_t addr; @@ -486,16 +562,35 @@ apple_dart_attach(device_t parent, devic aprint_error(": couldn't map registers\n"); return; } - sc->sc_nsid = of_compatible_lookup(phandle, compat_data)->value; - if (of_getprop_uint64(phandle, "sid-mask", &sidmask64) == 0) { - sc->sc_sid_mask = sidmask64; - } else if (of_getprop_uint32(phandle, "sid-mask", &sidmask32) == 0) { - sc->sc_sid_mask = sidmask32; - } else { - sc->sc_sid_mask = 
0xffff; + /* Skip locked DARTs for now. */ + uint32_t config = DART_READ(sc, DART_CONFIG); + if (config & DART_CONFIG_LOCK) { + aprint_naive("\n"); + aprint_normal(": locked\n"); + return; + } + + /* + * Use bypass mode if supported. This avoids an issue with + * the USB3 controllers which need mappings entered into two + * IOMMUs, which is somewhat difficult to implement with our + * current kernel interfaces. + */ + uint32_t params2 = DART_READ(sc, DART_PARAMS2); + if (params2 & DART_PARAMS2_BYPASS_SUPPORT) { + for (sid = 0; sid < DART_NUM_STREAMS; sid++) { + DART_WRITE(sc, DART_TCR(sid), + DART_TCR_BYPASS_DART | DART_TCR_BYPASS_DAPF); + } + aprint_naive("\n"); + aprint_normal(": bypass\n"); + return; } + sc->sc_nsid = of_compatible_lookup(phandle, compat_data)->value; + sc->sc_sid_mask = __MASK(sc->sc_nsid); + aprint_naive("\n"); aprint_normal(": Apple DART @ %#lx/%#lx, %u SIDs (mask 0x%lx)\n", addr, size, sc->sc_nsid, sc->sc_sid_mask); @@ -513,7 +608,7 @@ apple_dart_attach(device_t parent, devic /* Disable translations */ for (sid = 0; sid < sc->sc_nsid; sid++) { - DART_WRITE(sc, DART_CONFIG(sid), 0); + DART_WRITE(sc, DART_TCR(sid), 0); } /* Remove page tables */ @@ -528,6 +623,9 @@ apple_dart_attach(device_t parent, devic * Build translation tables. We pre-allocate the translation * tables for the entire aperture such that we don't have to worry * about growing them in an mpsafe manner later. 
+ * + * Cover the entire address space [0, ..._START + ..._SIZE) even if vmem + * only allocates from [..._START, ..._START + ...+SIZE) */ const u_int ntte = howmany(DART_APERTURE_START + DART_APERTURE_SIZE - 1, @@ -553,23 +651,35 @@ apple_dart_attach(device_t parent, devic "couldn't allocate L2 tables\n"); return; } + l1[idx] = DART_DMA_DVA(sc->sc_l2[idx]) | DART_L1_TABLE; +#ifdef APPLE_DART_DEBUG + printf("l1[%d] (%p) = %"PRIx64"\n", idx, &l1[idx], l1[idx]); +#endif } /* Install page tables */ for (sid = 0; sid < sc->sc_nsid; sid++) { pa = DART_DMA_DVA(sc->sc_l1); for (idx = 0; idx < nl1; idx++) { + KASSERTMSG(__SHIFTOUT(pa, __BITS(DART_TTBR_SHIFT - 1, 0)) == 0, + "TTBR pa is not correctly aligned %" PRIxPADDR, pa); + DART_WRITE(sc, DART_TTBR(sid, idx), (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID); pa += DART_PAGE_SIZE; +#ifdef APPLE_DART_DEBUG + printf("writing %"PRIx64" to %"PRIx32"\n", + (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID, + DART_TTBR(sid, idx)); +#endif } } apple_dart_flush_tlb(sc); /* Enable translations */ for (sid = 0; sid < sc->sc_nsid; sid++) { - DART_WRITE(sc, DART_CONFIG(sid), DART_CONFIG_TXEN); + DART_WRITE(sc, DART_TCR(sid), DART_TCR_TRANSLATE_ENABLE); } ih = fdtbus_intr_establish_xname(phandle, 0, IPL_HIGH, FDT_INTR_MPSAFE,