Author: will Date: Mon Jul 18 01:55:25 2016 New Revision: 302976 URL: https://svnweb.freebsd.org/changeset/base/302976
Log: libkvm: Improve physical address lookup scaling. Instead of using a hash table to convert physical page addresses to offsets in the sparse page array, cache the number of bits set for each 4MB chunk of physical pages. Upon lookup, find the nearest cached population count, then add/subtract the number of bits from that point to the page's PTE bit. Then multiply by page size and add to the sparse page map's base offset. This replaces O(n) worst-case lookup with O(1) (plus a small number of bits to scan in the bitmap). Also, for a 128GB system, a typical kernel core of about 8GB will now only require ~4.5MB of RAM for this approach instead of ~48MB as with the hash table. More concretely, /usr/sbin/crashinfo against the same core improves from a max RSS of 188MB and wall time of 43.72s (33.25 user 2.94 sys) to 135MB and 9.43s (2.58 user 1.47 sys). Running "thread apply all bt" in kgdb has a similar RSS improvement, and wall time drops from 4.44s to 1.93s. Reviewed by: jhb Sponsored by: Backtrace I/O Modified: head/lib/libkvm/kvm.c head/lib/libkvm/kvm_minidump_aarch64.c head/lib/libkvm/kvm_minidump_amd64.c head/lib/libkvm/kvm_minidump_arm.c head/lib/libkvm/kvm_minidump_i386.c head/lib/libkvm/kvm_minidump_mips.c head/lib/libkvm/kvm_private.c head/lib/libkvm/kvm_private.h Modified: head/lib/libkvm/kvm.c ============================================================================== --- head/lib/libkvm/kvm.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm.c Mon Jul 18 01:55:25 2016 (r302976) @@ -283,6 +283,8 @@ kvm_close(kvm_t *kd) free((void *) kd->argspc); if (kd->argv != 0) free((void *)kd->argv); + if (kd->pt_map != NULL) + free(kd->pt_map); free((void *)kd); return (0); Modified: head/lib/libkvm/kvm_minidump_aarch64.c ============================================================================== --- head/lib/libkvm/kvm_minidump_aarch64.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_minidump_aarch64.c Mon Jul 18 01:55:25 2016 (r302976) @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); struct vmstate { struct minidumphdr hdr; - struct hpt hpt; uint64_t *page_map; }; @@ -67,7 +66,6 @@ _aarch64_minidump_freevtop(kvm_t *kd) { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); free(vm->page_map); free(vm); kd->vmst = NULL; @@ -77,8 +75,7 @@ static int _aarch64_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint64_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -114,19 +111,12 @@ _aarch64_minidump_initvtop(kvm_t *kd) /* Skip header and msgbuf */ off = AARCH64_PAGE_SIZE + aarch64_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, - "cannot allocate %d bytes for bitmap", - vmst->hdr.bitmapsize); - return (-1); - } - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, - "cannot read %d bytes for page bitmap", - vmst->hdr.bitmapsize); - free(bitmap); + /* build physical address lookup table for sparse pages */ + sparse_off = off + aarch64_round_page(vmst->hdr.bitmapsize) + + aarch64_round_page(vmst->hdr.pmapsize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + AARCH64_PAGE_SIZE, sizeof(uint64_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += aarch64_round_page(vmst->hdr.bitmapsize); @@ -136,7 +126,6 @@ _aarch64_minidump_initvtop(kvm_t *kd) _kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); return (-1); } /* This is the end of the dump, savecore may have truncated it. */ @@ -149,15 +138,9 @@ _aarch64_minidump_initvtop(kvm_t *kd) AARCH64_PAGE_SIZE) { _kvm_err(kd, kd->program, "cannot read %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); return (-1); } - off += vmst->hdr.pmapsize; - - /* build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - AARCH64_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += aarch64_round_page(vmst->hdr.pmapsize); return (0); } @@ -178,7 +161,7 @@ _aarch64_minidump_vatop(kvm_t *kd, kvadd if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { a = (va - vm->hdr.dmapbase + vm->hdr.dmapphys) & ~AARCH64_PAGE_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_aarch64_minidump_vatop: " "direct map address 0x%jx not in minidump", @@ -198,7 +181,7 @@ _aarch64_minidump_vatop(kvm_t *kd, kvadd goto invalid; } a = l3 & ~AARCH64_ATTR_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_aarch64_minidump_vatop: " "physical address 0x%jx not in minidump", Modified: head/lib/libkvm/kvm_minidump_amd64.c ============================================================================== --- head/lib/libkvm/kvm_minidump_amd64.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_minidump_amd64.c Mon Jul 18 01:55:25 2016 (r302976) @@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$"); struct vmstate { struct minidumphdr hdr; - struct hpt hpt; amd64_pte_t *page_map; }; @@ -66,9 +65,7 @@ _amd64_minidump_freevtop(kvm_t *kd) { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->page_map) - free(vm->page_map); + free(vm->page_map); free(vm); kd->vmst = NULL; } @@ -77,8 +74,7 @@ static int _amd64_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint64_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -116,37 +112,28 @@ _amd64_minidump_initvtop(kvm_t *kd) /* Skip header and msgbuf */ off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize); - return (-1); - } - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) + + amd64_round_page(vmst->hdr.pmapsize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + AMD64_PAGE_SIZE, sizeof(uint64_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += amd64_round_page(vmst->hdr.bitmapsize); vmst->page_map = _kvm_malloc(kd, vmst->hdr.pmapsize); if (vmst->page_map == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); + _kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map", + vmst->hdr.pmapsize); return (-1); } if (pread(kd->pmfd, vmst->page_map, vmst->hdr.pmapsize, off) != (ssize_t)vmst->hdr.pmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); + _kvm_err(kd, kd->program, "cannot read %d bytes for page_map", + vmst->hdr.pmapsize); return (-1); } - off += vmst->hdr.pmapsize; - - /* build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - AMD64_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += amd64_round_page(vmst->hdr.pmapsize); return (0); } @@ -175,7 +162,7 @@ _amd64_minidump_vatop_v1(kvm_t *kd, kvad goto invalid; } a = pte & AMD64_PG_FRAME; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump", @@ -186,7 +173,7 @@ _amd64_minidump_vatop_v1(kvm_t *kd, kvad return (AMD64_PAGE_SIZE - offset); } else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump", @@ -235,20 +222,20 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_ } if ((pde & AMD64_PG_PS) == 0) { a = pde & AMD64_PG_FRAME; - ofs = _kvm_hpt_find(&vm->hpt, a); + /* TODO: Just read the single PTE */ + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, - "_amd64_minidump_vatop: pt physical address 0x%jx not in minidump", + "cannot find page table entry for %ju", (uintmax_t)a); goto invalid; } - /* TODO: Just read the single PTE */ if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) != AMD64_PAGE_SIZE) { _kvm_err(kd, kd->program, - "cannot read %d bytes for page table", - AMD64_PAGE_SIZE); - return (-1); + "cannot read page table entry for %ju", + (uintmax_t)a); + goto invalid; } pteindex = (va >> AMD64_PAGE_SHIFT) & (AMD64_NPTEPG - 1); @@ -263,7 +250,7 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_ a = pde & AMD64_PG_PS_FRAME; a += (va & AMD64_PDRMASK) ^ offset; } - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop: physical address 0x%jx not in minidump", @@ -274,7 +261,7 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_ return (AMD64_PAGE_SIZE - offset); } else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop: direct map address 0x%jx not in minidump", Modified: head/lib/libkvm/kvm_minidump_arm.c ============================================================================== --- head/lib/libkvm/kvm_minidump_arm.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_minidump_arm.c Mon Jul 18 01:55:25 2016 (r302976) @@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$"); struct vmstate { struct minidumphdr hdr; - struct hpt hpt; void *ptemap; unsigned char ei_data; }; @@ -69,9 +68,7 @@ _arm_minidump_freevtop(kvm_t *kd) { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->ptemap) - free(vm->ptemap); + free(vm->ptemap); free(vm); kd->vmst = NULL; } @@ -80,8 +77,7 @@ static int _arm_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint32_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -122,18 +118,11 @@ _arm_minidump_initvtop(kvm_t *kd) /* Skip header and msgbuf */ off = ARM_PAGE_SIZE + arm_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for " - "bitmap", vmst->hdr.bitmapsize); - return (-1); - } - - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", - vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + arm_round_page(vmst->hdr.bitmapsize) + + arm_round_page(vmst->hdr.ptesize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + ARM_PAGE_SIZE, sizeof(uint32_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += arm_round_page(vmst->hdr.bitmapsize); @@ -142,7 +131,6 @@ _arm_minidump_initvtop(kvm_t *kd) if (vmst->ptemap == NULL) { _kvm_err(kd, kd->program, "cannot allocate %d bytes for " "ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } @@ -150,16 +138,9 @@ _arm_minidump_initvtop(kvm_t *kd) (ssize_t)vmst->hdr.ptesize) { _kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } - - off += vmst->hdr.ptesize; - - /* Build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - ARM_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += arm_round_page(vmst->hdr.ptesize); return (0); } @@ -209,7 +190,7 @@ _arm_minidump_kvatop(kvm_t *kd, kvaddr_t a = pte & ARM_L2_S_FRAME; } - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_arm_minidump_kvatop: " "physical address 0x%jx not in minidump", Modified: head/lib/libkvm/kvm_minidump_i386.c ============================================================================== --- head/lib/libkvm/kvm_minidump_i386.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_minidump_i386.c Mon Jul 18 01:55:25 2016 (r302976) @@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$"); struct vmstate { struct minidumphdr hdr; - struct hpt hpt; void *ptemap; }; @@ -66,9 +65,7 @@ _i386_minidump_freevtop(kvm_t *kd) { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->ptemap) - free(vm->ptemap); + free(vm->ptemap); free(vm); kd->vmst = NULL; } @@ -77,8 +74,7 @@ static int _i386_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint32_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -110,15 +106,11 @@ _i386_minidump_initvtop(kvm_t *kd) /* Skip header and msgbuf */ off = I386_PAGE_SIZE + i386_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize); - return (-1); - } - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + i386_round_page(vmst->hdr.bitmapsize) + + i386_round_page(vmst->hdr.ptesize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + I386_PAGE_SIZE, sizeof(uint32_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += i386_round_page(vmst->hdr.bitmapsize); @@ -126,21 +118,14 @@ _i386_minidump_initvtop(kvm_t *kd) vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize); if (vmst->ptemap == NULL) { _kvm_err(kd, kd->program, "cannot allocate %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) != (ssize_t)vmst->hdr.ptesize) { _kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } - off += vmst->hdr.ptesize; - - /* build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - I386_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += i386_round_page(vmst->hdr.ptesize); return (0); } @@ -171,7 +156,7 @@ _i386_minidump_vatop_pae(kvm_t *kd, kvad goto invalid; } a = pte & I386_PG_FRAME_PAE; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_i386_minidump_vatop_pae: physical address 0x%jx not in minidump", @@ -218,7 +203,7 @@ _i386_minidump_vatop(kvm_t *kd, kvaddr_t goto invalid; } a = pte & I386_PG_FRAME; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_i386_minidump_vatop: physical address 0x%jx not in minidump", Modified: head/lib/libkvm/kvm_minidump_mips.c ============================================================================== --- head/lib/libkvm/kvm_minidump_mips.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_minidump_mips.c Mon Jul 18 01:55:25 2016 (r302976) @@ -52,7 +52,6 @@ __FBSDID("$FreeBSD$"); struct vmstate { struct minidumphdr hdr; - struct hpt hpt; void *ptemap; int pte_size; }; @@ -74,9 +73,7 @@ _mips_minidump_freevtop(kvm_t *kd) { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->ptemap) - free(vm->ptemap); + free(vm->ptemap); free(vm); kd->vmst = NULL; } @@ -85,8 +82,7 @@ static int _mips_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint32_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -129,18 +125,11 @@ _mips_minidump_initvtop(kvm_t *kd) /* Skip header and msgbuf */ off = MIPS_PAGE_SIZE + mips_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for " - "bitmap", vmst->hdr.bitmapsize); - return (-1); - } - - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", - vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + mips_round_page(vmst->hdr.bitmapsize) + + mips_round_page(vmst->hdr.ptesize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + MIPS_PAGE_SIZE, sizeof(uint32_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += mips_round_page(vmst->hdr.bitmapsize); @@ -149,7 +138,6 @@ _mips_minidump_initvtop(kvm_t *kd) if (vmst->ptemap == NULL) { _kvm_err(kd, kd->program, "cannot allocate %d bytes for " "ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } @@ -157,16 +145,9 @@ _mips_minidump_initvtop(kvm_t *kd) (ssize_t)vmst->hdr.ptesize) { _kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } - - off += vmst->hdr.ptesize; - - /* Build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - MIPS_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += mips_round_page(vmst->hdr.ptesize); return (0); } @@ -243,7 +224,7 @@ _mips_minidump_kvatop(kvm_t *kd, kvaddr_ } found: - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_mips_minidump_kvatop: physical " "address 0x%jx not in minidump", (uintmax_t)a); Modified: head/lib/libkvm/kvm_private.c ============================================================================== --- head/lib/libkvm/kvm_private.c Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_private.c Mon Jul 18 01:55:25 2016 (r302976) @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> +#include <assert.h> #include <fcntl.h> #include <kvm.h> #include <limits.h> @@ -213,73 +214,178 @@ bad: return (-1); } -static void -_kvm_hpt_insert(struct hpt *hpt, uint64_t pa, off_t off) +/* + * Transform v such that only bits [bit0, bitN) may be set. Generates a + * bitmask covering the number of bits, then shifts so +bit0+ is the first. + */ +static uint64_t +bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN) { - struct hpte *hpte; - uint32_t fnv = FNV1_32_INIT; + if (bit0 == 0 && bitN == BITS_IN(v)) + return (v); - fnv = fnv_32_buf(&pa, sizeof(pa), fnv); - fnv &= (HPT_SIZE - 1); - hpte = malloc(sizeof(*hpte)); - hpte->pa = pa; - hpte->off = off; - hpte->next = hpt->hpt_head[fnv]; - hpt->hpt_head[fnv] = hpte; + return (v & (((1ULL << (bitN - bit0)) - 1ULL) << bit0)); } -void -_kvm_hpt_init(kvm_t *kd, struct hpt *hpt, void *base, size_t len, off_t off, - int page_size, int word_size) +/* + * Returns the number of bits in a given byte array range starting at a + * given base, from bit0 to bitN. bit0 may be non-zero in the case of + * counting backwards from bitN. + */ +static uint64_t +popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN) { - uint64_t bits, idx, pa; - uint64_t *base64; - uint32_t *base32; - - base64 = base; - base32 = base; - for (idx = 0; idx < len / word_size; idx++) { - if (word_size == sizeof(uint64_t)) - bits = _kvm64toh(kd, base64[idx]); - else - bits = _kvm32toh(kd, base32[idx]); - pa = idx * word_size * NBBY * page_size; - for (; bits != 0; bits >>= 1, pa += page_size) { - if ((bits & 1) == 0) - continue; - _kvm_hpt_insert(hpt, pa, off); - off += page_size; - } + uint32_t res = bitN - bit0; + uint64_t count = 0; + uint32_t bound; + + /* Align to 64-bit boundary on the left side if needed. */ + if ((bit0 % BITS_IN(*addr)) != 0) { + bound = MIN(bitN, roundup2(bit0, BITS_IN(*addr))); + count += __bitcount64(bitmask_range(*addr, bit0, bound)); + res -= (bound - bit0); + addr++; + } + + while (res > 0) { + bound = MIN(res, BITS_IN(*addr)); + count += __bitcount64(bitmask_range(*addr, 0, bound)); + res -= bound; + addr++; } + + return (count); } -off_t -_kvm_hpt_find(struct hpt *hpt, uint64_t pa) +int +_kvm_pt_init(kvm_t *kd, size_t map_len, off_t map_off, off_t sparse_off, + int page_size, int word_size) { - struct hpte *hpte; - uint32_t fnv = FNV1_32_INIT; + uint64_t *addr; + uint32_t *popcount_bin; + int bin_popcounts = 0; + uint64_t pc_bins, res; + ssize_t rd; - fnv = fnv_32_buf(&pa, sizeof(pa), fnv); - fnv &= (HPT_SIZE - 1); - for (hpte = hpt->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) { - if (pa == hpte->pa) - return (hpte->off); + /* + * Map the bitmap specified by the arguments. + */ + kd->pt_map = _kvm_malloc(kd, map_len); + if (kd->pt_map == NULL) { + _kvm_err(kd, kd->program, "cannot allocate %zu bytes for bitmap", + map_len); + return (-1); } - return (-1); + rd = pread(kd->pmfd, kd->pt_map, map_len, map_off); + if (rd < 0 || rd != (ssize_t)map_len) { + _kvm_err(kd, kd->program, "cannot read %zu bytes for bitmap", + map_len); + return (-1); + } + kd->pt_map_size = map_len; + + /* + * Generate a popcount cache for every POPCOUNT_BITS in the bitmap, + * so lookups only have to calculate the number of bits set between + * a cache point and their bit. This reduces lookups to O(1), + * without significantly increasing memory requirements. + * + * Round up the number of bins so that 'upper half' lookups work for + * the final bin, if needed. The first popcount is 0, since no bits + * precede bit 0, so add 1 for that also. Without this, extra work + * would be needed to handle the first PTEs in _kvm_pt_find(). + */ + addr = kd->pt_map; + res = map_len; + pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS; + kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t)); + if (kd->pt_popcounts == NULL) + return (-1); + + for (popcount_bin = &kd->pt_popcounts[1]; res > 0; + addr++, res -= sizeof(*addr)) { + *popcount_bin += popcount_bytes(addr, 0, + MIN(res * NBBY, BITS_IN(*addr))); + if (++bin_popcounts == POPCOUNTS_IN(*addr)) { + popcount_bin++; + *popcount_bin = *(popcount_bin - 1); + bin_popcounts = 0; + } + } + + assert(pc_bins * sizeof(*popcount_bin) == + ((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts)); + + kd->pt_sparse_off = sparse_off; + kd->pt_sparse_size = (uint64_t)*popcount_bin * PAGE_SIZE; + kd->pt_page_size = page_size; + kd->pt_word_size = word_size; + return (0); } -void -_kvm_hpt_free(struct hpt *hpt) +/* + * Find the offset for the given physical page address; returns -1 otherwise. + * + * A page's offset is represented by the sparse page base offset plus the + * number of bits set before its bit multiplied by PAGE_SIZE. This means + * that if a page exists in the dump, it's necessary to know how many pages + * in the dump precede it. Reduce this O(n) counting to O(1) by caching the + * number of bits set at POPCOUNT_BITS intervals. + * + * Then to find the number of pages before the requested address, simply + * index into the cache and count the number of bits set between that cache + * bin and the page's bit. Halve the number of bytes that have to be + * checked by also counting down from the next higher bin if it's closer. + */ +off_t +_kvm_pt_find(kvm_t *kd, uint64_t pa) { - struct hpte *hpte, *next; - int i; + uint64_t *bitmap = kd->pt_map; + uint64_t pte_bit_id = pa / PAGE_SIZE; + uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap); + uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS; + uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap)); + uint64_t bitN; + uint32_t count; + + /* Check whether the page address requested is in the dump. */ + if (pte_bit_id >= (kd->pt_map_size * NBBY) || + (bitmap[pte_u64] & pte_mask) == 0) + return (-1); - for (i = 0; i < HPT_SIZE; i++) { - for (hpte = hpt->hpt_head[i]; hpte != NULL; hpte = next) { - next = hpte->next; - free(hpte); - } + /* + * Add/sub popcounts from the bitmap until the PTE's bit is reached. + * For bits that are in the upper half between the calculated + * popcount id and the next one, use the next one and subtract to + * minimize the number of popcounts required. + */ + if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) { + count = kd->pt_popcounts[popcount_id] + popcount_bytes( + bitmap + popcount_id * POPCOUNTS_IN(*bitmap), + 0, pte_bit_id - popcount_id * POPCOUNT_BITS); + } else { + /* + * Counting in reverse is trickier, since we must avoid + * reading from bytes that are not in range, and invert. + */ + uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap); + + popcount_id++; + bitN = MIN(popcount_id * POPCOUNT_BITS, + kd->pt_map_size * BITS_IN(uint8_t)); + count = kd->pt_popcounts[popcount_id] - popcount_bytes( + bitmap + pte_u64, + pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off); } + + /* + * This can only happen if the core is truncated. Treat these + * entries as if they don't exist, since their backing doesn't. + */ + if (count >= (kd->pt_sparse_size / PAGE_SIZE)) + return (-1); + + return (kd->pt_sparse_off + (uint64_t)count * PAGE_SIZE); } static int Modified: head/lib/libkvm/kvm_private.h ============================================================================== --- head/lib/libkvm/kvm_private.h Mon Jul 18 01:03:39 2016 (r302975) +++ head/lib/libkvm/kvm_private.h Mon Jul 18 01:55:25 2016 (r302976) @@ -97,23 +97,21 @@ struct __kvm { uintptr_t *dpcpu_off; /* base array, indexed by CPU ID */ u_int dpcpu_curcpu; /* CPU we're currently working with */ kvaddr_t dpcpu_curoff; /* dpcpu base of current CPU */ -}; -/* - * Page table hash used by minidump backends to map physical addresses - * to file offsets. - */ -struct hpte { - struct hpte *next; - uint64_t pa; - off_t off; + /* Page table lookup structures. */ + uint64_t *pt_map; + size_t pt_map_size; + off_t pt_sparse_off; + uint64_t pt_sparse_size; + uint32_t *pt_popcounts; + unsigned int pt_page_size; + unsigned int pt_word_size; }; -#define HPT_SIZE 1024 - -struct hpt { - struct hpte *hpt_head[HPT_SIZE]; -}; +/* Page table lookup constants. */ +#define POPCOUNT_BITS 1024 +#define BITS_IN(v) (sizeof(v) * NBBY) +#define POPCOUNTS_IN(v) (POPCOUNT_BITS / BITS_IN(v)) /* * Functions used internally by kvm, but across kvm modules. @@ -154,6 +152,5 @@ kvaddr_t _kvm_dpcpu_validaddr(kvm_t *, k int _kvm_probe_elf_kernel(kvm_t *, int, int); int _kvm_is_minidump(kvm_t *); int _kvm_read_core_phdrs(kvm_t *, size_t *, GElf_Phdr **); -void _kvm_hpt_init(kvm_t *, struct hpt *, void *, size_t, off_t, int, int); -off_t _kvm_hpt_find(struct hpt *, uint64_t); -void _kvm_hpt_free(struct hpt *); +int _kvm_pt_init(kvm_t *, size_t, off_t, off_t, int, int); +off_t _kvm_pt_find(kvm_t *, uint64_t); _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"