Changeset: 9f9a288a6902 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/9f9a288a6902
Modified Files:
	gdk/gdk.h
	gdk/gdk_bbp.c
	gdk/gdk_heap.c
	gdk/gdk_storage.c
	sql/storage/bat/bat_logger.c
Branch: Jul2021
Log Message:

Don't save empty heap files. diffs (270 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -560,7 +560,8 @@ typedef struct { bool hashash:1, /* the string heap contains hash values */ cleanhash:1, /* string heaps must clean hash */ dirty:1, /* specific heap dirty marker */ - remove:1; /* remove storage file when freeing */ + remove:1, /* remove storage file when freeing */ + wasempty:1; /* heap was empty when last saved/created */ storage_t storage; /* storage mode (mmap/malloc). */ storage_t newstorage; /* new desired storage mode at re-allocation. */ bat parentid; /* cache id of VIEW parent bat */ diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -760,45 +760,47 @@ BBPcheckbats(unsigned bbpversion) /* no files needed */ continue; } - path = GDKfilepath(0, BATDIR, b->theap->filename, NULL); - if (path == NULL) - return GDK_FAIL; + if (b->theap->free > 0) { + path = GDKfilepath(0, BATDIR, b->theap->filename, NULL); + if (path == NULL) + return GDK_FAIL; #ifdef GDKLIBRARY_TAILN - /* if bbpversion > GDKLIBRARY_TAILN, the offset heap can - * exist with either name .tail1 (etc) or .tail, if <= - * GDKLIBRARY_TAILN, only with .tail */ - char tailsave = 0; - size_t taillen = 0; - if (b->ttype == TYPE_str && - b->twidth < SIZEOF_VAR_T) { - /* old version: .tail, not .tail1, .tail2, .tail4 */ - taillen = strlen(path) - 1; - tailsave = path[taillen]; - path[taillen] = 0; - } + /* if bbpversion > GDKLIBRARY_TAILN, the offset heap can + * exist with either name .tail1 (etc) or .tail, if <= + * GDKLIBRARY_TAILN, only with .tail */ + char tailsave = 0; + size_t taillen = 0; + if (b->ttype == TYPE_str && + b->twidth < SIZEOF_VAR_T) { + /* old version: .tail, not .tail1, .tail2, .tail4 */ + taillen = strlen(path) - 1; + tailsave = path[taillen]; + path[taillen] = 0; + } #endif - if (MT_stat(path, &statb) < 0 + if (MT_stat(path, &statb) < 0 #ifdef GDKLIBRARY_TAILN - && bbpversion > GDKLIBRARY_TAILN - && b->ttype == 
TYPE_str - && b->twidth < SIZEOF_VAR_T - && (path[taillen] = tailsave) != 0 - && MT_stat(path, &statb) < 0 + && bbpversion > GDKLIBRARY_TAILN + && b->ttype == TYPE_str + && b->twidth < SIZEOF_VAR_T + && (path[taillen] = tailsave) != 0 + && MT_stat(path, &statb) < 0 #endif - ) { - - GDKsyserror("cannot stat file %s (expected size %zu)\n", - path, b->theap->free); + ) { + + GDKsyserror("cannot stat file %s (expected size %zu)\n", + path, b->theap->free); + GDKfree(path); + return GDK_FAIL; + } + if ((size_t) statb.st_size < b->theap->free) { + GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size); + GDKfree(path); + return GDK_FAIL; + } GDKfree(path); - return GDK_FAIL; } - if ((size_t) statb.st_size < b->theap->free) { - GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size); - GDKfree(path); - return GDK_FAIL; - } - GDKfree(path); - if (b->tvheap != NULL) { + if (b->tvheap != NULL && b->tvheap->free > 0) { path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap"); if (path == NULL) return GDK_FAIL; @@ -2386,7 +2388,7 @@ decref(bat i, bool logical, bool release * while locked so no other thread thinks it's * available anymore */ assert((BBP_status(i) & BBPUNLOADING) == 0); - TRC_DEBUG(BAT_, "%s set to unloading BAT %d\n", func, i); + TRC_DEBUG(BAT_, "%s set to unloading BAT %d (status %u)\n", func, i, BBP_status(i)); BBP_status_on(i, BBPUNLOADING); swap = true; } @@ -2930,6 +2932,10 @@ do_backup(const char *srcdir, const char char extnew[16]; bool istail = strncmp(ext, "tail", 4) == 0; + if (h->wasempty) { + return GDK_SUCCEED; + } + /* direct mmap is unprotected (readonly usage, or has WAL * protection); however, if we're backing up for subcommit * and a backup already exists in the main backup directory diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c --- a/gdk/gdk_heap.c +++ b/gdk/gdk_heap.c @@ -103,6 +103,7 @@ HEAPgrow(const Heap *old, size_t size) .dirty = 
true, .remove = old->remove, .parentid = old->parentid, + .wasempty = old->wasempty, }; memcpy(new->filename, old->filename, sizeof(new->filename)); if (HEAPalloc(new, size, 1, 1) != GDK_SUCCEED) { @@ -147,7 +148,7 @@ HEAPalloc(Heap *h, size_t nitems, size_t h->size < (h->farmid == 0 ? GDK_mmap_minsize_persistent : GDK_mmap_minsize_transient))) { h->storage = STORE_MEM; h->base = GDKmalloc(h->size); - TRC_DEBUG(HEAP, "HEAPalloc %zu %p\n", h->size, h->base); + TRC_DEBUG(HEAP, "%s %zu %p\n", h->filename, h->size, h->base); } if (!GDKinmemory(h->farmid) && h->base == NULL) { char *nme; @@ -244,7 +245,7 @@ HEAPextend(Heap *h, size_t size, bool ma if (!must_mmap) { h->newstorage = h->storage = STORE_MEM; h->base = GDKrealloc(h->base, size); - TRC_DEBUG(HEAP, "Extending malloced heap %zu %zu %p %p\n", size, h->size, bak.base, h->base); + TRC_DEBUG(HEAP, "Extending malloced heap %s %zu %zu %p %p\n", h->filename, size, h->size, bak.base, h->base); h->size = size; if (h->base) return GDK_SUCCEED; /* success */ @@ -288,7 +289,7 @@ HEAPextend(Heap *h, size_t size, bool ma h->newstorage = h->storage; h->base = NULL; - TRC_DEBUG(HEAP, "Converting malloced to %s mmapped heap\n", h->newstorage == STORE_MMAP ? "shared" : "privately"); + TRC_DEBUG(HEAP, "Converting malloced to %s mmapped heap %s\n", h->newstorage == STORE_MMAP ? 
"shared" : "privately", h->filename); /* try to allocate a memory-mapped based * heap */ if (HEAPload(h, nme, ext, false) == GDK_SUCCEED) { @@ -337,10 +338,8 @@ HEAPshrink(Heap *h, size_t size) assert(size <= h->size); if (h->storage == STORE_MEM) { p = GDKrealloc(h->base, size); - TRC_DEBUG(HEAP, "Shrinking malloced " - "heap %zu %zu %p " - "%p\n", h->size, size, - h->base, p); + TRC_DEBUG(HEAP, "Shrinking malloced heap %s %zu %zu %p %p\n", + h->filename, h->size, size, h->base, p); } else { char *path; @@ -548,6 +547,7 @@ GDKupgradevarheap(BAT *b, var_t v, BUN c .dirty = true, .remove = old->remove, .parentid = old->parentid, + .wasempty = old->wasempty, }; settailname(new, BBP_physical(b->batCacheid), b->ttype, width); if (HEAPalloc(new, newsize, 1, 1) != GDK_SUCCEED) { @@ -673,7 +673,7 @@ HEAPfree(Heap *h, bool rmheap) { if (h->base) { if (h->storage == STORE_MEM) { /* plain memory */ - TRC_DEBUG(HEAP, "HEAPfree %zu %p\n", h->size, h->base); + TRC_DEBUG(HEAP, "HEAPfree %s %zu %p\n", h->filename, h->size, h->base); GDKfree(h->base); } else if (h->storage == STORE_CMEM) { //heap is stored in regular C memory rather than GDK memory,so we call free() @@ -773,7 +773,7 @@ HEAPload_intern(Heap *h, const char *nme } } - TRC_DEBUG(HEAP, "HEAPload(%s%s%s,storage=%d,free=%zu,size=%zu)\n", + TRC_DEBUG(HEAP, "%s%s%s,storage=%d,free=%zu,size=%zu\n", nme, ext ? "." : "", ext ? 
ext : "", (int) h->storage, h->free, h->size); @@ -801,7 +801,12 @@ HEAPload_intern(Heap *h, const char *nme GDKfree(srcpath); GDKfree(dstpath); - h->base = GDKload(h->farmid, nme, ext, h->free, &h->size, h->newstorage); + if (h->storage == STORE_MEM && h->free == 0) { + h->base = GDKmalloc(h->size); + h->wasempty = true; + } else { + h->base = GDKload(h->farmid, nme, ext, h->free, &h->size, h->storage); + } if (h->base == NULL) return GDK_FAIL; /* file could not be read satisfactorily */ @@ -834,11 +839,22 @@ HEAPsave_intern(Heap *h, const char *nme { storage_t store = h->newstorage; long_str extension; + gdk_return rc; if (h->base == NULL) { GDKerror("no heap to save\n"); return GDK_FAIL; } + if (h->free == 0) { + /* nothing to see, please move on */ + h->wasempty = true; + TRC_DEBUG(HEAP, + "not saving: " + "(%s.%s,storage=%d,free=%zu,size=%zu,dosync=%s)\n", + nme?nme:"", ext, (int) h->newstorage, h->free, h->size, + dosync?"true":"false"); + return GDK_SUCCEED; + } if (h->storage != STORE_MEM && store == STORE_PRIV) { /* anonymous or private VM is saved as if it were malloced */ store = STORE_MEM; @@ -852,7 +868,10 @@ HEAPsave_intern(Heap *h, const char *nme "(%s.%s,storage=%d,free=%zu,size=%zu,dosync=%s)\n", nme?nme:"", ext, (int) h->newstorage, h->free, h->size, dosync?"true":"false"); - return GDKsave(h->farmid, nme, ext, h->base, h->free, store, dosync); + rc = GDKsave(h->farmid, nme, ext, h->base, h->free, store, dosync); + if (rc == GDK_SUCCEED) + h->wasempty = false; + return rc; } gdk_return diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c --- a/gdk/gdk_storage.c +++ b/gdk/gdk_storage.c @@ -855,6 +855,9 @@ BATsave_locked(BAT *bd) if (b->tvheap) HEAPdecref(b->tvheap, false); if (err == GDK_SUCCEED) { + bd->theap->wasempty = hs.wasempty; + if (bd->tvheap) + bd->tvheap->wasempty = vhs.wasempty; bd->batCopiedtodisk = true; DESCclean(bd); if (bd->thash && bd->thash != (Hash *) 1) diff --git a/sql/storage/bat/bat_logger.c b/sql/storage/bat/bat_logger.c --- 
a/sql/storage/bat/bat_logger.c +++ b/sql/storage/bat/bat_logger.c @@ -2500,6 +2500,10 @@ snapshot_heap(stream *plan, const char * struct stat statbuf; int len; + if (extent == 0) { + /* nothing to copy */ + return GDK_SUCCEED; + } // first check the backup dir len = snprintf(path1, FILENAME_MAX, "%s/%s/%" PRIo64 "%s", db_dir, BAKDIR, batid, suffix); if (len == -1 || len >= FILENAME_MAX) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list