Changeset: 9f9a288a6902 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/9f9a288a6902
Modified Files:
        gdk/gdk.h
        gdk/gdk_bbp.c
        gdk/gdk_heap.c
        gdk/gdk_storage.c
        sql/storage/bat/bat_logger.c
Branch: Jul2021
Log Message:

Don't save empty heap files.


diffs (270 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -560,7 +560,8 @@ typedef struct {
        bool hashash:1,         /* the string heap contains hash values */
                cleanhash:1,    /* string heaps must clean hash */
                dirty:1,        /* specific heap dirty marker */
-               remove:1;       /* remove storage file when freeing */
+               remove:1,       /* remove storage file when freeing */
+               wasempty:1;     /* heap was empty when last saved/created */
        storage_t storage;      /* storage mode (mmap/malloc). */
        storage_t newstorage;   /* new desired storage mode at re-allocation. */
        bat parentid;           /* cache id of VIEW parent bat */
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -760,45 +760,47 @@ BBPcheckbats(unsigned bbpversion)
                        /* no files needed */
                        continue;
                }
-               path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
-               if (path == NULL)
-                       return GDK_FAIL;
+               if (b->theap->free > 0) {
+                       path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
+                       if (path == NULL)
+                               return GDK_FAIL;
 #ifdef GDKLIBRARY_TAILN
-               /* if bbpversion > GDKLIBRARY_TAILN, the offset heap can
-                * exist with either name .tail1 (etc) or .tail, if <=
-                * GDKLIBRARY_TAILN, only with .tail */
-               char tailsave = 0;
-               size_t taillen = 0;
-               if (b->ttype == TYPE_str &&
-                   b->twidth < SIZEOF_VAR_T) {
-                       /* old version: .tail, not .tail1, .tail2, .tail4 */
-                       taillen = strlen(path) - 1;
-                       tailsave = path[taillen];
-                       path[taillen] = 0;
-               }
+                       /* if bbpversion > GDKLIBRARY_TAILN, the offset heap can
+                        * exist with either name .tail1 (etc) or .tail, if <=
+                        * GDKLIBRARY_TAILN, only with .tail */
+                       char tailsave = 0;
+                       size_t taillen = 0;
+                       if (b->ttype == TYPE_str &&
+                           b->twidth < SIZEOF_VAR_T) {
+                               /* old version: .tail, not .tail1, .tail2, .tail4 */
+                               taillen = strlen(path) - 1;
+                               tailsave = path[taillen];
+                               path[taillen] = 0;
+                       }
 #endif
-               if (MT_stat(path, &statb) < 0
+                       if (MT_stat(path, &statb) < 0
 #ifdef GDKLIBRARY_TAILN
-                   && bbpversion > GDKLIBRARY_TAILN
-                   && b->ttype == TYPE_str
-                   && b->twidth < SIZEOF_VAR_T
-                   && (path[taillen] = tailsave) != 0
-                   && MT_stat(path, &statb) < 0
+                           && bbpversion > GDKLIBRARY_TAILN
+                           && b->ttype == TYPE_str
+                           && b->twidth < SIZEOF_VAR_T
+                           && (path[taillen] = tailsave) != 0
+                           && MT_stat(path, &statb) < 0
 #endif
-                       ) {
-
-                       GDKsyserror("cannot stat file %s (expected size %zu)\n",
-                                   path, b->theap->free);
+                               ) {
+
+                               GDKsyserror("cannot stat file %s (expected size %zu)\n",
+                                           path, b->theap->free);
+                               GDKfree(path);
+                               return GDK_FAIL;
+                       }
+                       if ((size_t) statb.st_size < b->theap->free) {
+                               GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size);
+                               GDKfree(path);
+                               return GDK_FAIL;
+                       }
                        GDKfree(path);
-                       return GDK_FAIL;
                }
-               if ((size_t) statb.st_size < b->theap->free) {
-                       GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size);
-                       GDKfree(path);
-                       return GDK_FAIL;
-               }
-               GDKfree(path);
-               if (b->tvheap != NULL) {
+               if (b->tvheap != NULL && b->tvheap->free > 0) {
                        path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap");
                        if (path == NULL)
                                return GDK_FAIL;
@@ -2386,7 +2388,7 @@ decref(bat i, bool logical, bool release
                 * while locked so no other thread thinks it's
                 * available anymore */
                assert((BBP_status(i) & BBPUNLOADING) == 0);
-               TRC_DEBUG(BAT_, "%s set to unloading BAT %d\n", func, i);
+               TRC_DEBUG(BAT_, "%s set to unloading BAT %d (status %u)\n", func, i, BBP_status(i));
                BBP_status_on(i, BBPUNLOADING);
                swap = true;
        }
@@ -2930,6 +2932,10 @@ do_backup(const char *srcdir, const char
        char extnew[16];
        bool istail = strncmp(ext, "tail", 4) == 0;
 
+       if (h->wasempty) {
+               return GDK_SUCCEED;
+       }
+
        /* direct mmap is unprotected (readonly usage, or has WAL
         * protection); however, if we're backing up for subcommit
         * and a backup already exists in the main backup directory
diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c
--- a/gdk/gdk_heap.c
+++ b/gdk/gdk_heap.c
@@ -103,6 +103,7 @@ HEAPgrow(const Heap *old, size_t size)
                .dirty = true,
                .remove = old->remove,
                .parentid = old->parentid,
+               .wasempty = old->wasempty,
        };
        memcpy(new->filename, old->filename, sizeof(new->filename));
        if (HEAPalloc(new, size, 1, 1) != GDK_SUCCEED) {
@@ -147,7 +148,7 @@ HEAPalloc(Heap *h, size_t nitems, size_t
             h->size < (h->farmid == 0 ? GDK_mmap_minsize_persistent : GDK_mmap_minsize_transient))) {
                h->storage = STORE_MEM;
                h->base = GDKmalloc(h->size);
-               TRC_DEBUG(HEAP, "HEAPalloc %zu %p\n", h->size, h->base);
+               TRC_DEBUG(HEAP, "%s %zu %p\n", h->filename, h->size, h->base);
        }
        if (!GDKinmemory(h->farmid) && h->base == NULL) {
                char *nme;
@@ -244,7 +245,7 @@ HEAPextend(Heap *h, size_t size, bool ma
                if (!must_mmap) {
                        h->newstorage = h->storage = STORE_MEM;
                        h->base = GDKrealloc(h->base, size);
-                       TRC_DEBUG(HEAP, "Extending malloced heap %zu %zu %p %p\n", size, h->size, bak.base, h->base);
+                       TRC_DEBUG(HEAP, "Extending malloced heap %s %zu %zu %p %p\n", h->filename, size, h->size, bak.base, h->base);
                        h->size = size;
                        if (h->base)
                                return GDK_SUCCEED; /* success */
@@ -288,7 +289,7 @@ HEAPextend(Heap *h, size_t size, bool ma
                                h->newstorage = h->storage;
 
                                h->base = NULL;
-                               TRC_DEBUG(HEAP, "Converting malloced to %s mmapped heap\n", h->newstorage == STORE_MMAP ? "shared" : "privately");
+                               TRC_DEBUG(HEAP, "Converting malloced to %s mmapped heap %s\n", h->newstorage == STORE_MMAP ? "shared" : "privately", h->filename);
                                /* try to allocate a memory-mapped based
                                 * heap */
                                if (HEAPload(h, nme, ext, false) == GDK_SUCCEED) {
@@ -337,10 +338,8 @@ HEAPshrink(Heap *h, size_t size)
        assert(size <= h->size);
        if (h->storage == STORE_MEM) {
                p = GDKrealloc(h->base, size);
-               TRC_DEBUG(HEAP, "Shrinking malloced "
-                         "heap %zu %zu %p "
-                         "%p\n", h->size, size,
-                         h->base, p);
+               TRC_DEBUG(HEAP, "Shrinking malloced heap %s %zu %zu %p %p\n",
+                         h->filename, h->size, size, h->base, p);
        } else {
                char *path;
 
@@ -548,6 +547,7 @@ GDKupgradevarheap(BAT *b, var_t v, BUN c
                .dirty = true,
                .remove = old->remove,
                .parentid = old->parentid,
+               .wasempty = old->wasempty,
        };
        settailname(new, BBP_physical(b->batCacheid), b->ttype, width);
        if (HEAPalloc(new, newsize, 1, 1) != GDK_SUCCEED) {
@@ -673,7 +673,7 @@ HEAPfree(Heap *h, bool rmheap)
 {
        if (h->base) {
                if (h->storage == STORE_MEM) {  /* plain memory */
-                       TRC_DEBUG(HEAP, "HEAPfree %zu %p\n", h->size, h->base);
+                       TRC_DEBUG(HEAP, "HEAPfree %s %zu %p\n", h->filename, h->size, h->base);
                        GDKfree(h->base);
                } else if (h->storage == STORE_CMEM) {
                        //heap is stored in regular C memory rather than GDK memory,so we call free()
@@ -773,7 +773,7 @@ HEAPload_intern(Heap *h, const char *nme
                }
        }
 
-       TRC_DEBUG(HEAP, "HEAPload(%s%s%s,storage=%d,free=%zu,size=%zu)\n",
+       TRC_DEBUG(HEAP, "%s%s%s,storage=%d,free=%zu,size=%zu\n",
                  nme, ext ? "." : "", ext ? ext : "",
                  (int) h->storage, h->free, h->size);
 
@@ -801,7 +801,12 @@ HEAPload_intern(Heap *h, const char *nme
        GDKfree(srcpath);
        GDKfree(dstpath);
 
-       h->base = GDKload(h->farmid, nme, ext, h->free, &h->size, h->newstorage);
+       if (h->storage == STORE_MEM && h->free == 0) {
+               h->base = GDKmalloc(h->size);
+               h->wasempty = true;
+       } else {
+               h->base = GDKload(h->farmid, nme, ext, h->free, &h->size, h->storage);
+       }
        if (h->base == NULL)
                return GDK_FAIL; /* file could  not be read satisfactorily */
 
@@ -834,11 +839,22 @@ HEAPsave_intern(Heap *h, const char *nme
 {
        storage_t store = h->newstorage;
        long_str extension;
+       gdk_return rc;
 
        if (h->base == NULL) {
                GDKerror("no heap to save\n");
                return GDK_FAIL;
        }
+       if (h->free == 0) {
+               /* nothing to see, please move on */
+               h->wasempty = true;
+               TRC_DEBUG(HEAP,
+                         "not saving: "
+                         "(%s.%s,storage=%d,free=%zu,size=%zu,dosync=%s)\n",
+                         nme?nme:"", ext, (int) h->newstorage, h->free, h->size,
+                         dosync?"true":"false");
+               return GDK_SUCCEED;
+       }
        if (h->storage != STORE_MEM && store == STORE_PRIV) {
                /* anonymous or private VM is saved as if it were malloced */
                store = STORE_MEM;
@@ -852,7 +868,10 @@ HEAPsave_intern(Heap *h, const char *nme
                  "(%s.%s,storage=%d,free=%zu,size=%zu,dosync=%s)\n",
                  nme?nme:"", ext, (int) h->newstorage, h->free, h->size,
                  dosync?"true":"false");
-       return GDKsave(h->farmid, nme, ext, h->base, h->free, store, dosync);
+       rc = GDKsave(h->farmid, nme, ext, h->base, h->free, store, dosync);
+       if (rc == GDK_SUCCEED)
+               h->wasempty = false;
+       return rc;
 }
 
 gdk_return
diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c
--- a/gdk/gdk_storage.c
+++ b/gdk/gdk_storage.c
@@ -855,6 +855,9 @@ BATsave_locked(BAT *bd)
        if (b->tvheap)
                HEAPdecref(b->tvheap, false);
        if (err == GDK_SUCCEED) {
+               bd->theap->wasempty = hs.wasempty;
+               if (bd->tvheap)
+                       bd->tvheap->wasempty = vhs.wasempty;
                bd->batCopiedtodisk = true;
                DESCclean(bd);
                if (bd->thash && bd->thash != (Hash *) 1)
diff --git a/sql/storage/bat/bat_logger.c b/sql/storage/bat/bat_logger.c
--- a/sql/storage/bat/bat_logger.c
+++ b/sql/storage/bat/bat_logger.c
@@ -2500,6 +2500,10 @@ snapshot_heap(stream *plan, const char *
        struct stat statbuf;
        int len;
 
+       if (extent == 0) {
+               /* nothing to copy */
+               return GDK_SUCCEED;
+       }
        // first check the backup dir
        len = snprintf(path1, FILENAME_MAX, "%s/%s/%" PRIo64 "%s", db_dir, BAKDIR, batid, suffix);
        if (len == -1 || len >= FILENAME_MAX) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to