Changeset: 2c7f3ffefd38 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2c7f3ffefd38
Modified Files:
        gdk/gdk.h
        gdk/gdk_bat.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_storage.c
Branch: strheapvacuum
Log Message:

merge default


diffs (truncated from 30948 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -5753,6 +5753,7 @@ stdout of test 'MAL-signatures` in direc
 [ "batcalc",   "uuid", "command batcalc.uuid(X_0:bat[:str], 
X_1:bat[:oid]):bat[:uuid] ",       "UUIDstr2uuid_bulk;",   ""      ]
 [ "batcalc",   "uuid", "command batcalc.uuid(X_0:bat[:uuid], 
X_1:bat[:oid]):bat[:uuid] ",      "UUIDuuid2uuid_bulk;",  ""      ]
 [ "batcalc",   "wkb",  "command batcalc.wkb(X_0:bat[:wkb], X_1:bat[:oid], 
X_2:int, X_3:int):bat[:wkb] ",       "geom_2_geom_bat;",     ""      ]
+[ "batcalc",   "wkb",  "command batcalc.wkb(X_0:bat[:str], X_1:bat[:oid], 
X_2:int, X_3:int):bat[:wkb] ",       "wkbFromText_bat_cand;",        ""      ]
 [ "batcalc",   "xml",  "command batcalc.xml(X_0:bat[:str]):bat[:xml] ",        
"BATXMLstr2xml;",       ""      ]
 [ "batcalc",   "xor",  "pattern batcalc.xor(X_0:bat[:bit], 
X_1:bat[:bit]):bat[:bit] ", "CMDbatXOR;",   ""      ]
 [ "batcalc",   "xor",  "pattern batcalc.xor(X_0:bat[:bit], X_1:bat[:bit], 
X_2:bat[:oid], X_3:bat[:oid]):bat[:bit] ",   "CMDbatXOR;",   ""      ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -8277,6 +8277,7 @@ stdout of test 'MAL-signatures` in direc
 [ "batcalc",   "uuid", "command batcalc.uuid(X_0:bat[:str], 
X_1:bat[:oid]):bat[:uuid] ",       "UUIDstr2uuid_bulk;",   ""      ]
 [ "batcalc",   "uuid", "command batcalc.uuid(X_0:bat[:uuid], 
X_1:bat[:oid]):bat[:uuid] ",      "UUIDuuid2uuid_bulk;",  ""      ]
 [ "batcalc",   "wkb",  "command batcalc.wkb(X_0:bat[:wkb], X_1:bat[:oid], 
X_2:int, X_3:int):bat[:wkb] ",       "geom_2_geom_bat;",     ""      ]
+[ "batcalc",   "wkb",  "command batcalc.wkb(X_0:bat[:str], X_1:bat[:oid], 
X_2:int, X_3:int):bat[:wkb] ",       "wkbFromText_bat_cand;",        ""      ]
 [ "batcalc",   "xml",  "command batcalc.xml(X_0:bat[:str]):bat[:xml] ",        
"BATXMLstr2xml;",       ""      ]
 [ "batcalc",   "xor",  "pattern batcalc.xor(X_0:bat[:bit], 
X_1:bat[:bit]):bat[:bit] ", "CMDbatXOR;",   ""      ]
 [ "batcalc",   "xor",  "pattern batcalc.xor(X_0:bat[:bit], X_1:bat[:bit], 
X_2:bat[:oid], X_3:bat[:oid]):bat[:bit] ",   "CMDbatXOR;",   ""      ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -187,7 +187,7 @@ BAT *BATsample_with_seed(BAT *b, BUN n, 
 gdk_return BATsave(BAT *b) __attribute__((__warn_unused_result__));
 BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool 
hi, bool anti);
 gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
bool nil_matches, bool max_one, BUN estimate) 
__attribute__((__warn_unused_result__));
-gdk_return BATsetaccess(BAT *b, restrict_t mode);
+BAT *BATsetaccess(BAT *b, restrict_t mode) 
__attribute__((__warn_unused_result__));
 void BATsetcapacity(BAT *b, BUN cnt);
 void BATsetcount(BAT *b, BUN cnt);
 BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/common/stream/stdio_stream.c b/common/stream/stdio_stream.c
--- a/common/stream/stdio_stream.c
+++ b/common/stream/stdio_stream.c
@@ -31,9 +31,8 @@ file_read(stream *restrict s, void *rest
                return -1;
        }
 
-       if (elmsize && cnt && !feof(fp)) {
-               if (ferror(fp) ||
-                   ((rc = fread(buf, elmsize, cnt, fp)) == 0 && ferror(fp))) {
+       if (elmsize && cnt) {
+               if ((rc = fread(buf, elmsize, cnt, fp)) == 0 && ferror(fp)) {
                        mnstr_set_error_errno(s, MNSTR_READ_ERROR, "read 
error");
                        return -1;
                }
@@ -55,7 +54,7 @@ file_write(stream *restrict s, const voi
        if (elmsize && cnt) {
                size_t rc = fwrite(buf, elmsize, cnt, fp);
 
-               if (ferror(fp)) {
+               if (!rc && ferror(fp)) {
                        mnstr_set_error_errno(s, MNSTR_WRITE_ERROR, "write 
error");
                        return -1;
                }
diff --git a/common/utils/mcrypt.c b/common/utils/mcrypt.c
--- a/common/utils/mcrypt.c
+++ b/common/utils/mcrypt.c
@@ -505,6 +505,8 @@ mcrypt_hashPassword(
        } else
 #endif
        {
+               (void) len;
+               (void) ret;
                (void) algo;
                (void) password;
                (void) challenge;
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -331,6 +331,8 @@
 #include <limits.h>            /* for *_MIN and *_MAX */
 #include <float.h>             /* for FLT_MAX and DBL_MAX */
 
+typedef enum { GDK_FAIL, GDK_SUCCEED } gdk_return;
+
 #include "gdk_system.h"
 #include "gdk_posix.h"
 #include "stream.h"
@@ -524,8 +526,6 @@ typedef size_t BUN;
 /*
  * @- Checking and Error definitions:
  */
-typedef enum { GDK_FAIL, GDK_SUCCEED } gdk_return;
-
 #define ATOMextern(t)  (ATOMstorage(t) >= TYPE_str)
 
 typedef enum {
@@ -776,14 +776,6 @@ typedef struct BAT {
        MT_Lock batIdxLock;     /* lock to manipulate other indexes/properties 
*/
 } BAT;
 
-typedef struct BATiter {
-       BAT *b;
-       union {
-               oid tvid;
-               bool tmsk;
-       };
-} BATiter;
-
 /* macros to hide complexity of the BAT structure */
 #define ttype          T.type
 #define tkey           T.key
@@ -879,6 +871,131 @@ gdk_export size_t HEAPmemsize(Heap *h);
 gdk_export void HEAPdecref(Heap *h, bool remove);
 gdk_export void HEAPincref(Heap *h);
 
+/* BAT iterator, also protects use of BAT heaps with reference counts.
+ *
+ * A BAT iterator has to be used with caution, but it does have to be
+ * used in many places.
+ *
+ * An iterator is initialized by assigning it the result of a call to
+ * either bat_iterator or bat_iterator_nolock.  The former must be
+ * accompanied by a call to bat_iterator_end to release resources.
+ *
+ * bat_iterator should be used for BATs that could possibly be modified
+ * in another thread while we're reading the contents of the BAT.
+ * Alternatively, but only for very quick access, the theaplock can be
+ * taken, the data read, and the lock released.  For longer duration
+ * accesses, it is better to use the iterator, even without the BUNt*
+ * macros, since the theaplock is only held very briefly.
+ *
+ * If BATs are to be modified, higher level code must assure that no
+ * other thread is going to modify the same BAT at the same time.  A
+ * to-be-modified BAT should not use bat_iterator.  It can use
+ * bat_iterator_nolock, but be aware that this creates a copy of the
+ * heap pointer(s) (i.e. theap and tvheap) and if the heaps get
+ * extended, the pointers in the BAT structure may be modified, but that
+ * does not modify the pointers in the iterator.  This means that after
+ * operations that may grow a heap, the iterator should be
+ * reinitialized.
+ *
+ * The BAT iterator provides a number of fields that can (and often
+ * should) be used to access information about the BAT.  For string
+ * BATs, if a parallel thread adds values, the offset heap (theap) may
+ * get replaced by one that is wider.  This involves changing the
+ * twidth and tshift values in the BAT structure.  These changed values
+ * should not be used to access the data in the iterator.  Instead, use
+ * the width and shift values in the iterator itself.
+ */
+typedef struct BATiter {
+       BAT *b;
+       Heap *h;
+       void *base;
+       Heap *vh;
+       BUN count;
+       uint16_t width;
+       uint8_t shift;
+       int8_t type;
+       oid tseq;
+       union {
+               oid tvid;
+               bool tmsk;
+       };
+#ifndef NDEBUG
+       bool locked;
+#endif
+} BATiter;
+
+static inline BATiter
+bat_iterator(BAT *b)
+{
+       /* needs matching bat_iterator_end */
+       BATiter bi;
+       if (b) {
+               MT_lock_set(&b->theaplock);
+               bi = (BATiter) {
+                       .b = b,
+                       .h = b->theap,
+                       .base = b->theap->base ? b->theap->base + (b->tbaseoff 
<< b->tshift) : NULL,
+                       .vh = b->tvheap,
+                       .count = b->batCount,
+                       .width = b->twidth,
+                       .shift = b->tshift,
+                       .type = b->ttype,
+                       .tseq = b->tseqbase,
+#ifndef NDEBUG
+                       .locked = true,
+#endif
+               };
+               HEAPincref(bi.h);
+               if (bi.vh)
+                       HEAPincref(bi.vh);
+               MT_lock_unset(&b->theaplock);
+       } else {
+               bi = (BATiter) {
+                       .b = NULL,
+#ifndef NDEBUG
+                       .locked = true,
+#endif
+               };
+       }
+       return bi;
+}
+
+static inline void
+bat_iterator_end(BATiter *bip)
+{
+       /* matches bat_iterator */
+       assert(bip);
+       assert(bip->locked);
+       if (bip->h)
+               HEAPdecref(bip->h, false);
+       if (bip->vh)
+               HEAPdecref(bip->vh, false);
+       *bip = (BATiter) {0};
+}
+
+static inline BATiter
+bat_iterator_nolock(BAT *b)
+{
+       /* does not get matched by bat_iterator_end */
+       if (b) {
+               return (BATiter) {
+                       .b = b,
+                       .h = b->theap,
+                       .base = b->theap->base ? b->theap->base + (b->tbaseoff 
<< b->tshift) : NULL,
+                       .vh = b->tvheap,
+                       .count = b->batCount,
+                       .width = b->twidth,
+                       .shift = b->tshift,
+                       .type = b->ttype,
+                       .tseq = b->tseqbase,
+#ifndef NDEBUG
+                       .locked = false,
+#endif
+               };
+       }
+       return (BATiter) {0};
+}
+
 /*
  * @- Internal HEAP Chunk Management
  * Heaps are used in BATs to store data for variable-size atoms.  The
@@ -995,7 +1112,6 @@ gdk_export BUN BUNfnd(BAT *b, const void
         (BUN) (*(const oid*)(v) - (b)->tseqbase))
 
 #define BATttype(b)    (BATtdense(b) ? TYPE_oid : (b)->ttype)
-#define Tbase(b)       ((b)->tvheap->base)
 
 #define Tsize(b)       ((b)->twidth)
 
@@ -1011,12 +1127,12 @@ typedef var_t stridx_t;
 #define SIZEOF_STRIDX_T SIZEOF_VAR_T
 #define GDK_VARALIGN SIZEOF_STRIDX_T
 
-#define BUNtvaroff(bi,p) VarHeapVal(Tloc((bi).b, 0), (p), (bi).b->twidth)
+#define BUNtvaroff(bi,p) VarHeapVal((bi).base, (p), (bi).width)
 
-#define BUNtloc(bi,p)  (ATOMstorage((bi).b->ttype) == TYPE_msk ? Tmsk(&(bi), 
p) : Tloc((bi).b,p))
+#define BUNtloc(bi,p)  (ATOMstorage((bi).type) == TYPE_msk ? Tmsk(&(bi), p) : 
(void *) ((char *) (bi).base + ((p) << (bi).shift)))
 #define BUNtpos(bi,p)  Tpos(&(bi),p)
-#define BUNtvar(bi,p)  (assert((bi).b->ttype && (bi).b->tvarsized), (void *) 
(Tbase((bi).b)+BUNtvaroff(bi,p)))
-#define BUNtail(bi,p)  
((bi).b->ttype?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p))
+#define BUNtvar(bi,p)  (assert((bi).type && (bi).b->tvarsized), (void *) 
((bi).vh->base+BUNtvaroff(bi,p)))
+#define BUNtail(bi,p)  
((bi).type?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p))
 
 #define BUNlast(b)     (assert((b)->batCount <= BUN_MAX), (b)->batCount)
 
@@ -1031,6 +1147,8 @@ typedef var_t stridx_t;
 static inline oid
 BUNtoid(BAT *b, BUN p)
 {
+       oid o;
+
        assert(ATOMtype(b->ttype) == TYPE_oid);
        /* BATcount is the number of valid entries, so with
         * exceptions, the last value can well be larger than
@@ -1039,13 +1157,18 @@ BUNtoid(BAT *b, BUN p)
        assert(b->ttype == TYPE_void || b->tvheap == NULL);
        if (is_oid_nil(b->tseqbase)) {
                if (b->ttype == TYPE_void)
-                       return b->tseqbase;
-               return ((const oid *) b->theap->base)[p + b->tbaseoff];
+                       return b->tseqbase; /* i.e. oid_nil */
+               MT_lock_set(&b->theaplock);
+               o = ((const oid *) b->theap->base)[p + b->tbaseoff];
+               MT_lock_unset(&b->theaplock);
+               return o;
        }
-       oid o = b->tseqbase + p;
+       o = b->tseqbase + p;
        if (b->ttype == TYPE_oid || b->tvheap == NULL) {
                return o;
        }
+       /* b->tvheap != NULL, so we know there will be no parallel
+        * modifications (so no locking) */
        assert(!mask_cand(b));
        /* exceptions only allowed on transient BATs */
        assert(b->batRole == TRANSIENT);
@@ -1072,8 +1195,6 @@ BUNtoid(BAT *b, BUN p)
        return o + hi;
 }
 
-#define bat_iterator(_b)       ((BATiter) {.b = (_b), .tvid = 0})
-
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to