Changeset: db8144a929c8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/db8144a929c8
Branch: sqloptimizer
Log Message:
Merged with default
diffs (truncated from 2926 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -33008,6 +33008,11 @@ similarity
command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl]
fstrcmp0_impl_bulk;
Normalized edit distance between two strings
+baturl
+extractURLHost
+command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str]
+BATextractURLHost;
+Extract host from BAT of URLs
batuuid
isaUUID
command batuuid.isaUUID(X_0:bat[:str]):bat[:bit]
@@ -47999,6 +48004,11 @@ command txtsim.stringdiff(X_0:str, X_1:s
stringdiff_impl;
calculate the soundexed editdistance
url
+extractURLHost
+command url.extractURLHost(X_0:str, X_1:bit):str
+extractURLHost;
+Extract host from a URL relaxed version
+url
getAnchor
command url.getAnchor(X_0:url):str
URLgetAnchor;
@@ -48032,7 +48042,7 @@ url
getHost
command url.getHost(X_0:url):str
URLgetHost;
-Extract the server name from the URL
+Extract the server name from the URL strict version
url
getPort
command url.getPort(X_0:url):str
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -758,8 +758,9 @@ typedef struct {
#define GDKLIBRARY_MINMAX_POS 061042U /* first in Nov2019: no min/max position; no BBPinfo value */
#define GDKLIBRARY_TAILN 061043U /* first in Jul2021: str offset heaps names don't take width into account */
#define GDKLIBRARY_HASHASH 061044U /* first in Jul2021: hashash bit in string heaps */
+#define GDKLIBRARY_HSIZE 061045U /* first in Jan2022: heap "size" values */
/* if the version number is updated, also fix snapshot_bats() in bat_logger.c */
-#define GDKLIBRARY 061045U /* first after Jul2021 */
+#define GDKLIBRARY 061046U /* first after Jan2022 */
typedef struct BAT {
/* static bat properties */
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -180,28 +180,6 @@ BATsetdims(BAT *b, uint16_t width)
}
const char *
-gettailnamebi(const BATiter *bi)
-{
- if (bi->type == TYPE_str) {
- switch (bi->width) {
- case 1:
- return "tail1";
- case 2:
- return "tail2";
- case 4:
-#if SIZEOF_VAR_T == 8
- return "tail4";
- case 8:
-#endif
- break;
- default:
- MT_UNREACHABLE();
- }
- }
- return "tail";
-}
-
-const char *
gettailname(const BAT *b)
{
if (b->ttype == TYPE_str) {
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -443,6 +443,8 @@ heapinit(BAT *b, const char *buf,
(void) bbpversion; /* could be used to implement compatibility */
minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case
*/
+ size = 0; /* for GDKLIBRARY_HSIZE case */
+ storage = STORE_INVALID; /* for GDKLIBRARY_HSIZE case */
if (bbpversion <= GDKLIBRARY_MINMAX_POS ?
sscanf(buf,
" %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
@@ -453,6 +455,7 @@ heapinit(BAT *b, const char *buf,
&nokey1, &nosorted, &norevsorted, &base,
&free, &size, &storage,
&n) < 12 :
+ bbpversion <= GDKLIBRARY_HSIZE ?
sscanf(buf,
" %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
" %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
@@ -461,7 +464,16 @@ heapinit(BAT *b, const char *buf,
type, &width, &var, &properties, &nokey0,
&nokey1, &nosorted, &norevsorted, &base,
&free, &size, &storage, &minpos, &maxpos,
- &n) < 14) {
+ &n) < 14 :
+ sscanf(buf,
+ " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
+ " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
+ " %" SCNu64 " %" SCNu64 " %" SCNu64
+ "%n",
+ type, &width, &var, &properties, &nokey0,
+ &nokey1, &nosorted, &norevsorted, &base,
+ &free, &minpos, &maxpos,
+ &n) < 12) {
TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d",
lineno);
return -1;
}
@@ -515,7 +527,14 @@ heapinit(BAT *b, const char *buf,
/* (properties & 0x0200) is the old tdense flag */
b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil
? oid_nil : (oid) base;
b->theap->free = (size_t) free;
- b->theap->size = (size_t) size;
+ /* set heap size to match capacity */
+ if (b->ttype == TYPE_msk) {
+ /* round up capacity to multiple of 32 */
+ b->batCapacity = (b->batCapacity + 31) & ~((BUN) 31);
+ b->theap->size = b->batCapacity / 8;
+ } else {
+ b->theap->size = (size_t) b->batCapacity << b->tshift;
+ }
b->theap->base = NULL;
settailname(b->theap, filename, t, width);
b->theap->storage = STORE_INVALID;
@@ -531,25 +550,30 @@ heapinit(BAT *b, const char *buf,
b->tmaxpos = (BUN) maxpos;
else
b->tmaxpos = BUN_NONE;
- if (b->theap->free > b->theap->size) {
- TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in heap
of bat %d on line %d\n", (int) bid, lineno);
- return -1;
- }
return n;
}
static int
-vheapinit(BAT *b, const char *buf, bat bid, const char *filename, int lineno)
+vheapinit(BAT *b, const char *buf, bat bid, unsigned bbpversion, const char *filename, int lineno)
{
int n = 0;
uint64_t free, size;
uint16_t storage;
+ (void) bbpversion; /* could be used to implement compatibility */
+
+ size = 0; /* for GDKLIBRARY_HSIZE case */
+ storage = STORE_INVALID; /* for GDKLIBRARY_HSIZE case */
if (b->tvarsized && b->ttype != TYPE_void) {
- if (sscanf(buf,
+ if (bbpversion <= GDKLIBRARY_HSIZE ?
+ sscanf(buf,
" %" SCNu64 " %" SCNu64 " %" SCNu16
"%n",
- &free, &size, &storage, &n) < 3) {
+ &free, &size, &storage, &n) < 3 :
+ sscanf(buf,
+ " %" SCNu64
+ "%n",
+ &free, &n) < 1) {
TRC_CRITICAL(GDK, "invalid format for BBP.dir on line
%d", lineno);
return -1;
}
@@ -558,6 +582,13 @@ vheapinit(BAT *b, const char *buf, bat b
TRC_CRITICAL(GDK, "cannot allocate memory for heap.");
return -1;
}
+ if (ATOMstorage(b->ttype) == TYPE_str &&
+ free < GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY *
GDK_VARALIGN)
+ size = GDK_STRHASHTABLE * sizeof(stridx_t) + BATTINY *
GDK_VARALIGN;
+ else if (free < 512)
+ size = 512;
+ else
+ size = free;
*b->tvheap = (Heap) {
.free = (size_t) free,
.size = (size_t) size,
@@ -572,10 +603,6 @@ vheapinit(BAT *b, const char *buf, bat b
strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
filename, ".theap", NULL);
ATOMIC_INIT(&b->tvheap->refs, 1);
- if (b->tvheap->free > b->tvheap->size) {
- TRC_CRITICAL(GDK, "\"free\" value larger than \"size\"
in var heap of bat %d on line %d\n", (int) bid, lineno);
- return -1;
- }
}
return n;
}
@@ -605,7 +632,7 @@ BBPreadEntries(FILE *fp, unsigned bbpver
int nread, n;
char *s, *options = NULL;
char logical[1024];
- uint64_t count, capacity, base = 0;
+ uint64_t count, capacity = 0, base = 0;
#ifdef GDKLIBRARY_HASHASH
int Thashash;
#endif
@@ -621,14 +648,21 @@ BBPreadEntries(FILE *fp, unsigned bbpver
*s = 0;
}
- if (sscanf(buf,
+ if (bbpversion <= GDKLIBRARY_HSIZE ?
+ sscanf(buf,
"%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64
" %" SCNu64 " %" SCNu64
"%n",
&batid, &status, headname, filename,
- &properties,
- &count, &capacity, &base,
- &nread) < 8) {
+ &properties, &count, &capacity, &base,
+ &nread) < 8 :
+ sscanf(buf,
+ "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64
+ " %" SCNu64
+ "%n",
+ &batid, &status, headname, filename,
+ &properties, &count, &base,
+ &nread) < 7) {
TRC_CRITICAL(GDK, "invalid format for BBP.dir on line
%d", lineno);
goto bailout;
}
@@ -680,7 +714,8 @@ BBPreadEntries(FILE *fp, unsigned bbpver
bn->batRestricted = (properties & 0x06) >> 1;
bn->batCount = (BUN) count;
bn->batInserted = bn->batCount;
- bn->batCapacity = (BUN) capacity;
+ /* set capacity to at least count */
+ bn->batCapacity = (BUN) count <= BATTINY ? BATTINY : (BUN)
count;
char name[MT_NAME_LEN];
snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /*
fits */
MT_lock_init(&bn->theaplock, name);
@@ -706,7 +741,7 @@ BBPreadEntries(FILE *fp, unsigned bbpver
goto bailout;
}
nread += n;
- n = vheapinit(bn, buf + nread, bid, filename, lineno);
+ n = vheapinit(bn, buf + nread, bid, bbpversion, filename,
lineno);
if (n < 0) {
BATdestroy(bn);
goto bailout;
@@ -925,6 +960,7 @@ BBPheader(FILE *fp, int *lineno, bat *bb
return 0;
}
if (bbpversion != GDKLIBRARY &&
+ bbpversion != GDKLIBRARY_HSIZE &&
bbpversion != GDKLIBRARY_HASHASH &&
bbpversion != GDKLIBRARY_TAILN &&
bbpversion != GDKLIBRARY_MINMAX_POS) {
@@ -1805,8 +1841,9 @@ BBPexit(void)
* reclaimed as well.
*/
static inline int
-heap_entry(FILE *fp, BAT *b, BUN size, BATiter *bi)
+heap_entry(FILE *fp, BATiter *bi, BUN size)
{
+ BAT *b = bi->b;
size_t free = bi->hfree;
if (size < BUN_NONE) {
if ((bi->type >= 0 && ATOMstorage(bi->type) == TYPE_msk))
@@ -1818,7 +1855,7 @@ heap_entry(FILE *fp, BAT *b, BUN size, B
}
if ((GDKdebug & TAILCHKMASK) && free > 0) {
- char *fname = GDKfilepath(0, BATDIR,
BBP_physical(b->batCacheid), gettailname(b));
+ char *fname = GDKfilepath(0, BATDIR,
BBP_physical(b->batCacheid), gettailnamebi(bi));
if (fname != NULL) {
struct stat stb;
if (stat(fname, &stb) == -1) {
@@ -1834,7 +1871,7 @@ heap_entry(FILE *fp, BAT *b, BUN size, B
}
return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
- BUNFMT " " OIDFMT " %zu %zu %d %" PRIu64" %" PRIu64,
+ BUNFMT " " OIDFMT " %zu %" PRIu64" %" PRIu64,
bi->type >= 0 ? BATatoms[bi->type].name :
ATOMunknown_name(bi->type),
bi->width,
b->tvarsized,
@@ -1850,25 +1887,23 @@ heap_entry(FILE *fp, BAT *b, BUN size, B
b->tnorevsorted >= size ? 0 : b->tnorevsorted,
b->tseqbase,
free,
- bi->h->size,
- 0,
- bi->minpos < b->hseqbase + size ? (uint64_t) bi->minpos
: (uint64_t) oid_nil,
- bi->maxpos < b->hseqbase + size ? (uint64_t) bi->maxpos
: (uint64_t) oid_nil);
+ bi->minpos < size ? (uint64_t) bi->minpos : (uint64_t)
oid_nil,
+ bi->maxpos < size ? (uint64_t) bi->maxpos : (uint64_t)
oid_nil);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]