CVS commit: src/lib/libc/cdb
Module Name:src Committed By: joerg Date: Thu Jan 7 14:41:50 UTC 2021 Modified Files: src/lib/libc/cdb: cdbw.c Log Message: Optimize CPU and memory use of cdbw(3) Reduce memory footprint and processing time by dropping the vertex parts of the edges kept during the peeling. Hook up the division-by-multiplication logic to help older platforms. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/lib/libc/cdb/cdbw.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.c diff -u src/lib/libc/cdb/cdbw.c:1.6 src/lib/libc/cdb/cdbw.c:1.7 --- src/lib/libc/cdb/cdbw.c:1.6 Sat Nov 11 18:05:31 2017 +++ src/lib/libc/cdb/cdbw.c Thu Jan 7 14:41:50 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: cdbw.c,v 1.6 2017/11/11 18:05:31 alnsn Exp $ */ +/* $NetBSD: cdbw.c,v 1.7 2021/01/07 14:41:50 joerg Exp $ */ /*- * Copyright (c) 2009, 2010, 2015 The NetBSD Foundation, Inc. * All rights reserved. @@ -36,7 +36,7 @@ #endif #include -__RCSID("$NetBSD: cdbw.c,v 1.6 2017/11/11 18:05:31 alnsn Exp $"); +__RCSID("$NetBSD: cdbw.c,v 1.7 2021/01/07 14:41:50 joerg Exp $"); #include "namespace.h" @@ -49,6 +49,74 @@ __RCSID("$NetBSD: cdbw.c,v 1.6 2017/11/1 #include #include +#if !HAVE_NBTOOL_CONFIG_H +#include +#else +static inline int +my_fls32(uint32_t n) +{ + int v; + + if (!n) + return 0; + + v = 32; + if ((n & 0xU) == 0) { + n <<= 16; + v -= 16; + } + if ((n & 0xFF00U) == 0) { + n <<= 8; + v -= 8; + } + if ((n & 0xF000U) == 0) { + n <<= 4; + v -= 4; + } + if ((n & 0xC000U) == 0) { + n <<= 2; + v -= 2; + } + if ((n & 0x8000U) == 0) { + n <<= 1; + v -= 1; + } + return v; +} + +static inline void +fast_divide32_prepare(uint32_t div, uint32_t * m, +uint8_t *s1, uint8_t *s2) +{ + uint64_t mt; + int l; + + l = my_fls32(div - 1); + mt = (uint64_t)(0x1ULL * ((1ULL << l) - div)); + *m = (uint32_t)(mt / div + 1); + *s1 = (l > 1) ? 1U : (uint8_t)l; + *s2 = (l == 0) ? 0 : (uint8_t)(l - 1); +} + +static inline uint32_t +fast_divide32(uint32_t v, uint32_t div, uint32_t m, uint8_t s1, +uint8_t s2) +{ + uint32_t t; + + t = (uint32_t)(((uint64_t)v * m) >> 32); + return (t + ((v - t) >> s1)) >> s2; +} + +static inline uint32_t +fast_remainder32(uint32_t v, uint32_t div, uint32_t m, uint8_t s1, +uint8_t s2) +{ + + return v - div * fast_divide32(v, div, m, s1, s2); +} +#endif + #ifdef __weak_alias __weak_alias(cdbw_close,_cdbw_close) __weak_alias(cdbw_open,_cdbw_open) @@ -279,30 +347,29 @@ cdbw_stable_seeder(void) } /* - * The algorithm below is based on paper - * Cache-Oblivious Peeling of Random Hypergraphs by Djamal Belazzougui, - * Paolo Boldi, Giuseppe Ottaviano, Rossano Venturini, and Sebastiano - * Vigna. - * http://zola.di.unipi.it/rossano/wp-content/papercite-data/pdf/dcc14.pdf - */ - -/* - * Data type for a valid oriented edge (v0, v1, v2), v1 < v2. - * The first vertex v0 is implicit and is determined by an index - * of the corresponding element in the state->oedges array. - * If the degree of v0 is greater than 1, other members don't - * make sense because they're a result of XORing multiple values. + * For each vertex in the 3-graph, the incidence lists needs to be kept. + * Avoid storing the full list by just XORing the indices of the still + * incident edges and remember the number of such edges as that's all + * the peeling computation needs. This is inspired by: + * Cache-Oblivious Peeling of Random Hypergraphs by Djamal Belazzougui, + * Paolo Boldi, Giuseppe Ottaviano, Rossano Venturini, and Sebastiano + * Vigna. https://arxiv.org/abs/1312.0526 + * + * Unlike in the paper, we don't care about external storage and have + * the edge list at hand all the time. As such, no ordering is necessary + * and the vertices of the edge don't have to be copied. + * + * The core observation of the paper above is that for a degree of one, + * the incident edge can be obtained directly. */ -struct oedge { - uint32_t degree; /* Degree of v0. */ - uint32_t verts[2]; /* v1 and v2 */ - uint32_t edge; +struct vertex { + uint32_t degree; + uint32_t edges; }; struct edge { + uint32_t vertices[3]; uint32_t idx; - - uint32_t left, middle, right; }; struct state { @@ -314,48 +381,40 @@ struct state { uint32_t *g; char *visited; - struct oedge *oedges; + struct vertex *vertices; struct edge *edges; uint32_t output_index; uint32_t *output_order; }; /* - * Add (delta == 1) or remove (delta == -1) the edge e from vertex v0. + * Add (delta == 1) or remove (delta == -1) the edge e + * from the incidence lists. */ static inline void -add_remove_edge(struct oedge *o, int delta, uint32_t e, -uint32_t v0, uint32_t v1, uint32_t v2) -{ - - o[v0].verts[v1 < v2 ? 0 : 1] ^= v1; - o[v0].verts[v1 < v2 ? 1 : 0] ^= v2; - o[v0].degree += delta; - o[v0].edge ^= e; -} - -static inline void -add_edge(struct oedge *o, uint32_t e, -
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: wiz Date: Sun Apr 25 10:32:44 UTC 2010 Modified Files: src/lib/libc/cdb: cdb.5 cdbr.3 cdbw.3 Log Message: Various fixes, mostly for typos. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/lib/libc/cdb/cdb.5 src/lib/libc/cdb/cdbr.3 \ src/lib/libc/cdb/cdbw.3 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdb.5 diff -u src/lib/libc/cdb/cdb.5:1.1 src/lib/libc/cdb/cdb.5:1.2 --- src/lib/libc/cdb/cdb.5:1.1 Sun Apr 25 00:54:46 2010 +++ src/lib/libc/cdb/cdb.5 Sun Apr 25 10:32:44 2010 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdb.5,v 1.1 2010/04/25 00:54:46 joerg Exp $ +.\" $NetBSD: cdb.5,v 1.2 2010/04/25 10:32:44 wiz Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -38,12 +38,12 @@ The .Nm database format provides a space-efficient (key,value) database. -The format doesn't allow updates in any convient form. -The file overhead is around 5 Byte per key and 5 Byte per entry. -Keys are not stored and it is the responsibility of the using application +The format doesn't allow updates in any convenient form. +The file overhead is around 5 bytes per key and 5 bytes per entry. +Keys are not stored and it is the responsibility of the caller to validate matches. The index structure is based on a minimal perfect hash table, so exactly -one entry has to be checked for match. +one entry has to be checked for a match. .Ss General Format The header record of a .Nm @@ -68,7 +68,7 @@ .Va entries to base 256, rounded up. .Pp -The index records are followed by the the start offsets of the entries, +The index records are followed by the start offsets of the entries, followed by .Va data_size . The offsets are relative to the end of the offset record table and are @@ -82,9 +82,9 @@ .Ss Limitations The .Nm -file format is by design intended for a databases that can be +file format is by design intended for a database that can be mapped into memory. -The hard limit for the number of entries and keys 3435973836. +The hard limit for the number of entries and keys is 3435973836. The total size of all values must be smaller than 4GiB. .Sh SEE ALSO .Xr cdbr 3 , Index: src/lib/libc/cdb/cdbr.3 diff -u src/lib/libc/cdb/cdbr.3:1.1 src/lib/libc/cdb/cdbr.3:1.2 --- src/lib/libc/cdb/cdbr.3:1.1 Sun Apr 25 00:54:46 2010 +++ src/lib/libc/cdb/cdbr.3 Sun Apr 25 10:32:44 2010 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbr.3,v 1.1 2010/04/25 00:54:46 joerg Exp $ +.\" $NetBSD: cdbr.3,v 1.2 2010/04/25 10:32:44 wiz Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -60,7 +60,7 @@ .Sh DESCRIPTION The .Nm -library provides a space efficient key,value database based +library provides a space efficient (key,value) database based on perfect hashing. .Pp A cdb database is opened for reading by calling @@ -81,7 +81,7 @@ .Pp The number of records in the database can be obtained by calling .Fn cdbr_entries . -Records can be obtained by record number by using +Records can be obtained by record number using .Fn cdbr_get or by key using .Fn cdbr_find . @@ -97,13 +97,13 @@ is called. It is the responsibility of the caller of .Fn cdbr_find -to ensure, that the key matches the returned data. +to ensure that the key matches the returned data. The function returns the only possible match, but the database doesn't store the keys to minimize overhead. .Sh SEE ALSO .Xr nbperf 1 , -.Xr db 3 , .Xr cdbw 3 , +.Xr db 3 , .Xr cdb 5 .Sh HISTORY Support for the Index: src/lib/libc/cdb/cdbw.3 diff -u src/lib/libc/cdb/cdbw.3:1.1 src/lib/libc/cdb/cdbw.3:1.2 --- src/lib/libc/cdb/cdbw.3:1.1 Sun Apr 25 00:54:46 2010 +++ src/lib/libc/cdb/cdbw.3 Sun Apr 25 10:32:44 2010 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbw.3,v 1.1 2010/04/25 00:54:46 joerg Exp $ +.\" $NetBSD: cdbw.3,v 1.2 2010/04/25 10:32:44 wiz Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -76,8 +76,8 @@ .Sh DESCRIPTION The .Nm cdbw -funcations are used to create a constant databases for use with -.Xr cdbr 5 . +functions are used to create a constant databases for use with +.Xr cdbr 3 . Details about the file format, including overhead and limitations, can be found in .Xr cdb 5 . @@ -92,7 +92,7 @@ frees all resources associated with the handle. .Pp .Fn cdbw_put -adds the given key,value pair after checking for a duplicate key. +adds the given (key,value) pair after checking for a duplicate key. .Fn cdbw_put_data adds the given value to the writer without adding a key reference. The returned index can be used in subsequent calls to @@ -104,7 +104,7 @@ .Pp .Fn cdbw_output computes the database file and writes it to the given descriptor. -The function returns an error, if the file cannot be written correctly. +The function returns an error if the file cannot be written correctly. The .Fn descr parameter
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: jruoho Date: Tue Apr 27 14:26:52 UTC 2010 Modified Files: src/lib/libc/cdb: cdb.5 Log Message: .Pp after .Ed. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/lib/libc/cdb/cdb.5 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdb.5 diff -u src/lib/libc/cdb/cdb.5:1.2 src/lib/libc/cdb/cdb.5:1.3 --- src/lib/libc/cdb/cdb.5:1.2 Sun Apr 25 10:32:44 2010 +++ src/lib/libc/cdb/cdb.5 Tue Apr 27 14:26:52 2010 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdb.5,v 1.2 2010/04/25 10:32:44 wiz Exp $ +.\" $NetBSD: cdb.5,v 1.3 2010/04/27 14:26:52 jruoho Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -28,7 +28,7 @@ .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. -.Dd March 3, 2010 +.Dd April 27, 2010 .Dt CDB 5 .Os .Sh NAME @@ -59,6 +59,7 @@ uint32_t seed; }; .Ed +.Pp All fields are in Little Endian byte order. .Pp This is followed by a description of the hash function of
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: veego Date: Thu Jun 3 12:40:52 UTC 2010 Modified Files: src/lib/libc/cdb: cdbr.c Log Message: Use MAP_FILE|MAP_SHARED instead of MAP_FILE for the flags parameter of mmap(2). Patch is from martin. Solves my own PR kern/43346 To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/lib/libc/cdb/cdbr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbr.c diff -u src/lib/libc/cdb/cdbr.c:1.1 src/lib/libc/cdb/cdbr.c:1.2 --- src/lib/libc/cdb/cdbr.c:1.1 Sun Apr 25 00:54:46 2010 +++ src/lib/libc/cdb/cdbr.c Thu Jun 3 12:40:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: cdbr.c,v 1.1 2010/04/25 00:54:46 joerg Exp $ */ +/* $NetBSD: cdbr.c,v 1.2 2010/06/03 12:40:52 veego Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. * All rights reserved. @@ -36,7 +36,7 @@ #endif #include -__RCSID("$NetBSD: cdbr.c,v 1.1 2010/04/25 00:54:46 joerg Exp $"); +__RCSID("$NetBSD: cdbr.c,v 1.2 2010/06/03 12:40:52 veego Exp $"); #include "namespace.h" @@ -122,7 +122,7 @@ cdbr->index_size = 4; cdbr->mmap_size = (size_t)sb.st_size; - cdbr->mmap_base = mmap(NULL, cdbr->mmap_size, PROT_READ, MAP_FILE, fd, 0); + cdbr->mmap_base = mmap(NULL, cdbr->mmap_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0); close(fd); if (cdbr->mmap_base == MAP_FAILED) {
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: plunky Date: Wed Nov 3 16:17:48 UTC 2010 Modified Files: src/lib/libc/cdb: cdbw.3 Log Message: this page title is CDBW To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/lib/libc/cdb/cdbw.3 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.3 diff -u src/lib/libc/cdb/cdbw.3:1.2 src/lib/libc/cdb/cdbw.3:1.3 --- src/lib/libc/cdb/cdbw.3:1.2 Sun Apr 25 10:32:44 2010 +++ src/lib/libc/cdb/cdbw.3 Wed Nov 3 16:17:48 2010 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbw.3,v 1.2 2010/04/25 10:32:44 wiz Exp $ +.\" $NetBSD: cdbw.3,v 1.3 2010/11/03 16:17:48 plunky Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -28,8 +28,8 @@ .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. -.Dd March 3, 2010 -.Dt CDB 3 +.Dd November 3, 2010 +.Dt CDBW 3 .Os .Sh NAME .Nm cdbw_open ,
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: joerg Date: Tue Mar 13 21:32:12 UTC 2012 Modified Files: src/lib/libc/cdb: cdbw.c Log Message: Revert bloat. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/lib/libc/cdb/cdbw.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.c diff -u src/lib/libc/cdb/cdbw.c:1.2 src/lib/libc/cdb/cdbw.c:1.3 --- src/lib/libc/cdb/cdbw.c:1.2 Tue Mar 13 21:13:31 2012 +++ src/lib/libc/cdb/cdbw.c Tue Mar 13 21:32:12 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: cdbw.c,v 1.2 2012/03/13 21:13:31 christos Exp $ */ +/* $NetBSD: cdbw.c,v 1.3 2012/03/13 21:32:12 joerg Exp $ */ /*- * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc. * All rights reserved. @@ -36,13 +36,12 @@ #endif #include -__RCSID("$NetBSD: cdbw.c,v 1.2 2012/03/13 21:13:31 christos Exp $"); +__RCSID("$NetBSD: cdbw.c,v 1.3 2012/03/13 21:32:12 joerg Exp $"); #include "namespace.h" #include #include -#include #include #include #include @@ -168,8 +167,7 @@ cdbw_put_data(struct cdbw *cdbw, const v memcpy(cdbw->data_ptr[cdbw->data_counter], data, datalen); cdbw->data_len[cdbw->data_counter] = datalen; cdbw->data_size += datalen; - _DIAGASSERT(__type_fit(uint32_t, cdbw->data_counter)); - *idx = (uint32_t)cdbw->data_counter++; + *idx = cdbw->data_counter++; return 0; } @@ -332,9 +330,7 @@ remove_vertex(struct state *state, struc return; } - ptrdiff_t td = e - state->edges; - _DIAGASSERT(__type_fit(uint32_t, td)); - state->output_order[--state->output_index] = (uint32_t)td; + state->output_order[--state->output_index] = e - state->edges; vl = &state->verts[e->left]; vm = &state->verts[e->middle]; @@ -369,7 +365,8 @@ build_graph(struct cdbw *cdbw, struct st struct key_hash *key_hash; struct vertex *v; struct edge *e; - uint32_t hashes[3], i; + uint32_t hashes[3]; + size_t i; e = state->edges; for (i = 0; i < cdbw->hash_size; ++i) { @@ -495,10 +492,8 @@ print_hash(struct cdbw *cdbw, struct sta memcpy(buf, "NBCDB\n\0", 7); buf[7] = 1; strncpy((char *)buf + 8, descr, 16); - _DIAGASSERT(__type_fit(uint32_t, cdbw->data_size)); - le32enc(buf + 24, (uint32_t)cdbw->data_size); - _DIAGASSERT(__type_fit(uint32_t, cdbw->data_counter)); - le32enc(buf + 28, (uint32_t)cdbw->data_counter); + le32enc(buf + 24, cdbw->data_size); + le32enc(buf + 28, cdbw->data_counter); le32enc(buf + 32, state->entries); le32enc(buf + 36, state->seed); cur_pos = 40; @@ -509,8 +504,7 @@ print_hash(struct cdbw *cdbw, struct sta le32enc(buf + cur_pos, state->g[i]); cur_pos += size; } - _DIAGASSERT(__type_fit(uint32_t, cdbw->data_counter)); - size2 = compute_size((uint32_t)cdbw->data_size); + size2 = compute_size(cdbw->data_size); size = size * state->entries % size2; if (size != 0) { size = size2 - size; @@ -522,9 +516,7 @@ print_hash(struct cdbw *cdbw, struct sta COND_FLUSH_BUFFER(4); le32enc(buf + cur_pos, data_size); cur_pos += size2; - _DIAGASSERT(__type_fit(uint32_t, - data_size + cdbw->data_len[i])); - data_size += (uint32_t)cdbw->data_len[i]; + data_size += cdbw->data_len[i]; } COND_FLUSH_BUFFER(4); le32enc(buf + cur_pos, data_size); @@ -569,10 +561,8 @@ cdbw_output(struct cdbw *cdbw, int fd, c rv = 0; - _DIAGASSERT(__type_fit(uint32_t, cdbw->key_counter)); - state.keys = (uint32_t)cdbw->key_counter; - _DIAGASSERT(__type_fit(uint32_t, cdbw->key_counter)); - state.data_entries = (uint32_t)cdbw->data_counter; + state.keys = cdbw->key_counter; + state.data_entries = cdbw->data_counter; state.entries = state.keys + (state.keys + 3) / 4; if (state.entries < 10) state.entries = 10;
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: christos Date: Sat Mar 17 17:59:58 UTC 2012 Modified Files: src/lib/libc/cdb: Makefile.inc Log Message: hack to silence lint To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/lib/libc/cdb/Makefile.inc Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/Makefile.inc diff -u src/lib/libc/cdb/Makefile.inc:1.1 src/lib/libc/cdb/Makefile.inc:1.2 --- src/lib/libc/cdb/Makefile.inc:1.1 Sat Apr 24 20:54:46 2010 +++ src/lib/libc/cdb/Makefile.inc Sat Mar 17 13:59:58 2012 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.inc,v 1.1 2010/04/25 00:54:46 joerg Exp $ +# $NetBSD: Makefile.inc,v 1.2 2012/03/17 17:59:58 christos Exp $ # Constant database reader/writer @@ -19,3 +19,6 @@ MLINKS+= cdbw.3 cdbw_put_data.3 MLINKS+= cdbw.3 cdbw_put_key.3 MLINKS+= cdbw.3 cdbw_output.3 MLINKS+= cdbw.3 cdbw_close.3 + +# XXX: Fix the code instead +LINTFLAGS.cdbw.c += -X 132,259,298
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: wiz Date: Mon Jun 4 00:26:29 UTC 2012 Modified Files: src/lib/libc/cdb: cdbw.3 Log Message: Fix typos. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/lib/libc/cdb/cdbw.3 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.3 diff -u src/lib/libc/cdb/cdbw.3:1.4 src/lib/libc/cdb/cdbw.3:1.5 --- src/lib/libc/cdb/cdbw.3:1.4 Sun Jun 3 21:02:50 2012 +++ src/lib/libc/cdb/cdbw.3 Mon Jun 4 00:26:29 2012 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbw.3,v 1.4 2012/06/03 21:02:50 joerg Exp $ +.\" $NetBSD: cdbw.3,v 1.5 2012/06/04 00:26:29 wiz Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -118,10 +118,10 @@ The .Fn seedgen parameter can be used to override the default PRNG. The bitwise layout of the output depends on the chosen seed. -The function should return a different value for each invokation. +The function should return a different value for each invocation. The .Fn cdbw_stable_seeder -can be used to create reproducable output. +can be used to create reproducible output. It may be slower than the default. .Sh SEE ALSO .Xr cdbr 3 ,
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: joerg Date: Sat Jul 21 22:49:37 UTC 2012 Modified Files: src/lib/libc/cdb: cdbw.c Log Message: Redo hashing, if two of the three individual hashes result in identical hash modules. This is the trivial case for loops in the 3-graph and got lost when adopting the nbperf code. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/lib/libc/cdb/cdbw.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.c diff -u src/lib/libc/cdb/cdbw.c:1.4 src/lib/libc/cdb/cdbw.c:1.5 --- src/lib/libc/cdb/cdbw.c:1.4 Sun Jun 3 21:02:50 2012 +++ src/lib/libc/cdb/cdbw.c Sat Jul 21 22:49:37 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: cdbw.c,v 1.4 2012/06/03 21:02:50 joerg Exp $ */ +/* $NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $ */ /*- * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc. * All rights reserved. @@ -36,7 +36,7 @@ #endif #include -__RCSID("$NetBSD: cdbw.c,v 1.4 2012/06/03 21:02:50 joerg Exp $"); +__RCSID("$NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $"); #include "namespace.h" @@ -387,6 +387,13 @@ build_graph(struct cdbw *cdbw, struct st e->middle = hashes[1] % state->entries; e->right = hashes[2] % state->entries; + if (e->left == e->middle) +return -1; + if (e->left == e->right) +return -1; + if (e->middle == e->right) +return -1; + ++e; } }
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: joerg Date: Thu Sep 27 00:37:43 UTC 2012 Modified Files: src/lib/libc/cdb: cdbr.c Log Message: Don't refuse the open databases without entries or keys, just protect the divisions. cdbr_find and cdbr_get already have the appropiate checks. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/lib/libc/cdb/cdbr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbr.c diff -u src/lib/libc/cdb/cdbr.c:1.3 src/lib/libc/cdb/cdbr.c:1.4 --- src/lib/libc/cdb/cdbr.c:1.3 Mon Jun 4 19:06:45 2012 +++ src/lib/libc/cdb/cdbr.c Thu Sep 27 00:37:43 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: cdbr.c,v 1.3 2012/06/04 19:06:45 joerg Exp $ */ +/* $NetBSD: cdbr.c,v 1.4 2012/09/27 00:37:43 joerg Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. * All rights reserved. @@ -36,7 +36,7 @@ #endif #include -__RCSID("$NetBSD: cdbr.c,v 1.3 2012/06/04 19:06:45 joerg Exp $"); +__RCSID("$NetBSD: cdbr.c,v 1.4 2012/09/27 00:37:43 joerg Exp $"); #include "namespace.h" @@ -152,17 +152,21 @@ cdbr_open(const char *path, int flags) cdbr->data_base < cdbr->mmap_base || cdbr->data_base + cdbr->data_size < cdbr->mmap_base || cdbr->data_base + cdbr->data_size > - cdbr->mmap_base + cdbr->mmap_size || - cdbr->entries == 0 || cdbr->entries_index == 0) { + cdbr->mmap_base + cdbr->mmap_size) { errno = EINVAL; cdbr_close(cdbr); return NULL; } - fast_divide32_prepare(cdbr->entries, &cdbr->entries_m, - &cdbr->entries_s1, &cdbr->entries_s2); - fast_divide32_prepare(cdbr->entries_index, &cdbr->entries_index_m, - &cdbr->entries_index_s1, &cdbr->entries_index_s2); + if (cdbr->entries) { + fast_divide32_prepare(cdbr->entries, &cdbr->entries_m, + &cdbr->entries_s1, &cdbr->entries_s2); + } + if (cdbr->entries_index) { + fast_divide32_prepare(cdbr->entries_index, + &cdbr->entries_index_m, + &cdbr->entries_index_s1, &cdbr->entries_index_s2); + } return cdbr; }
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: wiz Date: Sun Feb 8 19:09:56 UTC 2015 Modified Files: src/lib/libc/cdb: cdb.5 Log Message: Fix typo. Reported by rudolf on netbsd-docs. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/lib/libc/cdb/cdb.5 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdb.5 diff -u src/lib/libc/cdb/cdb.5:1.5 src/lib/libc/cdb/cdb.5:1.6 --- src/lib/libc/cdb/cdb.5:1.5 Tue Mar 18 18:20:37 2014 +++ src/lib/libc/cdb/cdb.5 Sun Feb 8 19:09:56 2015 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdb.5,v 1.5 2014/03/18 18:20:37 riastradh Exp $ +.\" $NetBSD: cdb.5,v 1.6 2015/02/08 19:09:56 wiz Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -73,7 +73,7 @@ The index records are followed by the st followed by .Va data_size . The offsets are relative to the end of the offset record table and are -monotically increasing. +monotonically increasing. The size of each offset record is the logarithm of .Va data_size to base 256, rounded up.
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: christos Date: Tue Dec 10 20:58:45 UTC 2013 Modified Files: src/lib/libc/cdb: cdbr.c Log Message: CID 1135779: Fix resource leak To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/lib/libc/cdb/cdbr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbr.c diff -u src/lib/libc/cdb/cdbr.c:1.5 src/lib/libc/cdb/cdbr.c:1.6 --- src/lib/libc/cdb/cdbr.c:1.5 Thu Dec 5 16:17:23 2013 +++ src/lib/libc/cdb/cdbr.c Tue Dec 10 15:58:45 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: cdbr.c,v 1.5 2013/12/05 21:17:23 joerg Exp $ */ +/* $NetBSD: cdbr.c,v 1.6 2013/12/10 20:58:45 christos Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. * All rights reserved. @@ -36,7 +36,7 @@ #endif #include -__RCSID("$NetBSD: cdbr.c,v 1.5 2013/12/05 21:17:23 joerg Exp $"); +__RCSID("$NetBSD: cdbr.c,v 1.6 2013/12/10 20:58:45 christos Exp $"); #include "namespace.h" @@ -119,6 +119,7 @@ cdbr_open(const char *path, int flags) } if (sb.st_size >= SSIZE_MAX) { + close(fd); errno = EINVAL; return NULL; }
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: joerg Date: Wed Dec 11 01:29:29 UTC 2013 Removed Files: src/lib/libc/cdb: cdbr.c Log Message: Moved to src/common. To generate a diff of this commit: cvs rdiff -u -r1.6 -r0 src/lib/libc/cdb/cdbr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: rmind Date: Thu Feb 6 15:47:20 UTC 2014 Modified Files: src/lib/libc/cdb: cdbw.3 Log Message: cdbw(3) man page: fix the header file name and use .Fa for function arguments. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/lib/libc/cdb/cdbw.3 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.3 diff -u src/lib/libc/cdb/cdbw.3:1.6 src/lib/libc/cdb/cdbw.3:1.7 --- src/lib/libc/cdb/cdbw.3:1.6 Sat Jul 20 21:39:56 2013 +++ src/lib/libc/cdb/cdbw.3 Thu Feb 6 15:47:20 2014 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbw.3,v 1.6 2013/07/20 21:39:56 wiz Exp $ +.\" $NetBSD: cdbw.3,v 1.7 2014/02/06 15:47:20 rmind Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -41,7 +41,7 @@ .Nm cdbw_close .Nd create constant databases .Sh SYNOPSIS -.In archive_entry.h +.In cdbw.h .Ft "struct cdbw *" .Fn cdbw_open "void" .Ft int @@ -112,10 +112,10 @@ On success it adds the given key. computes the database file and writes it to the given descriptor. The function returns an error if the file cannot be written correctly. The -.Fn descr +.Fa descr parameter provides a human readable description of the database content. The -.Fn seedgen +.Fa seedgen parameter can be used to override the default PRNG. The bitwise layout of the output depends on the chosen seed. The function should return a different value for each invocation.
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: rmind Date: Thu Feb 6 15:50:40 UTC 2014 Modified Files: src/lib/libc/cdb: cdbw.3 Log Message: bump the date To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/lib/libc/cdb/cdbw.3 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.3 diff -u src/lib/libc/cdb/cdbw.3:1.7 src/lib/libc/cdb/cdbw.3:1.8 --- src/lib/libc/cdb/cdbw.3:1.7 Thu Feb 6 15:47:20 2014 +++ src/lib/libc/cdb/cdbw.3 Thu Feb 6 15:50:40 2014 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbw.3,v 1.7 2014/02/06 15:47:20 rmind Exp $ +.\" $NetBSD: cdbw.3,v 1.8 2014/02/06 15:50:40 rmind Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -28,7 +28,7 @@ .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. -.Dd June 3, 2012 +.Dd February 6, 2014 .Dt CDBW 3 .Os .Sh NAME
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: abhinav Date: Tue Oct 24 17:01:15 UTC 2017 Modified Files: src/lib/libc/cdb: cdbr.3 Log Message: Remove cdbr_write from NAME section, it's a left over Also add comma after the first Nm entry ok joerg@ To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/lib/libc/cdb/cdbr.3 Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbr.3 diff -u src/lib/libc/cdb/cdbr.3:1.4 src/lib/libc/cdb/cdbr.3:1.5 --- src/lib/libc/cdb/cdbr.3:1.4 Thu Dec 5 21:17:23 2013 +++ src/lib/libc/cdb/cdbr.3 Tue Oct 24 17:01:15 2017 @@ -1,4 +1,4 @@ -.\" $NetBSD: cdbr.3,v 1.4 2013/12/05 21:17:23 joerg Exp $ +.\" $NetBSD: cdbr.3,v 1.5 2017/10/24 17:01:15 abhinav Exp $ .\" .\" Copyright (c) 2010 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -28,18 +28,17 @@ .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. -.Dd December 5, 2013 +.Dd October 24, 2017 .Dt CDBR 3 .Os .Sh NAME -.Nm cdbr +.Nm cdbr , .Nm cdbr_open , .Nm cdbr_open_mem , .Nm cdbr_entries , .Nm cdbr_get , .Nm cdbr_find , -.Nm cdbr_close , -.Nm cdbr_write +.Nm cdbr_close .Nd constant database access methods .Sh SYNOPSIS .Ft "struct cdbr *"
CVS commit: src/lib/libc/cdb
Module Name:src Committed By: alnsn Date: Sat Nov 11 18:05:31 UTC 2017 Modified Files: src/lib/libc/cdb: cdbw.c Log Message: Use a more efficient data structure for graph peeling. New code is about 50% faster on amd64 and it consumes less memory. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/lib/libc/cdb/cdbw.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/lib/libc/cdb/cdbw.c diff -u src/lib/libc/cdb/cdbw.c:1.5 src/lib/libc/cdb/cdbw.c:1.6 --- src/lib/libc/cdb/cdbw.c:1.5 Sat Jul 21 22:49:37 2012 +++ src/lib/libc/cdb/cdbw.c Sat Nov 11 18:05:31 2017 @@ -1,10 +1,10 @@ -/* $NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $ */ +/* $NetBSD: cdbw.c,v 1.6 2017/11/11 18:05:31 alnsn Exp $ */ /*- - * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc. + * Copyright (c) 2009, 2010, 2015 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation - * by Joerg Sonnenberger. + * by Joerg Sonnenberger and Alexander Nasonov. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -36,7 +36,7 @@ #endif #include -__RCSID("$NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $"); +__RCSID("$NetBSD: cdbw.c,v 1.6 2017/11/11 18:05:31 alnsn Exp $"); #include "namespace.h" @@ -278,18 +278,31 @@ cdbw_stable_seeder(void) return 0; } -#define unused 0xU +/* + * The algorithm below is based on paper + * Cache-Oblivious Peeling of Random Hypergraphs by Djamal Belazzougui, + * Paolo Boldi, Giuseppe Ottaviano, Rossano Venturini, and Sebastiano + * Vigna. + * http://zola.di.unipi.it/rossano/wp-content/papercite-data/pdf/dcc14.pdf + */ -struct vertex { - uint32_t l_edge, m_edge, r_edge; +/* + * Data type for a valid oriented edge (v0, v1, v2), v1 < v2. + * The first vertex v0 is implicit and is determined by an index + * of the corresponding element in the state->oedges array. + * If the degree of v0 is greater than 1, other members don't + * make sense because they're a result of XORing multiple values. + */ +struct oedge { + uint32_t degree; /* Degree of v0. */ + uint32_t verts[2]; /* v1 and v2 */ + uint32_t edge; }; struct edge { uint32_t idx; uint32_t left, middle, right; - uint32_t l_prev, m_prev, l_next; - uint32_t r_prev, m_next, r_next; }; struct state { @@ -301,69 +314,49 @@ struct state { uint32_t *g; char *visited; - struct vertex *verts; + struct oedge *oedges; struct edge *edges; uint32_t output_index; uint32_t *output_order; }; -static void -remove_vertex(struct state *state, struct vertex *v) +/* + * Add (delta == 1) or remove (delta == -1) the edge e from vertex v0. + */ +static inline void +add_remove_edge(struct oedge *o, int delta, uint32_t e, +uint32_t v0, uint32_t v1, uint32_t v2) { - struct edge *e; - struct vertex *vl, *vm, *vr; - if (v->l_edge != unused && v->m_edge != unused) - return; - if (v->l_edge != unused && v->r_edge != unused) - return; - if (v->m_edge != unused && v->r_edge != unused) - return; - if (v->l_edge == unused && v->m_edge == unused && v->r_edge == unused) - return; - - if (v->l_edge != unused) { - e = &state->edges[v->l_edge]; - if (e->l_next != unused) - return; - } else if (v->m_edge != unused) { - e = &state->edges[v->m_edge]; - if (e->m_next != unused) - return; - } else { - if (v->r_edge == unused) - abort(); - e = &state->edges[v->r_edge]; - if (e->r_next != unused) - return; - } - - state->output_order[--state->output_index] = e - state->edges; - - vl = &state->verts[e->left]; - vm = &state->verts[e->middle]; - vr = &state->verts[e->right]; + o[v0].verts[v1 < v2 ? 0 : 1] ^= v1; + o[v0].verts[v1 < v2 ? 1 : 0] ^= v2; + o[v0].degree += delta; + o[v0].edge ^= e; +} - if (e->l_prev == unused) - vl->l_edge = e->l_next; - else - state->edges[e->l_prev].l_next = e->l_next; - if (e->l_next != unused) - state->edges[e->l_next].l_prev = e->l_prev; +static inline void +add_edge(struct oedge *o, uint32_t e, +uint32_t v0, uint32_t v1, uint32_t v2) +{ - if (e->m_prev == unused) - vm->m_edge = e->m_next; - else - state->edges[e->m_prev].m_next = e->m_next; - if (e->m_next != unused) - state->edges[e->m_next].m_prev = e->m_prev; + add_remove_edge(o, 1, e, v0, v1, v2); +} - if (e->r_prev == unused) - vr->r_edge = e->r_next; - else - state->edges[e->r_prev].r_next = e->r_next; - if (e->r_next != unused) - state->edges[e->r_next].r_prev = e->r_prev; +static inline void +remove_vertex(struct state *state, uint32_t v0) +{ + uint32_t e, v1, v2; + struct oedge *o = state->oedges; + + if (o[v0].degree == 1) { + e = o[v0].edge; + v1 = o[v0].verts[0]; + v2 = o[v0].verts[1]; + o[v0].degree = 0; + add_remove_edge(o, -1, e, v1, v0, v2); + add_remove_edge(o, -1, e, v2, v0, v1); + state->output_order[--state-