I wrote:
> Jeff Janes <jeff.ja...@gmail.com> writes:
>> My biggest gripe with it at the moment is that the signature size should be
>> expressed in bits, and then internally rounded up to a multiple of 16,
>> rather than having it be expressed in 'uint16'.
>> If that were done it would be easier to fix the documentation to be more
>> understandable.

> +1 ... that sort of definition seems much more future-proof, too.
> IMO it's not too late to change this.  (We probably don't want to change
> the on-disk representation of the reloptions, but we could convert from
> bits to words in bloptions().)

There were no objections to this, but also no action.  Attached is a draft
patch ... any complaints?

                        regards, tom lane

diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index c21eebf..a9daf63 100644
*** a/contrib/bloom/bloom.h
--- b/contrib/bloom/bloom.h
*************** typedef BloomPageOpaqueData *BloomPageOp
*** 79,96 ****
  #define BLOOM_HEAD_BLKNO		(1)		/* first data page */
  
  /*
!  * Maximum of bloom signature length in uint16. Actual value
!  * is 512 bytes
   */
! #define MAX_BLOOM_LENGTH		(256)
  
  /* Bloom index options */
  typedef struct BloomOptions
  {
  	int32		vl_len_;		/* varlena header (do not touch directly!) */
! 	int			bloomLength;	/* length of signature in uint16 */
! 	int			bitSize[INDEX_MAX_KEYS];		/* signature bits per index
! 												 * key */
  }	BloomOptions;
  
  /*
--- 79,108 ----
  #define BLOOM_HEAD_BLKNO		(1)		/* first data page */
  
  /*
!  * We store Bloom signatures as arrays of uint16 words.
   */
! typedef uint16 BloomSignatureWord;
! 
! #define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
! 
! /*
!  * Default and maximum Bloom signature length in bits.
!  */
! #define DEFAULT_BLOOM_LENGTH	(5 * SIGNWORDBITS)
! #define MAX_BLOOM_LENGTH		(256 * SIGNWORDBITS)
! 
! /*
!  * Default and maximum signature bits generated per index key.
!  */
! #define DEFAULT_BLOOM_BITS		2
! #define MAX_BLOOM_BITS			(MAX_BLOOM_LENGTH - 1)
  
  /* Bloom index options */
  typedef struct BloomOptions
  {
  	int32		vl_len_;		/* varlena header (do not touch directly!) */
! 	int			bloomLength;	/* length of signature in words (not bits!) */
! 	int			bitSize[INDEX_MAX_KEYS];	/* signature bits per index key */
  }	BloomOptions;
  
  /*
*************** typedef struct BloomState
*** 143,154 ****
  /*
   * Tuples are very different from all other relations
   */
- typedef uint16 SignType;
- 
  typedef struct BloomTuple
  {
  	ItemPointerData heapPtr;
! 	SignType	sign[FLEXIBLE_ARRAY_MEMBER];
  }	BloomTuple;
  
  #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
--- 155,164 ----
  /*
   * Tuples are very different from all other relations
   */
  typedef struct BloomTuple
  {
  	ItemPointerData heapPtr;
! 	BloomSignatureWord	sign[FLEXIBLE_ARRAY_MEMBER];
  }	BloomTuple;
  
  #define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
*************** typedef struct BloomTuple
*** 156,162 ****
  /* Opaque data structure for bloom index scan */
  typedef struct BloomScanOpaqueData
  {
! 	SignType   *sign;			/* Scan signature */
  	BloomState	state;
  }	BloomScanOpaqueData;
  
--- 166,172 ----
  /* Opaque data structure for bloom index scan */
  typedef struct BloomScanOpaqueData
  {
! 	BloomSignatureWord   *sign;			/* Scan signature */
  	BloomState	state;
  }	BloomScanOpaqueData;
  
*************** extern void BloomFillMetapage(Relation i
*** 170,176 ****
  extern void BloomInitMetapage(Relation index);
  extern void BloomInitPage(Page page, uint16 flags);
  extern Buffer BloomNewBuffer(Relation index);
! extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
  extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
  extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
  
--- 180,186 ----
  extern void BloomInitMetapage(Relation index);
  extern void BloomInitPage(Page page, uint16 flags);
  extern Buffer BloomNewBuffer(Relation index);
! extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
  extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
  extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
  
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index aebf32a..0c954dc 100644
*** a/contrib/bloom/blscan.c
--- b/contrib/bloom/blscan.c
*************** blgetbitmap(IndexScanDesc scan, TIDBitma
*** 93,99 ****
  		/* New search: have to calculate search signature */
  		ScanKey		skey = scan->keyData;
  
! 		so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);
  
  		for (i = 0; i < scan->numberOfKeys; i++)
  		{
--- 93,99 ----
  		/* New search: have to calculate search signature */
  		ScanKey		skey = scan->keyData;
  
! 		so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
  
  		for (i = 0; i < scan->numberOfKeys; i++)
  		{
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 4a5b343..8f13476 100644
*** a/contrib/bloom/blutils.c
--- b/contrib/bloom/blutils.c
***************
*** 27,49 ****
  
  #include "bloom.h"
  
! /* Signature dealing macros */
! #define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
! #define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
! #define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
! #define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % BITSIGNTYPE ) )
! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
  
  PG_FUNCTION_INFO_V1(blhandler);
  
! /* Kind of relation optioms for bloom index */
  static relopt_kind bl_relopt_kind;
  
  static int32 myRand(void);
  static void mySrand(uint32 seed);
  
  /*
!  * Module initialize function: initilized relation options.
   */
  void
  _PG_init(void)
--- 27,52 ----
  
  #include "bloom.h"
  
! /* Signature dealing macros - note i is assumed to be of type int */
! #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
! #define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
! #define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % SIGNWORDBITS ) )
! #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
  
  PG_FUNCTION_INFO_V1(blhandler);
  
! /* Kind of relation options for bloom index */
  static relopt_kind bl_relopt_kind;
+ /* parse table for fillRelOptions */
+ static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
  
  static int32 myRand(void);
  static void mySrand(uint32 seed);
  
  /*
!  * Module initialize function: initialize info about Bloom relation options.
!  *
!  * Note: keep this in sync with makeDefaultBloomOptions().
   */
  void
  _PG_init(void)
*************** _PG_init(void)
*** 53,70 ****
  
  	bl_relopt_kind = add_reloption_kind();
  
  	add_int_reloption(bl_relopt_kind, "length",
! 					  "Length of signature in uint16 type", 5, 1, 256);
  
  	for (i = 0; i < INDEX_MAX_KEYS; i++)
  	{
! 		snprintf(buf, 16, "col%d", i + 1);
  		add_int_reloption(bl_relopt_kind, buf,
! 					  "Number of bits for corresponding column", 2, 1, 2048);
  	}
  }
  
  /*
   * Bloom handler function: return IndexAmRoutine with access method parameters
   * and callbacks.
   */
--- 56,101 ----
  
  	bl_relopt_kind = add_reloption_kind();
  
+ 	/* Option for length of signature */
  	add_int_reloption(bl_relopt_kind, "length",
! 					  "Length of signature in bits",
! 					  DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
! 	bl_relopt_tab[0].optname = "length";
! 	bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
! 	bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
  
+ 	/* Number of bits for each possible index column: col1, col2, ... */
  	for (i = 0; i < INDEX_MAX_KEYS; i++)
  	{
! 		snprintf(buf, sizeof(buf), "col%d", i + 1);
  		add_int_reloption(bl_relopt_kind, buf,
! 						  "Number of bits for corresponding column",
! 						  DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
! 		bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
! 														   buf);
! 		bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
! 		bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
  	}
  }
  
  /*
+  * Construct a default set of Bloom options.
+  */
+ static BloomOptions *
+ makeDefaultBloomOptions(void)
+ {
+ 	BloomOptions *opts;
+ 	int				i;
+ 
+ 	opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
+ 	opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
+ 	for (i = 0; i < INDEX_MAX_KEYS; i++)
+ 		opts->bitSize[i] = DEFAULT_BLOOM_BITS;
+ 	SET_VARSIZE(opts, sizeof(BloomOptions));
+ 	return opts;
+ }
+ 
+ /*
   * Bloom handler function: return IndexAmRoutine with access method parameters
   * and callbacks.
   */
*************** initBloomState(BloomState *state, Relati
*** 157,163 ****
  
  	memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
  	state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
! 		sizeof(SignType) * state->opts.bloomLength;
  }
  
  /*
--- 188,194 ----
  
  	memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
  	state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
! 		sizeof(BloomSignatureWord) * state->opts.bloomLength;
  }
  
  /*
*************** mySrand(uint32 seed)
*** 208,214 ****
   * Add bits of given value to the signature.
   */
  void
! signValue(BloomState *state, SignType *sign, Datum value, int attno)
  {
  	uint32		hashVal;
  	int			nBit,
--- 239,245 ----
   * Add bits of given value to the signature.
   */
  void
! signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
  {
  	uint32		hashVal;
  	int			nBit,
*************** signValue(BloomState *state, SignType *s
*** 231,238 ****
  
  	for (j = 0; j < state->opts.bitSize[attno]; j++)
  	{
! 		/* prevent mutiple evaluation */
! 		nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
  		SETBIT(sign, nBit);
  	}
  }
--- 262,269 ----
  
  	for (j = 0; j < state->opts.bitSize[attno]; j++)
  	{
! 		/* prevent multiple evaluation in SETBIT macro */
! 		nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
  		SETBIT(sign, nBit);
  	}
  }
*************** BloomInitPage(Page page, uint16 flags)
*** 362,400 ****
  }
  
  /*
-  * Adjust options of bloom index.
-  *
-  * This must produce default options when *opts is initially all-zero.
-  */
- static void
- adjustBloomOptions(BloomOptions *opts)
- {
- 	int				i;
- 
- 	/* Default length of bloom filter is 5 of 16-bit integers */
- 	if (opts->bloomLength <= 0)
- 		opts->bloomLength = 5;
- 	else if (opts->bloomLength > MAX_BLOOM_LENGTH)
- 		ereport(ERROR,
- 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- 				 errmsg("length of bloom signature (%d) is greater than maximum %d",
- 						opts->bloomLength, MAX_BLOOM_LENGTH)));
- 
- 	/* Check signature length */
- 	for (i = 0; i < INDEX_MAX_KEYS; i++)
- 	{
- 		/*
- 		 * Zero and negative number of bits is meaningless.  Also setting
- 		 * more bits than signature have seems useless.  Replace both cases
- 		 * with 2 bits default.
- 		 */
- 		if (opts->bitSize[i] <= 0
- 			|| opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
- 			opts->bitSize[i] = 2;
- 	}
- }
- 
- /*
   * Fill in metapage for bloom index.
   */
  void
--- 393,398 ----
*************** BloomFillMetapage(Relation index, Page m
*** 405,418 ****
  
  	/*
  	 * Choose the index's options.  If reloptions have been assigned, use
! 	 * those, otherwise create default options by applying adjustBloomOptions
! 	 * to a zeroed chunk of memory.  We apply adjustBloomOptions to existing
! 	 * reloptions too, just out of paranoia; they should be valid already.
  	 */
  	opts = (BloomOptions *) index->rd_options;
  	if (!opts)
! 		opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
! 	adjustBloomOptions(opts);
  
  	/*
  	 * Initialize contents of meta page, including a copy of the options,
--- 403,413 ----
  
  	/*
  	 * Choose the index's options.  If reloptions have been assigned, use
! 	 * those, otherwise create default options.
  	 */
  	opts = (BloomOptions *) index->rd_options;
  	if (!opts)
! 		opts = makeDefaultBloomOptions();
  
  	/*
  	 * Initialize contents of meta page, including a copy of the options,
*************** bloptions(Datum reloptions, bool validat
*** 462,491 ****
  	relopt_value *options;
  	int			numoptions;
  	BloomOptions *rdopts;
- 	relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
- 	int			i;
- 	char		buf[16];
- 
- 	/* Option for length of signature */
- 	tab[0].optname = "length";
- 	tab[0].opttype = RELOPT_TYPE_INT;
- 	tab[0].offset = offsetof(BloomOptions, bloomLength);
- 
- 	/* Number of bits for each of possible columns: col1, col2, ... */
- 	for (i = 0; i < INDEX_MAX_KEYS; i++)
- 	{
- 		snprintf(buf, sizeof(buf), "col%d", i + 1);
- 		tab[i + 1].optname = pstrdup(buf);
- 		tab[i + 1].opttype = RELOPT_TYPE_INT;
- 		tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
- 	}
  
  	options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
  	rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
  	fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
! 				   validate, tab, INDEX_MAX_KEYS + 1);
  
! 	adjustBloomOptions(rdopts);
  
  	return (bytea *) rdopts;
  }
--- 457,471 ----
  	relopt_value *options;
  	int			numoptions;
  	BloomOptions *rdopts;
  
+ 	/* Parse the user-given reloptions */
  	options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
  	rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
  	fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
! 				   validate, bl_relopt_tab, lengthof(bl_relopt_tab));
  
! 	/* Convert signature length to words, rounding up */
! 	rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
  
  	return (bytea *) rdopts;
  }
diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml
index 49cb066..abf30e7 100644
*** a/doc/src/sgml/bloom.sgml
--- b/doc/src/sgml/bloom.sgml
***************
*** 8,15 ****
   </indexterm>
  
   <para>
!   <literal>bloom</> is a module which implements an index access method.  It comes
!   as an example of custom access methods and generic WAL records usage.  But it
    is also useful in itself.
   </para>
  
--- 8,15 ----
   </indexterm>
  
   <para>
!   <literal>bloom</> is a module that implements an index access method.  It comes
!   as an example of custom access methods and generic WAL record usage.  But it
    is also useful in itself.
   </para>
  
***************
*** 22,29 ****
     allows fast exclusion of non-candidate tuples via signatures.
     Since a signature is a lossy representation of all indexed attributes,
     search results must be rechecked using heap information.
!    The user can specify signature length (in uint16, default is 5) and the
!    number of bits, which can be set per attribute (1 < colN < 2048).
    </para>
  
    <para>
--- 22,30 ----
     allows fast exclusion of non-candidate tuples via signatures.
     Since a signature is a lossy representation of all indexed attributes,
     search results must be rechecked using heap information.
!    The user can specify signature length in bits (default 80, maximum 4096)
!    and the number of bits generated for each index column (default 2,
!    maximum 4095).
    </para>
  
    <para>
***************
*** 51,64 ****
      <term><literal>length</></term>
      <listitem>
       <para>
!       Length of signature in uint16 type values
       </para>
      </listitem>
     </varlistentry>
     </variablelist>
     <variablelist>
     <varlistentry>
!     <term><literal>col1 &mdash; col16</></term>
      <listitem>
       <para>
        Number of bits for corresponding column
--- 52,65 ----
      <term><literal>length</></term>
      <listitem>
       <para>
!       Length of signature in bits
       </para>
      </listitem>
     </varlistentry>
     </variablelist>
     <variablelist>
     <varlistentry>
!     <term><literal>col1 &mdash; col32</></term>
      <listitem>
       <para>
        Number of bits for corresponding column
***************
*** 77,88 ****
  
  <programlisting>
  CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
!        WITH (length=5, col1=2, col2=2, col3=4);
  </programlisting>
  
    <para>
     Here, we created a bloom index with a signature length of 80 bits,
!    and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
    </para>
  
    <para>
--- 78,89 ----
  
  <programlisting>
  CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
!        WITH (length=80, col1=2, col2=2, col3=4);
  </programlisting>
  
    <para>
     Here, we created a bloom index with a signature length of 80 bits,
!    and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
    </para>
  
    <para>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to