> Or maybe I just don't understand the proposal. Perhaps it'd be best if
> jian wrote a patch illustrating the idea, and showing how it performs
> compared to the current approach.
Currently Joel's idea is an int4hashset, based on the code Tomas first wrote.
It looks like a non-nested collection of unique int4 values. The external text
format looks like {int4, int4, int4}.
The structure looks like (header + capacity slots * int4).
Within the capacity slots, some slots are empty and some hold unique values.
The textual int4hashset looks like a one-dimensional array,
so I copied/imitated the src/backend/utils/adt/arrayfuncs.c code and rewrote
slightly generic hashset input and output functions.
See the attached C file.
It works fine for non-null input and output for {int4hashset, int8hashset,
timestamphashset, intervalhashset, uuidhashset}.
/*
gcc -I/home/jian/postgres/2023_05_25_beta5421/include/server -fPIC -c /home/jian/Desktop/regress_pgsql/set.c
gcc -shared -o /home/jian/Desktop/regress_pgsql/set.so /home/jian/Desktop/regress_pgsql/set.o
CREATE OR REPLACE FUNCTION set_in_out_test(cstring,int, int) RETURNS BOOL SET search_path from current
AS '/home/jian/Desktop/regress_pgsql/set', 'set_in_out_test'
LANGUAGE C IMMUTABLE;
select set_in_out_test('{111,-0,+0,-2147483648,2147483647}',23,-1);
select set_in_out_test('{}',23,-1);
select set_in_out_test('{-1111111,-9223372036854775808,9223372036854775807,-0,+0}',20,-1);
select set_in_out_test('{2022-01-01T11:21:21.741077 +05:30, 2022-01-01T11:21:21.741076 +05:30,-infinity,+infinity}',1114,-1);
select set_in_out_test('{"1hour", " 0:00","-0:00","2000:10:01.23456789"}',1186,-1);
select set_in_out_test('{03aadb61-3e30-4112-a46a-8cd72f29876b,2c1e4b2c-b8f4-470a-843f-dd094e4743a6
,ef1a3202-a84f-4321-ab3c-45e04bf4c42d,2c1e4b2c-b8f4-470a-843f-dd094e4743a6}',2950,-1);
-----------parse fail cases.
select set_in_out_test('{{-1111111,-9223372036854775808,9223372036854775807,-0,+0}}',20,-1);
select set_in_out_test('{-1111111,-9223372036854775808,9223372036854775807,-0,+0.1}',20,-1);
select set_in_out_test('{-1111111,-9223372036854775808,9223372036854775808,-0,+0.1}',20,-1);
select set_in_out_test('{2022-01-01T11:21:21.7410as +05:30}',1114,-1);
select set_in_out_test('{2022-01-01T11:21:21.7410 +05:3a}',1114,-1);
select set_in_out_test('{2022-01-01T11:21:21.7410 +0x:31}',1114,-1);
select set_in_out_test('{{2022-01-01T11:21:21.7410 +01:31}}',1114,-1);
select set_in_out_test('{NULL,1,2,NULL,NULL}',23,-1);
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
// #include "utils/array.h"
#include "utils/numeric.h"
#include "utils/timestamp.h"
#include "funcapi.h"
#include "utils/lsyscache.h"
#include "utils/fmgrprotos.h"
#include "common/hashfn.h"
#include "utils/uuid.h"
/* Marks this shared object as a PostgreSQL loadable module. */
PG_MODULE_MAGIC;
/* Registers set_in_out_test() as a version-1 calling convention function. */
PG_FUNCTION_INFO_V1(set_in_out_test);
/* Integer division of a by b, rounded up; used below to size bitmaps in bytes. */
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
/* Slot increment the set_add_* functions apply when a probed slot is occupied. */
#define HASHSET_STEP 13
/* Return a SetType pointer from a function; simply expands to PG_RETURN_POINTER. */
#define PG_RETURN_SETTYPE_P(x) PG_RETURN_POINTER(x)
/*
* SetType: header of a hash set value.
*
* Sets are varlena objects, so must meet the varlena convention that
* the first int32 of the object contains the total object size in bytes.
* Be sure to use VARSIZE() and SET_VARSIZE() to access it, though!
*
* The header is followed by a slot-occupancy bitmap with one bit per
* capacity slot (see SET_BITMAP), optionally a null bitmap (only when
* dataoffset is nonzero, see SET_HASNULL), and then the slot data
* (see SET_DATA_PTR).
*/
typedef struct SetType
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 capacity; /* number of slots available for elements */
int32 dataoffset; /* offset to data, or 0 if there is no null bitmap */
int32 nelements; /* number of items added to the hashset */
Oid elemtype; /* element type OID */
} SetType;
/*
 * Accessor macros for SetType.
 *
 * Layout of a set value, in order:
 *		SetType header
 *		slot-occupancy bitmap	CEIL_DIV(capacity, 8) bytes
 *		null bitmap (optional)	(nelements + 7) / 8 bytes
 *		padding up to a MAXALIGN boundary
 *		slot data
 *
 * Every macro argument is parenthesized so that expressions such as "&set"
 * can be passed safely.
 */
#define SET_SIZE(a)				VARSIZE(a)
#define SET_ITEM(a)				((a)->nelements)
#define SET_CAPACITY(a)			((a)->capacity)
#define SET_HASNULL(a)			((a)->dataoffset != 0)
#define SET_ELEMTYPE(a)			((a)->elemtype)

/* Bytes before the slot data when the set has no null bitmap. */
#define SET_OVERHEAD_NONULLS(capacity) \
	MAXALIGN(sizeof(SetType) + CEIL_DIV((capacity), 8))

/* Bytes before the slot data when a null bitmap for nelements items exists. */
#define SET_OVERHEAD_WITHNULLS(capacity, nelements) \
	MAXALIGN(sizeof(SetType) + CEIL_DIV((capacity), 8) + \
			 ((nelements) + 7) / 8)

/* Start of the slot-occupancy bitmap, directly after the header. */
#define SET_BITMAP(a) \
	(((char *) (a)) + sizeof(SetType))

/* Offset of the slot data from the start of the set. */
#define SET_DATA_OFFSET(a) \
	(SET_HASNULL(a) ? (a)->dataoffset : SET_OVERHEAD_NONULLS(SET_CAPACITY(a)))

/*
 * Start of the null bitmap, or NULL if the set has none.
 *
 * The null bitmap lives after the slot-occupancy bitmap, whose size depends
 * on the capacity.  (Offsetting by the element count instead would make the
 * two bitmaps overlap, most visibly while nelements is still zero.)
 */
#define SET_NULLBITMAP(a) \
	(SET_HASNULL(a) ? \
	 (bits8 *) (((char *) (a)) + sizeof(SetType) + \
				CEIL_DIV(SET_CAPACITY(a), 8)) \
	 : (bits8 *) NULL)

/*
 * Returns a pointer to the actual slot data.
 */
#define SET_DATA_PTR(a) \
	(((char *) (a)) + SET_DATA_OFFSET(a))
/*
* SetMetaState: element-type lookup results cached across calls.
*
* set_in_out_test() keeps one of these in fcinfo->flinfo->fn_extra and
* refreshes it with get_type_io_data() whenever the requested element type
* differs from element_type.
*/
typedef struct SetMetaState
{
Oid element_type; /* element type the cached fields describe */
int16 typlen; /* element type length, as reported by get_type_io_data */
bool typbyval; /* element type pass-by-value flag */
char typalign; /* element type alignment code */
char typdelim; /* element delimiter used when parsing the literal */
Oid typioparam; /* parameter passed on to the element input function */
Oid typiofunc; /* OID of the element type's input function */
FmgrInfo proc; /* lookup info for typiofunc, filled by fmgr_info_cxt */
} SetMetaState;
/* Forward declarations for the functions defined later in this file. */

/* Pre-scan a set literal and count its items. */
static int SetCount(const char *str, int *dim, char typdelim, Node *escontext);
/* Test one bit of a null bitmap. */
static bool set_get_isnull(const bits8 *nullbitmap, int offset);
/* Per-type insertion routines; each returns the set it was given. */
SetType *set_add_int32(SetType *set, Datum dvalue);
SetType *set_add_int64(SetType *set, Datum dvalue);
SetType *set_add_interval(SetType *set, Datum dvalue);
SetType *set_add_uuid(SetType *set, Datum dvalue);
SetType *set_add_timestamp(SetType *set, Datum dvalue);
/* Build a header-only set with zero capacity. */
SetType *construct_empty_set(Oid elmtype);
/* NOTE(review): set_init is declared here but no definition is visible in this file. */
SetType *set_init(int capacity,Oid elemtype, bool hasnull);
/* Render a set as a "{...}" text string. */
char * SetOut(SetType *retval);
/* Fill the set's null bitmap from an array of is-null flags. */
void
CopySetNulls(SetType *set,
Datum *values,
bool *nulls,
int nitems,
int typlen,
bool typbyval,
char typalign,
bool freedata);
/*
 * set_get_isnull
 *		Report whether the element at a given position is marked NULL.
 *
 * nullbitmap: the set's null bitmap, or NULL when the set has none
 * offset: 0-based element number to test
 *
 * A set bit means "not null".  Without a bitmap every element is taken to
 * be non-null.
 */
static bool
set_get_isnull(const bits8 *nullbitmap, int offset)
{
	int			mask;

	/* no bitmap at all: nothing can be null */
	if (nullbitmap == NULL)
		return false;

	mask = 1 << (offset % 8);

	/* the element is null exactly when its bit is clear */
	return (nullbitmap[offset / 8] & mask) == 0;
}
/*
 * set_isspace() --- a non-locale-dependent isspace()
 *
 * The set literal parser must not depend on the locale setting, otherwise
 * the same literal could be read differently from one session to the next.
 * So only the six traditional ASCII whitespace characters count as space.
 */
static bool
set_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
/*
* ReadSetStr (defined further below, after SetCount):
* parses the set literal pointed to by "arrayStr" and converts the values
* to internal format. Unspecified elements are initialized to nulls.
* The number of items must already have been determined by SetCount().
*
* Inputs:
* arrayStr: the string to parse.
* CAUTION: the contents of "arrayStr" will be modified!
* origStr: the unmodified input string, used only in error messages.
* nitems: total number of array elements, as already determined.
* ndim: number of array dimensions
* dim[]: array axis lengths
* inputproc: type-specific input procedure for element datatype.
* typioparam, typmod: auxiliary values to pass to inputproc.
* typdelim: the value delimiter (type-specific).
* typlen, typbyval, typalign: storage parameters of element datatype.
*
* Outputs:
* values[]: filled with converted data values.
* nulls[]: filled with is-null markers.
* *hasnulls: set true iff there are any null elements.
* *nbytes: set to total size of data area needed (including alignment
* padding but not including array header overhead).
* *escontext: if this points to an ErrorSaveContext, details of
* any error are reported there.
*
* Result:
* true for success, false for failure (if escontext is provided).
*
* Note that values[] and nulls[] are allocated by the caller, and must have
* nitems elements.
*/
/*
* SetParseState: states of the literal scanner in SetCount().
*
* SetCount() uses the current state to decide which characters are legal
* next, and reports a "malformed array literal" error otherwise.
*/
typedef enum
{
SET_NO_LEVEL, /* initial state: no '{' seen yet */
SET_LEVEL_STARTED, /* just after an unquoted '{' */
SET_ELEM_STARTED, /* inside an unquoted element (or after an escape) */
SET_ELEM_COMPLETED, /* tested by SetCount() but never assigned in this file */
SET_QUOTED_ELEM_STARTED, /* just after an opening double quote */
SET_QUOTED_ELEM_COMPLETED, /* just after a closing double quote */
SET_ELEM_DELIMITED, /* just after a delimiter that followed an element */
SET_LEVEL_COMPLETED, /* just after an unquoted '}' */
SET_LEVEL_DELIMITED /* just after a delimiter that followed a '}' */
} SetParseState;
/*
* SetCount
* Pre-scans a set literal, validates its syntax and counts its items.
*
* str: the literal to scan (not modified).
* dim: output array of size MAXDIM; dim[0] receives the item count.
* typdelim: the element delimiter character.
* escontext: optional error-save context handed to ereturn().
*
* Returns the number of dimensions as function result: 0 for an empty
* literal such as "{}" (dim[] is then left zeroed), otherwise ndim, which
* starts at 1 and cannot grow because MAXDIM is 1.
*
* If we detect an error, fill *escontext with error details and return -1
* (unless escontext isn't provided, in which case errors will be thrown).
*/
/* Sets are not nested: override MAXDIM so that a second '{' level is rejected. */
#undef MAXDIM
#define MAXDIM 1
static int
SetCount(const char *str, int *dim, char typdelim, Node *escontext)
{
int nest_level = 0,
i;
/* temp[] counts items per level; nelems[]/nelems_last[] compare sibling levels */
int ndim = 1,
temp[MAXDIM],
nelems[MAXDIM],
nelems_last[MAXDIM];
bool in_quotes = false;
bool eoArray = false;
bool empty_array = true;
const char *ptr;
SetParseState parse_state = SET_NO_LEVEL;
/* Start with zeroed counters; dim[] is zeroed too so callers see 0 on early exit */
for (i = 0; i < MAXDIM; ++i)
{
temp[i] = dim[i] = nelems_last[i] = 0;
nelems[i] = 1;
}
ptr = str;
/* Outer loop: one iteration per item, until the closing brace is consumed */
while (!eoArray)
{
bool itemdone = false;
/* Inner loop: one iteration per character of the current item */
while (!itemdone)
{
/* Any started element means the literal is not empty */
if (parse_state == SET_ELEM_STARTED ||
parse_state == SET_QUOTED_ELEM_STARTED)
empty_array = false;
switch (*ptr)
{
case '\0':
/* Signal a premature end of the string */
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected end of input.")));
case '\\':
/*
* An escape must be after a level start, after an element
* start, or after an element delimiter. In any case we
* now must be past an element start.
*/
if (parse_state != SET_LEVEL_STARTED &&
parse_state != SET_ELEM_STARTED &&
parse_state != SET_QUOTED_ELEM_STARTED &&
parse_state != SET_ELEM_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
'\\')));
if (parse_state != SET_QUOTED_ELEM_STARTED)
parse_state = SET_ELEM_STARTED;
/* skip the escaped character */
if (*(ptr + 1))
ptr++;
else
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected end of input.")));
break;
case '"':
/*
* A quote must be after a level start, after a quoted
* element start, or after an element delimiter. In any
* case we now must be past an element start.
*/
if (parse_state != SET_LEVEL_STARTED &&
parse_state != SET_QUOTED_ELEM_STARTED &&
parse_state != SET_ELEM_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected array element.")));
/* Toggle quoting; the state records whether we opened or closed */
in_quotes = !in_quotes;
if (in_quotes)
parse_state = SET_QUOTED_ELEM_STARTED;
else
parse_state = SET_QUOTED_ELEM_COMPLETED;
break;
case '{':
if (!in_quotes)
{
/*
* A left brace can occur if no nesting has occurred
* yet, after a level start, or after a level
* delimiter.
*/
if (parse_state != SET_NO_LEVEL &&
parse_state != SET_LEVEL_STARTED &&
parse_state != SET_LEVEL_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
'{')));
parse_state = SET_LEVEL_STARTED;
/* With MAXDIM == 1 this rejects any nested brace level */
if (nest_level >= MAXDIM)
ereturn(escontext, -1,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
nest_level + 1, MAXDIM)));
temp[nest_level] = 0;
nest_level++;
if (ndim < nest_level)
ndim = nest_level;
}
break;
case '}':
if (!in_quotes)
{
/*
* A right brace can occur after an element start, an
* element completion, a quoted element completion, or
* a level completion.
*/
/* The last alternative additionally accepts the empty literal "{}" */
if (parse_state != SET_ELEM_STARTED &&
parse_state != SET_ELEM_COMPLETED &&
parse_state != SET_QUOTED_ELEM_COMPLETED &&
parse_state != SET_LEVEL_COMPLETED &&
!(nest_level == 1 && parse_state == SET_LEVEL_STARTED))
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
'}')));
parse_state = SET_LEVEL_COMPLETED;
if (nest_level == 0)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unmatched \"%c\" character.", '}')));
nest_level--;
/* Sibling levels must hold the same number of items */
if (nelems_last[nest_level] != 0 &&
nelems[nest_level] != nelems_last[nest_level])
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Multidimensional arrays must have "
"sub-arrays with matching "
"dimensions.")));
nelems_last[nest_level] = nelems[nest_level];
nelems[nest_level] = 1;
/* Closing the outermost level ends both loops */
if (nest_level == 0)
eoArray = itemdone = true;
else
{
/*
* We don't set itemdone here; see comments in
* ReadSetStr
*/
temp[nest_level - 1]++;
}
}
break;
default:
if (!in_quotes)
{
if (*ptr == typdelim)
{
/*
* Delimiters can occur after an element start, an
* element completion, a quoted element
* completion, or a level completion.
*/
if (parse_state != SET_ELEM_STARTED &&
parse_state != SET_ELEM_COMPLETED &&
parse_state != SET_QUOTED_ELEM_COMPLETED &&
parse_state != SET_LEVEL_COMPLETED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
typdelim)));
if (parse_state == SET_LEVEL_COMPLETED)
parse_state = SET_LEVEL_DELIMITED;
else
parse_state = SET_ELEM_DELIMITED;
/* The delimiter terminates the current item */
itemdone = true;
nelems[nest_level - 1]++;
}
else if (!set_isspace(*ptr))
{
/*
* Other non-space characters must be after a
* level start, after an element start, or after
* an element delimiter. In any case we now must
* be past an element start.
*/
if (parse_state != SET_LEVEL_STARTED &&
parse_state != SET_ELEM_STARTED &&
parse_state != SET_ELEM_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected array element.")));
parse_state = SET_ELEM_STARTED;
}
}
break;
}
/* When the item is done, the outer loop steps over its terminator */
if (!itemdone)
ptr++;
}
/* One more item counted at the innermost level */
temp[ndim - 1]++;
ptr++;
}
/* only whitespace is allowed after the closing brace */
while (*ptr)
{
if (!set_isspace(*ptr++))
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Junk after closing right brace.")));
}
/* special case for an empty array */
if (empty_array)
return 0;
/* Publish the per-level item counts to the caller */
for (i = 0; i < ndim; ++i)
dim[i] = temp[i];
return ndim;
}
/*
 * When true, an unquoted item spelled NULL (in any letter case) is read as
 * a null element by ReadSetStr().
 * NOTE(review): the name matches the backend's array_nulls setting variable;
 * confirm that defining it again in this module is intended.
 */
bool		Array_nulls = true;

/*
 * ReadSetStr
 *		Parse a set literal and convert each item to its internal Datum form.
 *
 * Inputs:
 *	arrayStr: the string to parse.
 *			  CAUTION: the contents of "arrayStr" will be modified!
 *	origStr: the unmodified input string, used only in error messages.
 *	nitems: number of items, as already determined by SetCount().
 *	ndim: number of dimensions (not used by the parsing itself).
 *	dim: item count per dimension (only used in a log message).
 *	inputproc: type-specific input procedure for the element datatype.
 *	typioparam, typmod: auxiliary values to pass to inputproc.
 *	typdelim: the value delimiter (type-specific).
 *	typlen, typbyval, typalign: storage parameters of element datatype.
 *
 * Outputs:
 *	values[]: filled with converted data values.
 *	nulls[]: filled with is-null markers.
 *	*hasnulls: set true iff there are any null elements.
 *	*nbytes: set to total size of data area needed (including alignment
 *			 padding but not including header overhead).
 *	*escontext: if this points to an ErrorSaveContext, details of
 *			 any error are reported there.
 *
 * Result:
 *	true for success, false for failure (if escontext is provided).
 *
 * Note that values[] and nulls[] are allocated by the caller, and must have
 * nitems elements.
 */
static bool
ReadSetStr(char *arrayStr,
		   const char *origStr,
		   int nitems,
		   int ndim,
		   int *dim,
		   FmgrInfo *inputproc,
		   Oid typioparam,
		   int32 typmod,
		   char typdelim,
		   int typlen,
		   bool typbyval,
		   char typalign,
		   Datum *values,
		   bool *nulls,
		   bool *hasnulls,
		   int32 *nbytes,
		   Node *escontext)
{
	int			i;
	char	   *srcptr;
	bool		in_quotes = false;
	bool		eoArray = false;
	bool		hasnull;
	int32		totbytes;
	int			indx = 0;

	/* Initialize is-null markers to true */
	memset(nulls, true, nitems * sizeof(bool));

	/*
	 * We have to remove " and \ characters to create a clean item value to
	 * pass to the datatype input routine.  We overwrite each item value
	 * in-place within arrayStr to do this.  srcptr is the current scan point,
	 * and dstptr is where we are copying to.
	 *
	 * We also want to suppress leading and trailing unquoted whitespace.  We
	 * use the leadingspace flag to suppress leading space.  Trailing space is
	 * tracked by using dstendptr to point to the last significant output
	 * character.
	 *
	 * The error checking in this routine is mostly pro-forma, since we expect
	 * that SetCount() already validated the string.  So we don't bother with
	 * errdetail messages.
	 */
	srcptr = arrayStr;
	while (!eoArray)
	{
		bool		itemdone = false;
		bool		leadingspace = true;
		bool		hasquoting = false;
		char	   *itemstart;
		char	   *dstptr;
		char	   *dstendptr;

		itemstart = dstptr = dstendptr = srcptr;

		while (!itemdone)
		{
			switch (*srcptr)
			{
				case '\0':
					/* Signal a premature end of the string */
					ereturn(escontext, false,
							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
							 errmsg("malformed array literal: \"%s\"",
									origStr)));
					break;
				case '\\':
					/* Skip backslash, copy next character as-is. */
					srcptr++;
					if (*srcptr == '\0')
						ereturn(escontext, false,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"",
										origStr)));

					/*
					 * The escaped character must be copied here; leaving it
					 * for the next loop iteration would let it be treated as
					 * a quote, brace or delimiter again.
					 */
					*dstptr++ = *srcptr++;
					/* Treat the escaped character as non-whitespace */
					leadingspace = false;
					dstendptr = dstptr;
					hasquoting = true;	/* can't be a NULL marker */
					break;
				case '"':
					in_quotes = !in_quotes;
					if (in_quotes)
						leadingspace = false;
					else
					{
						/*
						 * Advance dstendptr when we exit in_quotes; this
						 * saves having to do it in all the other in_quotes
						 * cases.
						 */
						dstendptr = dstptr;
					}
					hasquoting = true;	/* can't be a NULL marker */
					srcptr++;
					break;
				case '{':
					/* An unquoted brace only opens the set; drop it */
					if (!in_quotes)
						srcptr++;
					else
						*dstptr++ = *srcptr++;
					break;
				case '}':
					/* An unquoted brace ends the last item and the set */
					if (!in_quotes)
					{
						eoArray = itemdone = true;
						srcptr++;
					}
					else
						*dstptr++ = *srcptr++;
					break;
				default:
					if (in_quotes)
						*dstptr++ = *srcptr++;
					else if (*srcptr == typdelim)
					{
						itemdone = true;
						srcptr++;
					}
					else if (set_isspace(*srcptr))
					{
						/*
						 * If leading space, drop it immediately.  Else, copy
						 * but don't advance dstendptr.
						 */
						if (leadingspace)
							srcptr++;
						else
							*dstptr++ = *srcptr++;
					}
					else
					{
						*dstptr++ = *srcptr++;
						leadingspace = false;
						dstendptr = dstptr;
					}
					break;
			}
		}

		Assert(dstptr < srcptr);
		/* Terminate the cleaned-up item, cutting off trailing whitespace */
		*dstendptr = '\0';

		/*
		 * SetCount() told the caller how many slots to allocate; never write
		 * past them even if the two scanners should ever disagree.
		 */
		if (indx >= nitems)
			ereturn(escontext, false,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("malformed array literal: \"%s\"",
							origStr)));

		if (Array_nulls && !hasquoting &&
			pg_strcasecmp(itemstart, "NULL") == 0)
		{
			/* it's a NULL item */
			if (!InputFunctionCallSafe(inputproc, NULL,
									   typioparam, typmod,
									   escontext,
									   &values[indx]))
				return false;
			nulls[indx] = true;
		}
		else
		{
			elog(INFO, "line[%04d] indx:%d itemstart:%s, typioparam:%u,typmod:%d",
				 __LINE__, indx, itemstart, typioparam, typmod);
			if (!InputFunctionCallSafe(inputproc, itemstart,
									   typioparam, typmod,
									   escontext,
									   &values[indx]))
				return false;
			nulls[indx] = false;
		}
		indx++;
	}

	/*
	 * Check for nulls, compute total data space needed
	 */
	hasnull = false;
	totbytes = 0;
	elog(INFO, "line[%04d] nitems=%d, dim=%d", __LINE__, nitems, *dim);
	for (i = 0; i < nitems; i++)
	{
		if (nulls[i])
		{
			hasnull = true;
			continue;
		}

		/* let's just make sure data is not toasted */
		if (typlen == -1)
			values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
		totbytes = att_addlength_datum(totbytes, typlen, values[i]);
		totbytes = att_align_nominal(totbytes, typalign);
		elog(INFO, "line[%04d] total bytes: %d", __LINE__, totbytes);
		/* check for overflow of total request */
		if (!AllocSizeIsValid(totbytes))
			ereturn(escontext, false,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("array size exceed the maximum allowed (%d)",
							(int) MaxAllocSize)));
	}

	*hasnulls = hasnull;
	*nbytes = totbytes;
	elog(INFO, "LINE[%04d] total bytes: %d, hasnulls:%d", __LINE__, totbytes, *hasnulls);
	return true;
}
/*
* Copy datum to *dest and return total space used (including align padding)
*
* Caller must have handled case of NULL element
*/
//imitate array_in.
Datum
set_in_out_test(PG_FUNCTION_ARGS)
{
char *string = PG_GETARG_CSTRING(0); /* external form */
Oid element_type = PG_GETARG_OID(1); /* type of an array * element */
int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */
Node *escontext = fcinfo->context;
int typlen;
bool typbyval;
char typalign;
char typdelim = ',';
Oid typioparam;
char *string_save,
*p;
int nitems;
Datum *dataPtr;
bool *nullsPtr;
bool hasnulls;
int32 nbytes;
int32 dataoffset;
SetType *retval;
int dim;
int ndim;
SetMetaState *my_extra;
int capacity =64;
bits8 *nullmap;
/*
* We arrange to look up info about element type, including its input
* conversion proc, only once per series of calls, assuming the element
* type doesn't change underneath us.
*/
my_extra = (SetMetaState *) fcinfo->flinfo->fn_extra;
if (my_extra == NULL)
{
elog(INFO,"line[%04d] my_extra == NULL looped",__LINE__);
fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt
,sizeof(SetMetaState));
my_extra = (SetMetaState *) fcinfo->flinfo->fn_extra;
my_extra->element_type = ~element_type;
}
if (my_extra->element_type != element_type)
{
elog(INFO,"line[%04d] my_extra->element_type != element_type looped",__LINE__);
/*
* Get info about element type, including its input conversion proc
*/
get_type_io_data(element_type,IOFunc_input,
&my_extra->typlen,&my_extra->typbyval,
&my_extra->typalign, &my_extra->typdelim,
&my_extra->typioparam,&my_extra->typiofunc);
fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
fcinfo->flinfo->fn_mcxt);
my_extra->element_type = element_type;
}
typlen = my_extra->typlen;
typbyval = my_extra->typbyval;
typalign = my_extra->typalign;
typdelim = my_extra->typdelim;
typioparam = my_extra->typioparam;
elog(INFO,"line[%04d]: typlen: %d, typbyval:%d typalign %c typdelim %c typioparam %d"
,__LINE__,typlen,typbyval, typalign, typdelim, typioparam);
ndim = SetCount(string,&dim,typdelim, escontext);
nitems = dim;
elog(INFO,"line[%04d]: ndim:%d dim:%d nitems:%d",__LINE__,ndim,dim, nitems);
/* Empty set? */
if (nitems == 0)
{
// PG_RETURN_SETTYPE_P(construct_empty_set(element_type));
elog(INFO,"line %04d empty set. now leaving",__LINE__);
PG_RETURN_BOOL(true);
}
dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
nullsPtr = (bool *) palloc(nitems * sizeof(bool));
/* Make a modifiable copy of the input */
string_save = pstrdup(string);
/*
* If the input string starts with dimension info, read and use that.
* Otherwise, we require the input to be in curly-brace style, and we
* prescan the input to determine dimensions.
*
* Dimension info takes the form of one or more [n] or [m:n] items. The
* outer loop iterates once per dimension item.
*/
p = string_save;
elog(INFO,"line %d, function oid : %d",__LINE__,*(&my_extra->proc.fn_oid));
Assert(nitems == dim);
if (!ReadSetStr(p, string,
nitems, ndim, &dim,
&my_extra->proc, typioparam, typmod,
typdelim,
typlen, typbyval, typalign,
dataPtr, nullsPtr,
&hasnulls, &nbytes, escontext))
{
elog(INFO,"LINE[%04d] ReadSetStr FALSE, nbytes %d, hasnulls %d dim:%d",__LINE__,nbytes,hasnulls,dim);
PG_RETURN_BOOL(false);
}
elog(INFO,"LINE[%04d] ReadSetStr OK, nbytes %d, hasnulls %d dim:%d",__LINE__,nbytes,hasnulls,dim);
Assert(ndim == 1); // or deal with ndim = 0;
// compute init size needed initial.
if (hasnulls)
{
dataoffset = SET_OVERHEAD_WITHNULLS(capacity, nitems);
if (capacity < nitems)
capacity = nitems + 7 - (nitems + 7) % 8;
nbytes = capacity * typlen;
nbytes += dataoffset;
elog(INFO,"LINE[%04d] capacity:%d nitems:%d allocated bytes: %d SET_OVERHEAD_WITHNULLS :%ld"
,__LINE__,capacity, nitems,nbytes,SET_OVERHEAD_WITHNULLS(capacity, nitems));
}
else
{
//init size no nulls
dataoffset = 0;
if (capacity < nitems)
capacity = nitems + 7 - (nitems + 7) % 8;
nbytes = capacity * typlen;
nbytes += SET_OVERHEAD_NONULLS(capacity);
elog(INFO,"LINE[%04d] SET_OVERHEAD_NONULLS: %ld allocated bytes: %d"
,__LINE__,SET_OVERHEAD_NONULLS(capacity), nbytes);
}
//allocate bytes and set some SetType property.
retval = (SetType *) palloc0(nbytes);
SET_VARSIZE(retval, nbytes);
retval->capacity = capacity;
retval->dataoffset = dataoffset;
retval->nelements = 0;
retval->elemtype = element_type;
elog(INFO,"LINE[%04d] SetType meta info: retval->nelements:%d, retval->capacity:%d,retval->elemtype:%d,retval->dataoffset:%d"
,__LINE__,retval->nelements, retval->capacity,retval->elemtype,retval->dataoffset);
//copy nullsPtr value to retval.
CopySetNulls(retval,dataPtr,nullsPtr,nitems,typlen,typbyval,typalign,true);
switch(element_type)
{
case 23:
// retval = set_add_int32(retval, dataPtr,&nitems);
for(int i = 0; i < nitems; i++)
{
set_add_int32(retval, dataPtr[i]);
}
break;
case 20:
for(int i = 0; i < nitems; i++)
{
set_add_int64(retval, dataPtr[i]);
}
break;
case 1114:
for(int i = 0; i < nitems; i++)
{
set_add_timestamp(retval, dataPtr[i]);
}
break;
case 1186:
for(int i = 0; i < nitems; i++)
{
set_add_interval(retval, dataPtr[i]);
}
elog(INFO,"now retval should have %d interval element oid:%d", retval->nelements,retval->elemtype);
break;
case 2950:
for(int i = 0; i < nitems; i++)
{
set_add_uuid(retval, dataPtr[i]);
}
elog(INFO,"now retval should have %d interval element oid:%d", retval->nelements,retval->elemtype);
break;
default:
elog(INFO,"line[%04d] now only support x type",__LINE__);
}
elog(INFO,"LINE[%04d] SetType meta info: retval->nelements:%d, retval->capacity:%d,retval->elemtype:%d,retval->dataoffset:%d"
,__LINE__,retval->nelements, retval->capacity,retval->elemtype,retval->dataoffset);
//set out test.
char *generic_setout;
generic_setout = SetOut(retval);
elog(INFO,"line[%04d]: set output:%s",__LINE__,generic_setout);
pfree(dataPtr);
pfree(nullsPtr);
// PG_RETURN_ARRAYTYPE_P(retval);
PG_RETURN_BOOL(true);
}
/*
 * CopySetNulls
 *		Fill a set's null bitmap from an array of is-null flags
 *		(imitates the null handling of CopyArrayEls; no data is copied).
 *
 * set: set object (with header fields already filled in)
 * values: not used here
 * nulls: array of is-null flags (can be NULL if no nulls)
 * nitems: number of flags to process
 * typlen, typbyval, typalign, freedata: not used here
 *
 * In the bitmap a set bit means "not null".  If the set has no null bitmap,
 * nothing is written, and a null item is reported as an error.
 */
void
CopySetNulls(SetType *set,
			 Datum *values,
			 bool *nulls,
			 int nitems,
			 int typlen,
			 bool typbyval,
			 char typalign,
			 bool freedata)
{
	bits8	   *nullbits = SET_NULLBITMAP(set);
	int			pending = 0;	/* bits collected for the current byte */
	int			bitpos = 0;		/* next bit position within that byte */
	int			item;

	for (item = 0; item < nitems; item++)
	{
		bool		isnull = (nulls != NULL) && nulls[item];

		if (isnull && nullbits == NULL) /* shouldn't happen */
			elog(ERROR, "null array element where not supported");

		/* without a bitmap there is nowhere to record anything */
		if (nullbits == NULL)
			continue;

		/* a null item leaves its bit at 0 */
		if (!isnull)
			pending |= (1 << bitpos);

		/* flush each completed byte */
		if (++bitpos == 8)
		{
			*nullbits++ = pending;
			pending = 0;
			bitpos = 0;
		}
	}

	/* flush the final, partially filled byte */
	if (nullbits != NULL && bitpos != 0)
		*nullbits = pending;
}
/*
 * set_add_int32
 *		Add an int4 value to the set unless it is already present.
 *
 * set: the set to modify in place; its slots are addressed as int32.
 * dvalue: the value to add, as a Datum.
 *
 * The start slot is the value's hash modulo the capacity; occupied slots
 * holding a different value are skipped in steps of HASHSET_STEP.  Returns
 * the set that was passed in.  Raises an error if the set has no capacity
 * or if no free slot is found within "capacity" probes (this function never
 * enlarges the set).
 */
SetType *
set_add_int32(SetType *set, Datum dvalue)
{
	bits8	   *bitmap = (bits8 *) SET_BITMAP(set);
	int32	   *values = (int32 *) SET_DATA_PTR(set);
	int32		value = DatumGetInt32(dvalue);
	uint32		position;
	int			probes;

	/* a zero capacity would make the modulo below undefined */
	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset without capacity");

	position = hash_bytes_uint32((uint32) value) % set->capacity;

	/* bound the probing so a full table cannot spin forever */
	for (probes = 0; probes < set->capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		if (!(bitmap[byte] & (0x01 << bit)))
		{
			/* Found an empty spot, before hitting the value first */
			bitmap[byte] |= (0x01 << bit);
			values[position] = value;
			set->nelements++;
			return set;
		}

		/* The slot is already used - same value means we're done */
		if (values[position] == value)
			return set;

		position = (position + HASHSET_STEP) % set->capacity;
	}

	elog(ERROR, "hashset is full: no free slot found after %d probes",
		 set->capacity);
	return set;					/* keep compiler quiet */
}
/*
 * set_add_int64
 *		Add an int8 value to the set unless it is already present.
 *
 * set: the set to modify in place; its slots are addressed as Datum.
 * dvalue: the value to add, as a Datum.
 *
 * Hashing uses hashint8 and equality uses int8eq.  The start slot is the
 * hash modulo the capacity; occupied slots holding a different value are
 * skipped in steps of HASHSET_STEP.  Returns the set that was passed in.
 * Raises an error if the set has no capacity or if no free slot is found
 * within "capacity" probes (this function never enlarges the set).
 */
SetType *
set_add_int64(SetType *set, Datum dvalue)
{
	bits8	   *bitmap = (bits8 *) SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		hash;
	uint32		position;
	int			probes;

	/* a zero capacity would make the modulo below undefined */
	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset without capacity");

	hash = DatumGetUInt32(DirectFunctionCall1(hashint8, dvalue));
	position = hash % set->capacity;

	/* bound the probing so a full table cannot spin forever */
	for (probes = 0; probes < set->capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		if (!(bitmap[byte] & (0x01 << bit)))
		{
			/* Found an empty spot, before hitting the value first */
			bitmap[byte] |= (0x01 << bit);
			datums[position] = dvalue;
			set->nelements++;
			return set;
		}

		/* The slot is already used - same value means we're done */
		if (DatumGetBool(DirectFunctionCall2(int8eq, datums[position], dvalue)))
			return set;

		position = (position + HASHSET_STEP) % set->capacity;
	}

	elog(ERROR, "hashset is full: no free slot found after %d probes",
		 set->capacity);
	return set;					/* keep compiler quiet */
}
/*
 * set_add_timestamp
 *		Add a timestamp value to the set unless it is already present.
 *
 * set: the set to modify in place; its slots are addressed as Datum.
 * dvalue: the value to add, as a Datum.
 *
 * Hashing uses timestamp_hash and equality uses timestamp_eq.  The start
 * slot is the hash modulo the capacity; occupied slots holding a different
 * value are skipped in steps of HASHSET_STEP.  Returns the set that was
 * passed in.  Raises an error if the set has no capacity or if no free slot
 * is found within "capacity" probes (this function never enlarges the set).
 */
SetType *
set_add_timestamp(SetType *set, Datum dvalue)
{
	bits8	   *bitmap = (bits8 *) SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		hash;
	uint32		position;
	int			probes;

	/* a zero capacity would make the modulo below undefined */
	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset without capacity");

	hash = DatumGetUInt32(DirectFunctionCall1(timestamp_hash, dvalue));
	position = hash % set->capacity;

	/* bound the probing so a full table cannot spin forever */
	for (probes = 0; probes < set->capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		if (!(bitmap[byte] & (0x01 << bit)))
		{
			/* Found an empty spot, before hitting the value first */
			bitmap[byte] |= (0x01 << bit);
			datums[position] = dvalue;
			set->nelements++;
			return set;
		}

		/* The slot is already used - same value means we're done */
		if (DatumGetBool(DirectFunctionCall2(timestamp_eq,
											 datums[position], dvalue)))
			return set;

		position = (position + HASHSET_STEP) % set->capacity;
	}

	elog(ERROR, "hashset is full: no free slot found after %d probes",
		 set->capacity);
	return set;					/* keep compiler quiet */
}
/*
 * set_add_uuid
 *		Add a uuid value to the set unless it is already present.
 *
 * set: the set to modify in place; its slots are addressed as Datum.
 * dvalue: the value to add, as a Datum.
 *
 * Hashing uses uuid_hash and equality uses uuid_eq.  The start slot is the
 * hash modulo the capacity; occupied slots holding a different value are
 * skipped in steps of HASHSET_STEP.  Returns the set that was passed in.
 * Raises an error if the set has no capacity or if no free slot is found
 * within "capacity" probes (this function never enlarges the set).
 *
 * NOTE(review): the Datum itself is stored in the slot; if uuid Datums are
 * pointers, the set then refers to memory owned by the caller - confirm.
 */
SetType *
set_add_uuid(SetType *set, Datum dvalue)
{
	bits8	   *bitmap = (bits8 *) SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		hash;
	uint32		position;
	int			probes;

	/* a zero capacity would make the modulo below undefined */
	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset without capacity");

	hash = DatumGetUInt32(DirectFunctionCall1(uuid_hash, dvalue));
	position = hash % set->capacity;

	/* bound the probing so a full table cannot spin forever */
	for (probes = 0; probes < set->capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		if (!(bitmap[byte] & (0x01 << bit)))
		{
			/* Found an empty spot, before hitting the value first */
			bitmap[byte] |= (0x01 << bit);
			datums[position] = dvalue;
			set->nelements++;
			return set;
		}

		/* The slot is already used - same value means we're done */
		if (DatumGetBool(DirectFunctionCall2(uuid_eq, datums[position], dvalue)))
			return set;

		position = (position + HASHSET_STEP) % set->capacity;
	}

	elog(ERROR, "hashset is full: no free slot found after %d probes",
		 set->capacity);
	return set;					/* keep compiler quiet */
}
/*
 * set_add_interval
 *		Add an interval value to the set unless it is already present.
 *
 * set: the set to modify in place; its slots are addressed as Datum.
 * dvalue: the value to add, as a Datum.
 *
 * Hashing uses interval_hash and equality uses interval_eq.  The start slot
 * is the hash modulo the capacity; occupied slots holding a different value
 * are skipped in steps of HASHSET_STEP.  Returns the set that was passed in.
 * Raises an error if the set has no capacity or if no free slot is found
 * within "capacity" probes (this function never enlarges the set).
 *
 * NOTE(review): the Datum itself is stored in the slot; if interval Datums
 * are pointers, the set then refers to memory owned by the caller - confirm.
 */
SetType *
set_add_interval(SetType *set, Datum dvalue)
{
	bits8	   *bitmap = (bits8 *) SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		hash;
	uint32		position;
	int			probes;

	/* a zero capacity would make the modulo below undefined */
	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset without capacity");

	hash = DatumGetUInt32(DirectFunctionCall1(interval_hash, dvalue));
	position = hash % set->capacity;

	/* bound the probing so a full table cannot spin forever */
	for (probes = 0; probes < set->capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		if (!(bitmap[byte] & (0x01 << bit)))
		{
			/* Found an empty spot, before hitting the value first */
			bitmap[byte] |= (0x01 << bit);
			datums[position] = dvalue;
			set->nelements++;
			return set;
		}

		/* The slot is already used - same value means we're done */
		if (DatumGetBool(DirectFunctionCall2(interval_eq,
											 datums[position], dvalue)))
			return set;

		position = (position + HASHSET_STEP) % set->capacity;
	}

	elog(ERROR, "hashset is full: no free slot found after %d probes",
		 set->capacity);
	return set;					/* keep compiler quiet */
}
/*
 * construct_empty_set --- build a set that holds only its header
 *
 * The result is a freshly allocated, zero-filled SetType whose varlena size
 * is sizeof(SetType), with no capacity, no null bitmap, and the given
 * element type.
 */
SetType *
construct_empty_set(Oid elmtype)
{
	SetType    *empty = (SetType *) palloc0(sizeof(SetType));

	SET_VARSIZE(empty, sizeof(SetType));
	empty->elemtype = elmtype;
	empty->dataoffset = 0;
	empty->capacity = 0;

	return empty;
}
/*
 * SetOut
 *		Produce the text form of a set: "{v1,v2,...}".
 *
 * retval: the set to print.
 *
 * Walks all capacity slots in slot order and prints every slot whose bit is
 * set in the occupancy bitmap.  int4 slots are read as int32; int8,
 * timestamp, interval and uuid slots are read as Datum, the last three
 * being printed through their type output functions.  For any other
 * element type the result is "{}".
 *
 * Returns a palloc'd, NUL-terminated string.
 */
char *
SetOut(SetType *retval)
{
	StringInfoData str;
	bits8	   *bitmap = (bits8 *) SET_BITMAP(retval);
	char	   *slots = SET_DATA_PTR(retval);
	PGFunction	outfn = NULL;
	bool		supported = true;
	int			i;

	/* Initialize the StringInfo buffer */
	initStringInfo(&str);
	/* Append the opening brace for the output hashset string */
	appendStringInfoChar(&str, '{');

	/* Decide once how slots of this element type are printed */
	switch (retval->elemtype)
	{
		case INT4OID:
		case INT8OID:
			/* formatted directly below */
			break;
		case TIMESTAMPOID:
			outfn = timestamp_out;
			break;
		case INTERVALOID:
			outfn = interval_out;
			break;
		case UUIDOID:
			outfn = uuid_out;
			break;
		default:
			supported = false;
			break;
	}

	for (i = 0; supported && i < retval->capacity; i++)
	{
		/* Skip slots whose bit in the bitmap is not set */
		if (!(bitmap[i / 8] & (0x01 << (i % 8))))
			continue;

		/* Anything beyond the opening brace means a value came before */
		if (str.len > 1)
			appendStringInfoChar(&str, ',');

		if (retval->elemtype == INT4OID)
			appendStringInfo(&str, "%d", ((int32 *) slots)[i]);
		else if (retval->elemtype == INT8OID)
		{
			/* INT64_FORMAT is right wherever int64 is long or long long */
			appendStringInfo(&str, INT64_FORMAT,
							 DatumGetInt64(((Datum *) slots)[i]));
		}
		else
		{
			char	   *item;

			item = DatumGetCString(DirectFunctionCall1(outfn,
													   ((Datum *) slots)[i]));
			appendStringInfoString(&str, item);
			/* the item text has been copied into str */
			pfree(item);
		}
	}

	/* Append the closing brace for the output hashset string */
	appendStringInfoChar(&str, '}');
	return str.data;
}