On Mon, Jun 26, 2023 at 4:36 AM Joel Jacobson <j...@compiler.org> wrote: > > On Sun, Jun 25, 2023, at 11:42, Joel Jacobson wrote: > > SELECT hashset_contains('{}'::int4hashset, NULL::int); > > > > would be False, according to the General Rules. > > > ... > > Applying the same rules, we'd have to return Unknown (which we represent as > > null) for: > > > > SELECT hashset_contains('{null}'::int4hashset, NULL::int); > > > > Aha! I just discovered to my surprise that the corresponding array > queries gives the same result: > > SELECT NULL = ANY(ARRAY[]::int[]); > ?column? > ---------- > f > (1 row) > > SELECT NULL = ANY(ARRAY[NULL]::int[]); > ?column? > ---------- > > (1 row) > > I have no more objections; let's stick to the same null semantics as arrays and multisets. > > /Joel
Can you try to glue the attached code into the hashset data type's input function? The attached code parses a cstring whose elements may or may not be double-quoted, so '{1,2,3}' == '{"1","2","3"}'. Quoting preserves the inner string as is. Currently int4hashset input is delimited by commas, so if you want to deal with ranges you need to escape the comma.
/*
 * Build and try-out notes:
 *
 * gcc -I/home/jian/postgres/2023_05_25_beta5421/include/server -fPIC -c /home/jian/Desktop/regress_pgsql/input_validate.c
 * gcc -shared -o /home/jian/Desktop/regress_pgsql/input_validate.so /home/jian/Desktop/regress_pgsql/input_validate.o
 *
 * CREATE OR REPLACE FUNCTION str_delim_count_validate(cstring) RETURNS BOOL SET search_path from current
 * AS '/home/jian/Desktop/regress_pgsql/input_validate', 'str_delim_count_validate'
 * LANGUAGE C IMMUTABLE;
 *
 * select str_delim_count_validate('{"23890","2","3", "a",1,2,3,4,NULL,2022-01-01,"[1,2]"}');
 * select str_delim_count_validate('{"3 ", }'); --fail
 * select str_delim_count_validate('{"3 " }'); --ok
 * select str_delim_count_validate('{"""23890"}'); --fail.
 * select str_delim_count_validate('{}'); --ok
 * select str_delim_count_validate('}'); --fail.
 * select str_delim_count_validate('{'); --fail.
 * select str_delim_count_validate('{{}}'); --fail.
 * select str_delim_count_validate('{{}}'); --fail.
 * select str_delim_count_validate('{"22022-01-01,[1,2]}'); --fail.
 * select str_delim_count_validate('{" 2022-01-01 "}'); --ok
 * select str_delim_count_validate('{ 2022-01-01 }'); --ok
 * select str_delim_count_validate('{ 2022-01-01 ,"[1,2]"} '); --ok
 * select str_delim_count_validate('{ 2023-06-26 16:45:02.454293+08 ,"2","3"}'); --ok.
 * select str_delim_count_validate('{"\\t"}'); --ok
 */
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/numeric.h"
#include "funcapi.h"
#include "utils/lsyscache.h"
#include "utils/fmgrprotos.h"
#include "common/hashfn.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(str_delim_count_validate);

static int  SetCount(const char *str, char typdelim, Node *escontext);
static bool ReadSetStr(char *arrayStr, const char *origStr, char typdelim, Node *escontext);
static bool set_isspace(char ch);

/*
 * str_delim_count_validate
 *      SQL-callable check of a brace-delimited, comma-separated set literal.
 *
 * Takes one cstring argument.  The string is first validated and counted by
 * SetCount(), then split into items by ReadSetStr(), which logs each item at
 * INFO level.  fcinfo->context is handed to both helpers as the error
 * context for ereturn().
 *
 * Returns true when both passes succeed.  When a helper reports failure
 * through its return value (which can only happen if ereturn() returned
 * instead of throwing), false is returned.
 */
Datum
str_delim_count_validate(PG_FUNCTION_ARGS)
{
    char       *string = PG_GETARG_CSTRING(0);
    char       *string_save;
    char        typdelim = ',';
    int         nitems;

    /* Make a modifiable copy of the input; ReadSetStr() rewrites it in place */
    string_save = pstrdup(string);

    /* Validate first: ReadSetStr() relies on the string being well formed */
    nitems = SetCount(string_save, typdelim, fcinfo->context);
    if (nitems < 0)
        PG_RETURN_BOOL(false);

    if (!ReadSetStr(string_save, string, typdelim, fcinfo->context))
    {
        elog(INFO, "delimuite str failed");
        PG_RETURN_BOOL(false);
    }

    elog(INFO, "line %d nitems=%d", __LINE__, nitems);

    PG_RETURN_BOOL(true);
}

/*
 * set_isspace() --- a non-locale-dependent isspace()
 *
 * We used to use isspace() for parsing array values, but that has
 * undesirable results: an array value might be silently interpreted
 * differently depending on the locale setting.  Now we just hard-wire
 * the traditional ASCII definition of isspace().
 */
static bool
set_isspace(char ch)
{
    if (ch == ' ' ||
        ch == '\t' ||
        ch == '\n' ||
        ch == '\r' ||
        ch == '\v' ||
        ch == '\f')
        return true;
    return false;
}

/*
 * ReadSetStr
 *      Split a set literal into its items, de-quoting each one in place.
 *
 * arrayStr:  modifiable copy of the literal; it is overwritten while parsing.
 * origStr:   the unmodified literal, used only in error messages.
 * typdelim:  the item delimiter character.
 * escontext: error context passed to ereturn().
 *
 * Each finished item is NUL-terminated inside arrayStr and logged at INFO
 * level.  Returns true on success; false if ereturn() returned after a
 * premature end of input.
 */
static bool
ReadSetStr(char *arrayStr, const char *origStr, char typdelim, Node *escontext)
{
    char       *srcptr;
    bool        in_quotes = false;
    bool        eoArray = false;

    /*
     * We have to remove " and \ characters to create a clean item value to
     * pass to the datatype input routine.  We overwrite each item value
     * in-place within arrayStr to do this.  srcptr is the current scan point,
     * and dstptr is where we are copying to.
     *
     * We also want to suppress leading and trailing unquoted whitespace.  We
     * use the leadingspace flag to suppress leading space.  Trailing space is
     * tracked by using dstendptr to point to the last significant output
     * character.
     *
     * The error checking in this routine is mostly pro-forma, since we expect
     * that SetCount() already validated the string.  So we don't bother
     * with errdetail messages.
     */
    srcptr = arrayStr;
    while (!eoArray)
    {
        bool        itemdone = false;
        bool        leadingspace = true;

        /*
         * Records that the item used quotes or a backslash, so it cannot be
         * a NULL marker.  Nothing in this routine reads the flag yet.
         */
        bool        hasquoting = false;
        char       *itemstart;
        char       *dstptr;
        char       *dstendptr;

        itemstart = dstptr = dstendptr = srcptr;

        while (!itemdone)
        {
            switch (*srcptr)
            {
                case '\0':
                    /* Signal a premature end of the string */
                    ereturn(escontext, false,
                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                             errmsg("malformed array literal: \"%s\"",
                                    origStr)));
                    break;
                case '\\':
                    /* Skip backslash, copy next character as-is. */
                    srcptr++;
                    if (*srcptr == '\0')
                        ereturn(escontext, false,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("malformed array literal: \"%s\"",
                                        origStr)));
                    *dstptr++ = *srcptr++;
                    /* Treat the escaped character as non-whitespace */
                    leadingspace = false;
                    dstendptr = dstptr;
                    hasquoting = true;  /* can't be a NULL marker */
                    break;
                case '"':
                    in_quotes = !in_quotes;
                    if (in_quotes)
                        leadingspace = false;
                    else
                    {
                        /*
                         * Advance dstendptr when we exit in_quotes; this
                         * saves having to do it in all the other in_quotes
                         * cases.
                         */
                        dstendptr = dstptr;
                    }
                    hasquoting = true;  /* can't be a NULL marker */
                    srcptr++;
                    break;
                case '{':
                    /* An unquoted left brace is dropped, a quoted one is data */
                    if (!in_quotes)
                    {
                        srcptr++;
                    }
                    else
                        *dstptr++ = *srcptr++;
                    break;
                case '}':
                    /* An unquoted right brace ends the last item and the set */
                    if (!in_quotes)
                    {
                        eoArray = itemdone = true;
                        srcptr++;
                    }
                    else
                        *dstptr++ = *srcptr++;
                    break;
                default:
                    if (in_quotes)
                        *dstptr++ = *srcptr++;
                    else if (*srcptr == typdelim)
                    {
                        itemdone = true;
                        srcptr++;
                    }
                    else if (set_isspace(*srcptr))
                    {
                        /*
                         * If leading space, drop it immediately.  Else, copy
                         * but don't advance dstendptr.
                         */
                        if (leadingspace)
                            srcptr++;
                        else
                            *dstptr++ = *srcptr++;
                    }
                    else
                    {
                        *dstptr++ = *srcptr++;
                        leadingspace = false;
                        dstendptr = dstptr;
                    }
                    break;
            }
        }

        Assert(dstptr < srcptr);
        /* Terminate at the last significant character, trimming trailing space */
        *dstendptr = '\0';

        elog(INFO, "line [%04d] itemstart:|%s|", __LINE__, itemstart);
    }

    return true;
}

/* States of the SetCount() literal-syntax state machine */
typedef enum
{
    SET_NO_LEVEL,
    SET_LEVEL_STARTED,
    SET_ELEM_STARTED,
    SET_ELEM_COMPLETED,
    SET_QUOTED_ELEM_STARTED,
    SET_QUOTED_ELEM_COMPLETED,
    SET_ELEM_DELIMITED,
    SET_LEVEL_COMPLETED,
    SET_LEVEL_DELIMITED
} SetParseState;

/*
 * SetCount
 *      Validates the syntax of a set literal and counts its elements.
 *
 * str:       the literal to check; it is not modified.
 * typdelim:  the element delimiter character.
 * escontext: error context passed to ereturn().
 *
 * Returns the number of elements, or 0 for an empty literal.  Only a single
 * brace level is accepted; a nested left brace is rejected.
 *
 * If we detect an error, fill *escontext with error details and return -1
 * (unless escontext isn't provided, in which case errors will be thrown).
 */
static int
SetCount(const char *str, char typdelim, Node *escontext)
{
    int         nest_level = 0,
                nelems = 1;
    bool        in_quotes = false;
    bool        eoArray = false;
    bool        empty_array = true;
    const char *ptr;
    SetParseState parse_state = SET_NO_LEVEL;

    ptr = str;
    while (!eoArray)
    {
        bool        itemdone = false;

        while (!itemdone)
        {
            /* Once any element has started, the literal is not empty */
            if (parse_state == SET_ELEM_STARTED ||
                parse_state == SET_QUOTED_ELEM_STARTED)
                empty_array = false;

            switch (*ptr)
            {
                case '\0':
                    /* Signal a premature end of the string */
                    ereturn(escontext, -1,
                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                             errmsg("malformed array literal: \"%s\"", str),
                             errdetail("Unexpected end of input.")));
                case '\\':

                    /*
                     * An escape must be after a level start, after an element
                     * start, or after an element delimiter.  In any case we
                     * now must be past an element start.
                     */
                    if (parse_state != SET_LEVEL_STARTED &&
                        parse_state != SET_ELEM_STARTED &&
                        parse_state != SET_QUOTED_ELEM_STARTED &&
                        parse_state != SET_ELEM_DELIMITED)
                        ereturn(escontext, -1,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("malformed array literal: \"%s\"", str),
                                 errdetail("Unexpected \"%c\" character.",
                                           '\\')));
                    if (parse_state != SET_QUOTED_ELEM_STARTED)
                        parse_state = SET_ELEM_STARTED;
                    /* skip the escaped character */
                    if (*(ptr + 1))
                        ptr++;
                    else
                        ereturn(escontext, -1,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("malformed array literal: \"%s\"", str),
                                 errdetail("Unexpected end of input.")));
                    break;
                case '"':

                    /*
                     * A quote must be after a level start, after a quoted
                     * element start, or after an element delimiter.  In any
                     * case we now must be past an element start.
                     */
                    if (parse_state != SET_LEVEL_STARTED &&
                        parse_state != SET_QUOTED_ELEM_STARTED &&
                        parse_state != SET_ELEM_DELIMITED)
                        ereturn(escontext, -1,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("malformed array literal: \"%s\"", str),
                                 errdetail("Unexpected array element.")));
                    in_quotes = !in_quotes;
                    if (in_quotes)
                        parse_state = SET_QUOTED_ELEM_STARTED;
                    else
                        parse_state = SET_QUOTED_ELEM_COMPLETED;
                    break;
                case '{':
                    if (!in_quotes)
                    {
                        /*
                         * A left brace can occur if no nesting has occurred
                         * yet, after a level start, or after a level
                         * delimiter.
                         */
                        if (parse_state != SET_NO_LEVEL &&
                            parse_state != SET_LEVEL_STARTED &&
                            parse_state != SET_LEVEL_DELIMITED)
                            ereturn(escontext, -1,
                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                     errmsg("malformed array literal: \"%s\"", str),
                                     errdetail("Unexpected \"%c\" character.",
                                               '{')));
                        parse_state = SET_LEVEL_STARTED;
                        /* Only one brace level is allowed */
                        if (nest_level >= 1)
                            ereturn(escontext, -1,
                                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
                                            nest_level + 1, 1)));
                        nest_level++;
                    }
                    break;
                case '}':
                    if (!in_quotes)
                    {
                        /*
                         * A right brace can occur after an element start, an
                         * element completion, a quoted element completion, or
                         * a level completion.  It may also directly follow
                         * the opening brace, which is the empty literal.
                         */
                        if (parse_state != SET_ELEM_STARTED &&
                            parse_state != SET_ELEM_COMPLETED &&
                            parse_state != SET_QUOTED_ELEM_COMPLETED &&
                            parse_state != SET_LEVEL_COMPLETED &&
                            !(nest_level == 1 && parse_state == SET_LEVEL_STARTED))
                            ereturn(escontext, -1,
                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                     errmsg("malformed array literal: \"%s\"", str),
                                     errdetail("Unexpected \"%c\" character.",
                                               '}')));
                        parse_state = SET_LEVEL_COMPLETED;
                        if (nest_level == 0)
                            ereturn(escontext, -1,
                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                     errmsg("malformed array literal: \"%s\"", str),
                                     errdetail("Unmatched \"%c\" character.", '}')));
                        nest_level--;

                        if (nest_level == 0)
                            eoArray = itemdone = true;
                    }
                    break;
                default:
                    if (!in_quotes)
                    {
                        if (*ptr == typdelim)
                        {
                            /*
                             * Delimiters can occur after an element start, an
                             * element completion, a quoted element
                             * completion, or a level completion.
                             */
                            if (parse_state != SET_ELEM_STARTED &&
                                parse_state != SET_ELEM_COMPLETED &&
                                parse_state != SET_QUOTED_ELEM_COMPLETED &&
                                parse_state != SET_LEVEL_COMPLETED)
                                ereturn(escontext, -1,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", str),
                                         errdetail("Unexpected \"%c\" character.",
                                                   typdelim)));
                            if (parse_state == SET_LEVEL_COMPLETED)
                                parse_state = SET_LEVEL_DELIMITED;
                            else
                                parse_state = SET_ELEM_DELIMITED;
                            itemdone = true;
                            nelems++;
                        }
                        else if (!set_isspace(*ptr))
                        {
                            /*
                             * Other non-space characters must be after a
                             * level start, after an element start, or after
                             * an element delimiter.  In any case we now must
                             * be past an element start.
                             */
                            if (parse_state != SET_LEVEL_STARTED &&
                                parse_state != SET_ELEM_STARTED &&
                                parse_state != SET_ELEM_DELIMITED)
                                ereturn(escontext, -1,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", str),
                                         errdetail("Unexpected array element.")));
                            parse_state = SET_ELEM_STARTED;
                        }
                    }
                    break;
            }
            if (!itemdone)
                ptr++;
        }
        /* Step over the delimiter or closing brace that ended the item */
        ptr++;
    }

    /* only whitespace is allowed after the closing brace */
    while (*ptr)
    {
        if (!set_isspace(*ptr++))
            ereturn(escontext, -1,
                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                     errmsg("malformed array literal: \"%s\"", str),
                     errdetail("Junk after closing right brace.")));
    }

    /* special case for an empty array */
    if (empty_array)
        return 0;
    else
        return nelems;
}