This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch partial_agg
in repository https://gitbox.apache.org/repos/asf/datasketches-postgresql.git
The following commit(s) were added to refs/heads/partial_agg by this push:
new 1658a0c frequent strings sketch partial aggregation support
1658a0c is described below
commit 1658a0c87ce9297d9186eb9558239427ea43d134
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Fri Apr 21 14:15:22 2023 -0700
frequent strings sketch partial aggregation support
---
sql/datasketches_frequent_strings_sketch.sql | 90 ++++++++++++++++++----------
src/frequent_strings_sketch_pg_functions.c | 86 +++++++++++++++++++++-----
2 files changed, 128 insertions(+), 48 deletions(-)
diff --git a/sql/datasketches_frequent_strings_sketch.sql b/sql/datasketches_frequent_strings_sketch.sql
index 38f2f78..22bb487 100644
--- a/sql/datasketches_frequent_strings_sketch.sql
+++ b/sql/datasketches_frequent_strings_sketch.sql
@@ -19,11 +19,11 @@ CREATE TYPE frequent_strings_sketch;
CREATE OR REPLACE FUNCTION frequent_strings_sketch_in(cstring) RETURNS
frequent_strings_sketch
AS '$libdir/datasketches', 'pg_sketch_in'
- LANGUAGE C STRICT IMMUTABLE;
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION
frequent_strings_sketch_out(frequent_strings_sketch) RETURNS cstring
AS '$libdir/datasketches', 'pg_sketch_out'
- LANGUAGE C STRICT IMMUTABLE;
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
CREATE TYPE frequent_strings_sketch (
INPUT = frequent_strings_sketch_in,
@@ -34,67 +34,91 @@ CREATE TYPE frequent_strings_sketch (
CREATE CAST (bytea as frequent_strings_sketch) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (frequent_strings_sketch as bytea) WITHOUT FUNCTION AS ASSIGNMENT;
-CREATE OR REPLACE FUNCTION frequent_strings_sketch_add_item(internal, int,
varchar) RETURNS internal
- AS '$libdir/datasketches', 'pg_frequent_strings_sketch_add_item'
- LANGUAGE C IMMUTABLE;
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_build_agg(internal, int,
varchar) RETURNS internal
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_build_agg'
+ LANGUAGE C IMMUTABLE PARALLEL SAFE;
-CREATE OR REPLACE FUNCTION frequent_strings_sketch_add_item(internal, int,
varchar, bigint) RETURNS internal
- AS '$libdir/datasketches', 'pg_frequent_strings_sketch_add_item'
- LANGUAGE C IMMUTABLE;
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_build_agg(internal, int,
varchar, bigint) RETURNS internal
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_build_agg'
+ LANGUAGE C IMMUTABLE PARALLEL SAFE;
-CREATE OR REPLACE FUNCTION frequent_strings_sketch_merge(internal, int,
frequent_strings_sketch) RETURNS internal
- AS '$libdir/datasketches', 'pg_frequent_strings_sketch_merge'
- LANGUAGE C IMMUTABLE;
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_merge_agg(internal, int,
frequent_strings_sketch) RETURNS internal
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_merge_agg'
+ LANGUAGE C IMMUTABLE PARALLEL SAFE;
-CREATE OR REPLACE FUNCTION frequent_strings_sketch_from_internal(internal)
RETURNS frequent_strings_sketch
- AS '$libdir/datasketches', 'pg_frequent_strings_sketch_from_internal'
- LANGUAGE C STRICT IMMUTABLE;
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_serialize(internal) RETURNS
bytea
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_serialize'
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
-CREATE OR REPLACE FUNCTION
frequent_strings_sketch_to_string(frequent_strings_sketch) RETURNS TEXT
- AS '$libdir/datasketches', 'pg_frequent_strings_sketch_to_string'
- LANGUAGE C STRICT IMMUTABLE;
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_deserialize(bytea,
internal) RETURNS internal
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_deserialize'
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
-CREATE OR REPLACE FUNCTION
frequent_strings_sketch_to_string(frequent_strings_sketch, boolean) RETURNS TEXT
- AS '$libdir/datasketches', 'pg_frequent_strings_sketch_to_string'
- LANGUAGE C STRICT IMMUTABLE;
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_combine(internal, internal)
RETURNS internal
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_combine'
+ LANGUAGE C IMMUTABLE PARALLEL SAFE;
+
+CREATE OR REPLACE FUNCTION frequent_strings_sketch_finalize(internal) RETURNS
frequent_strings_sketch
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_serialize'
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
CREATE AGGREGATE frequent_strings_sketch_build(int, varchar) (
- sfunc = frequent_strings_sketch_add_item,
- stype = internal,
- finalfunc = frequent_strings_sketch_from_internal
+ STYPE = internal,
+ SFUNC = frequent_strings_sketch_build_agg,
+ COMBINEFUNC = frequent_strings_sketch_combine,
+ SERIALFUNC = frequent_strings_sketch_serialize,
+ DESERIALFUNC = frequent_strings_sketch_deserialize,
+ FINALFUNC = frequent_strings_sketch_finalize,
+ PARALLEL = SAFE
);
CREATE AGGREGATE frequent_strings_sketch_build(int, varchar, bigint) (
- sfunc = frequent_strings_sketch_add_item,
- stype = internal,
- finalfunc = frequent_strings_sketch_from_internal
+ STYPE = internal,
+ SFUNC = frequent_strings_sketch_build_agg,
+ COMBINEFUNC = frequent_strings_sketch_combine,
+ SERIALFUNC = frequent_strings_sketch_serialize,
+ DESERIALFUNC = frequent_strings_sketch_deserialize,
+ FINALFUNC = frequent_strings_sketch_finalize,
+ PARALLEL = SAFE
);
CREATE AGGREGATE frequent_strings_sketch_merge(int, frequent_strings_sketch) (
- sfunc = frequent_strings_sketch_merge,
- stype = internal,
- finalfunc = frequent_strings_sketch_from_internal
+ STYPE = internal,
+ SFUNC = frequent_strings_sketch_merge_agg,
+ COMBINEFUNC = frequent_strings_sketch_combine,
+ SERIALFUNC = frequent_strings_sketch_serialize,
+ DESERIALFUNC = frequent_strings_sketch_deserialize,
+ FINALFUNC = frequent_strings_sketch_finalize,
+ PARALLEL = SAFE
);
+CREATE OR REPLACE FUNCTION
frequent_strings_sketch_to_string(frequent_strings_sketch) RETURNS TEXT
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_to_string'
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OR REPLACE FUNCTION
frequent_strings_sketch_to_string(frequent_strings_sketch, boolean) RETURNS TEXT
+ AS '$libdir/datasketches', 'pg_frequent_strings_sketch_to_string'
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
CREATE TYPE frequent_strings_sketch_row AS (str varchar, estimate bigint,
lower_bound bigint, upper_bound bigint);
CREATE OR REPLACE FUNCTION
frequent_strings_sketch_result_no_false_positives(frequent_strings_sketch)
RETURNS setof frequent_strings_sketch_row
AS '$libdir/datasketches',
'pg_frequent_strings_sketch_result_no_false_positives'
- LANGUAGE C STRICT IMMUTABLE;
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION
frequent_strings_sketch_result_no_false_positives(frequent_strings_sketch,
bigint)
RETURNS setof frequent_strings_sketch_row
AS '$libdir/datasketches',
'pg_frequent_strings_sketch_result_no_false_positives'
- LANGUAGE C STRICT IMMUTABLE;
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION
frequent_strings_sketch_result_no_false_negatives(frequent_strings_sketch)
RETURNS setof frequent_strings_sketch_row
AS '$libdir/datasketches',
'pg_frequent_strings_sketch_result_no_false_negatives'
- LANGUAGE C STRICT IMMUTABLE;
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION
frequent_strings_sketch_result_no_false_negatives(frequent_strings_sketch,
bigint)
RETURNS setof frequent_strings_sketch_row
AS '$libdir/datasketches',
'pg_frequent_strings_sketch_result_no_false_negatives'
- LANGUAGE C STRICT IMMUTABLE;
+ LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
diff --git a/src/frequent_strings_sketch_pg_functions.c b/src/frequent_strings_sketch_pg_functions.c
index 5c63c15..71b8c11 100644
--- a/src/frequent_strings_sketch_pg_functions.c
+++ b/src/frequent_strings_sketch_pg_functions.c
@@ -26,22 +26,23 @@
#include <funcapi.h>
#include "frequent_strings_sketch_c_adapter.h"
-#include "base64.h"
/* PG_FUNCTION_INFO_V1 macro to pass functions to postgres */
-PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_add_item);
-PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_merge);
-PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_from_internal);
+PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_build_agg);
+PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_merge_agg);
+PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_serialize);
+PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_deserialize);
+PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_combine);
PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_to_string);
PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_result_no_false_positives);
PG_FUNCTION_INFO_V1(pg_frequent_strings_sketch_result_no_false_negatives);
/* function declarations */
-Datum pg_frequent_strings_sketch_recv(PG_FUNCTION_ARGS);
-Datum pg_frequent_strings_sketch_send(PG_FUNCTION_ARGS);
-Datum pg_frequent_strings_sketch_add_item(PG_FUNCTION_ARGS);
-Datum pg_frequent_strings_sketch_merge(PG_FUNCTION_ARGS);
-Datum pg_frequent_strings_sketch_from_internal(PG_FUNCTION_ARGS);
+Datum pg_frequent_strings_sketch_build_agg(PG_FUNCTION_ARGS);
+Datum pg_frequent_strings_sketch_merge_agg(PG_FUNCTION_ARGS);
+Datum pg_frequent_strings_sketch_serialize(PG_FUNCTION_ARGS);
+Datum pg_frequent_strings_sketch_deserialize(PG_FUNCTION_ARGS);
+Datum pg_frequent_strings_sketch_combine(PG_FUNCTION_ARGS);
Datum pg_frequent_strings_sketch_to_string(PG_FUNCTION_ARGS);
Datum pg_frequent_strings_sketch_result_no_false_positives(PG_FUNCTION_ARGS);
Datum pg_frequent_strings_sketch_result_no_false_negatives(PG_FUNCTION_ARGS);
@@ -49,7 +50,7 @@ Datum
pg_frequent_strings_sketch_result_no_false_negatives(PG_FUNCTION_ARGS);
Datum frequent_strings_sketch_get_result(PG_FUNCTION_ARGS, bool);
-Datum pg_frequent_strings_sketch_add_item(PG_FUNCTION_ARGS) {
+Datum pg_frequent_strings_sketch_build_agg(PG_FUNCTION_ARGS) {
void* sketchptr;
unsigned lg_k;
const VarChar* str;
@@ -65,7 +66,7 @@ Datum pg_frequent_strings_sketch_add_item(PG_FUNCTION_ARGS) {
}
if (!AggCheckCallContext(fcinfo, &aggcontext)) {
- elog(ERROR, "frequent_strings_sketch_add_item called in non-aggregate
context");
+ elog(ERROR, "frequent_strings_sketch_build_agg called in non-aggregate
context");
}
oldcontext = MemoryContextSwitchTo(aggcontext);
@@ -88,7 +89,7 @@ Datum pg_frequent_strings_sketch_add_item(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER(sketchptr);
}
-Datum pg_frequent_strings_sketch_merge(PG_FUNCTION_ARGS) {
+Datum pg_frequent_strings_sketch_merge_agg(PG_FUNCTION_ARGS) {
void* unionptr;
bytea* sketch_bytes;
void* sketchptr;
@@ -104,7 +105,7 @@ Datum pg_frequent_strings_sketch_merge(PG_FUNCTION_ARGS) {
}
if (!AggCheckCallContext(fcinfo, &aggcontext)) {
- elog(ERROR, "frequent_strings_sketch_merge called in non-aggregate
context");
+ elog(ERROR, "frequent_strings_sketch_merge_agg called in non-aggregate
context");
}
oldcontext = MemoryContextSwitchTo(aggcontext);
@@ -125,13 +126,13 @@ Datum pg_frequent_strings_sketch_merge(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER(unionptr);
}
-Datum pg_frequent_strings_sketch_from_internal(PG_FUNCTION_ARGS) {
+Datum pg_frequent_strings_sketch_serialize(PG_FUNCTION_ARGS) {
void* sketchptr;
struct ptr_with_size bytes_out;
MemoryContext aggcontext;
if (PG_ARGISNULL(0)) PG_RETURN_NULL();
if (!AggCheckCallContext(fcinfo, &aggcontext)) {
- elog(ERROR, "frequent_strings_sketch_from_internal called in non-aggregate
context");
+ elog(ERROR, "frequent_strings_sketch_serialize called in non-aggregate
context");
}
sketchptr = PG_GETARG_POINTER(0);
bytes_out = frequent_strings_sketch_serialize(sketchptr, VARHDRSZ);
@@ -140,6 +141,61 @@ Datum
pg_frequent_strings_sketch_from_internal(PG_FUNCTION_ARGS) {
PG_RETURN_BYTEA_P(bytes_out.ptr);
}
+/* Aggregate DESERIALFUNC: turns the bytea in argument 0 back into a sketch object. */
+Datum pg_frequent_strings_sketch_deserialize(PG_FUNCTION_ARGS) {
+  const bytea* bytes_in;
+  void* sketchptr;
+  MemoryContext oldcontext;
+  MemoryContext aggcontext;
+
+  if (PG_ARGISNULL(0)) PG_RETURN_NULL();
+  if (!AggCheckCallContext(fcinfo, &aggcontext)) {
+    elog(ERROR, "frequent_strings_sketch_deserialize called in non-aggregate context");
+  }
+
+  /* Fetch the argument before switching contexts: PG_GETARG_BYTEA_P may allocate a */
+  /* detoasted copy, and that copy should not be left behind in aggcontext. */
+  bytes_in = PG_GETARG_BYTEA_P(0);
+  oldcontext = MemoryContextSwitchTo(aggcontext); /* build the sketch in aggcontext, as before */
+  sketchptr = frequent_strings_sketch_deserialize(VARDATA(bytes_in), VARSIZE(bytes_in) - VARHDRSZ);
+  MemoryContextSwitchTo(oldcontext);
+
+  PG_RETURN_POINTER(sketchptr);
+}
+
+/* Aggregate COMBINEFUNC: merges the state in argument 1 into argument 0; either may be NULL. */
+Datum pg_frequent_strings_sketch_combine(PG_FUNCTION_ARGS) {
+  void* sketchptr1;
+  void* sketchptr2;
+  void* sketchptr;
+  MemoryContext oldcontext;
+  MemoryContext aggcontext;
+
+  if (PG_ARGISNULL(0) && PG_ARGISNULL(1)) PG_RETURN_NULL();
+  if (!AggCheckCallContext(fcinfo, &aggcontext)) {
+    elog(ERROR, "frequent_strings_sketch_combine called in non-aggregate context");
+  }
+
+  /* Read a state pointer only when the argument is not SQL NULL. */
+  sketchptr1 = PG_ARGISNULL(0) ? NULL : PG_GETARG_POINTER(0);
+  sketchptr2 = PG_ARGISNULL(1) ? NULL : PG_GETARG_POINTER(1);
+
+  oldcontext = MemoryContextSwitchTo(aggcontext);
+  if (sketchptr1) {
+    sketchptr = sketchptr1;
+    if (sketchptr2) {
+      frequent_strings_sketch_merge(sketchptr, sketchptr2);
+      /* Release the merged-in state here, inside the guard, so delete never sees NULL. */
+      frequent_strings_sketch_delete(sketchptr2);
+    }
+  } else {
+    sketchptr = sketchptr2; /* only the second state exists: return it unchanged */
+  }
+  MemoryContextSwitchTo(oldcontext);
+
+  PG_RETURN_POINTER(sketchptr);
+}
+
Datum pg_frequent_strings_sketch_to_string(PG_FUNCTION_ARGS) {
const bytea* bytes_in;
void* sketchptr;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]