This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch improve_theta_code_docs
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit f644cfadec9798e6853f924cfd557b4b2ea9b5db
Author: Lee Rhodes <[email protected]>
AuthorDate: Tue Sep 10 11:48:13 2024 -0700

    Improve code docs for Theta.
    
    I want to see how this looks before proceeding to the other sketches.
---
 theta/sqlx/theta_sketch_a_not_b.sqlx                | 14 ++++++++++++--
 theta/sqlx/theta_sketch_agg_string.sqlx             | 21 +++++++++++++++------
 theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx  | 21 ++++++++++++++-------
 theta/sqlx/theta_sketch_agg_union.sqlx              | 16 ++++++++++++----
 theta/sqlx/theta_sketch_get_estimate.sqlx           | 11 +++++++++--
 .../sqlx/theta_sketch_get_estimate_and_bounds.sqlx  | 13 ++++++++++---
 theta/sqlx/theta_sketch_jaccard_similarity.sqlx     |  8 ++++++--
 theta/sqlx/theta_sketch_scalar_intersection.sqlx    | 18 ++++++++++++++----
 theta/sqlx/theta_sketch_scalar_union.sqlx           | 16 ++++++++++++----
 theta/sqlx/theta_sketch_to_string.sqlx              | 10 +++++++---
 10 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/theta/sqlx/theta_sketch_a_not_b.sqlx 
b/theta/sqlx/theta_sketch_a_not_b.sqlx
index d16b91a..9b36bcf 100644
--- a/theta/sqlx/theta_sketch_a_not_b.sqlx
+++ b/theta/sqlx/theta_sketch_a_not_b.sqlx
@@ -25,11 +25,21 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.js"],
   description = '''Computes a sketch that represents the scalar set 
difference: sketchA and not sketchB.
+
 Param sketchA: the first sketch "A" as bytes.
 Param sketchB: the second sketch "B" as bytes.
 Param seed: This is used to confirm that the given sketches were configured 
with the correct seed.
-Returns: a Compact, Compressed Theta Sketch, as bytes, from which the set 
difference cardinality can be obtained.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a Compact, Compressed Theta Sketch, as BYTES.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchSetOps.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchSetOpsAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSize.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+ '''
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
diff --git a/theta/sqlx/theta_sketch_agg_string.sqlx 
b/theta/sqlx/theta_sketch_agg_string.sqlx
index 583a8a4..73590f6 100644
--- a/theta/sqlx/theta_sketch_agg_string.sqlx
+++ b/theta/sqlx/theta_sketch_agg_string.sqlx
@@ -23,12 +23,21 @@ CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(str STRING)
 RETURNS BYTES
 OPTIONS (
   description = '''Creates a sketch that represents the cardinality of the 
given STRING column.
+
 Param str: the STRING column of identifiers.
-Param lg_k: 12 (assume default)
-Param seed: 9001 (assume default)
-Param p: 1.0 (assume default)
-Returns: a Compact, Compressed Theta Sketch, as bytes, from which the 
cardinality can be obtained. 
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Param lg_k: assume default = 12
+Param seed: assume default = 9001
+Param p: assume default = 1.0
+Returns: a Compact, Compressed Theta Sketch, as BYTES. 
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSize.html
+ - https://datasketches.apache.org/docs/Theta/ThetaPSampling.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+ '''
 ) AS (
-  $BQ_DATASET.theta_sketch_agg_string_lgk_seed_p(str, STRUCT<BYTEINT, INT64, 
FLOAT64>(null, null, null))
+  $BQ_DATASET.theta_sketch_agg_string_lgk_seed_p(str, STRUCT<BYTEINT, INT64, 
FLOAT64>(NULL, NULL, NULL))
 );
diff --git a/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx 
b/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx
index c676f0c..33ca2c2 100644
--- a/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx
+++ b/theta/sqlx/theta_sketch_agg_string_lgk_seed_p.sqlx
@@ -25,14 +25,21 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.mjs"],
   description = '''Creates a sketch that represents the cardinality of the 
given STRING column.
+
 Param str: the STRING column of identifiers.
-Param lg_k: the sketch accuracy/size parameter as an integer in the range [4, 
26].
-Param seed: the seed to be used by the underlying hash function.
-Param p: sampling probability (initial theta). The default is 1, so the sketch 
retains
-all entries until it reaches the limit, at which point it goes into the 
estimation mode
-and reduces the effective sampling probability (theta) as necessary.
-Returns: a Compact, Compressed Theta Sketch, as bytes, from which the 
cardinality can be obtained. 
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Param lg_k: the sketch accuracy/size parameter as a BYTEINT in the range [4, 
26]. A NULL specifies the default of 12.
+Param seed: the seed to be used by the underlying hash function. A NULL 
specifies the default of 9001.
+Param p: up-front sampling probability. A NULL specifies the default of 1.0.
+Returns: a Compact, Compressed Theta Sketch, as BYTES.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSize.html
+ - https://datasketches.apache.org/docs/Theta/ThetaPSampling.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+ '''
 ) AS R"""
 import ModuleFactory from "gs://$GCS_BUCKET/theta_sketch.mjs";
 var Module = await ModuleFactory();
diff --git a/theta/sqlx/theta_sketch_agg_union.sqlx 
b/theta/sqlx/theta_sketch_agg_union.sqlx
index 023c1a8..49c3633 100644
--- a/theta/sqlx/theta_sketch_agg_union.sqlx
+++ b/theta/sqlx/theta_sketch_agg_union.sqlx
@@ -25,11 +25,19 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.mjs"],
   description = '''Creates a sketch that represents the union of the given 
column of sketches.
-Param sketch: the column of sketches. Each as bytes.
-Param lg_k: the sketch accuracy/size parameter as an integer in the range [4, 
26].
+
+Param sketch: the column of sketches. Each as BYTES.
+Param lg_k: the sketch accuracy/size parameter as a BYTEINT in the range [4, 
26].
 Param seed: This is used to confirm that the given sketches were configured 
with the correct seed.
-Returns: a Compact, Compressed Theta Sketch, as bytes, from which the union 
cardinality can be obtained.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a Compact, Compressed Theta Sketch, as BYTES.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSize.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+'''
 ) AS R"""
 import ModuleFactory from "gs://$GCS_BUCKET/theta_sketch.mjs";
 var Module = await ModuleFactory();
diff --git a/theta/sqlx/theta_sketch_get_estimate.sqlx 
b/theta/sqlx/theta_sketch_get_estimate.sqlx
index d37e6e7..f51c50f 100644
--- a/theta/sqlx/theta_sketch_get_estimate.sqlx
+++ b/theta/sqlx/theta_sketch_get_estimate.sqlx
@@ -25,10 +25,17 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.js"],
   description = '''Gets cardinality estimate from given sketch.
-Param sketch: The given sketch to query as bytes.
+
+Param sketch: The given sketch to query as BYTES.
 Param seed: This is used to confirm that the given sketch was configured with 
the correct seed.
 Returns: a FLOAT64 value as the cardinality estimate.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+'''
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
diff --git a/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx 
b/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx
index 0929a25..635259c 100644
--- a/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx
+++ b/theta/sqlx/theta_sketch_get_estimate_and_bounds.sqlx
@@ -25,13 +25,20 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.js"],
   description = '''Gets cardinality estimate and bounds from given sketch.
-Param sketch: The given sketch to query as bytes.
+
+Param sketch: The given sketch to query as BYTES.
 Param num_std_devs: The returned bounds will be based on the statistical 
confidence interval determined by the given number of standard deviations
 from the returned estimate. This number may be one of {1,2,3}, where 1 
represents 68% confidence, 2 represents 95% confidence and 3 represents 99.7% 
confidence.
 For example, if the given num_std_devs = 2 and the returned values are {1000, 
990, 1010} that means that with 95% confidence, the true value lies within the 
range [990, 1010].
 Param seed: This is used to confirm that the given sketch was configured with 
the correct seed.
-Returns: a struct with 3 FLOAT64 values as {estimate, lower_bound, 
upper_bound}.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a STRUCT with three FLOAT64 values as {estimate, lower_bound, 
upper_bound}.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+'''
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
diff --git a/theta/sqlx/theta_sketch_jaccard_similarity.sqlx 
b/theta/sqlx/theta_sketch_jaccard_similarity.sqlx
index 0ba0c86..5d80fb1 100644
--- a/theta/sqlx/theta_sketch_jaccard_similarity.sqlx
+++ b/theta/sqlx/theta_sketch_jaccard_similarity.sqlx
@@ -28,11 +28,15 @@ OPTIONS (
 J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are 
to each other.
 If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are 
disjoint.
 A Jaccard of .95 means the overlap between the two sets is 95% of the union of 
the two sets.
+
 Param sketchA: the first sketch as BYTES.
 Param sketchB: the second sketch as BYTES.
 Param seed: This is used to confirm that the given sketches were configured 
with the correct seed.
-Returns: a struct with 3 floating-point values {lower_bound, estimate, 
upper_bound} of the Jaccard index.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a STRUCT with three FLOAT64 values {lower_bound, estimate, 
upper_bound} of the Jaccard index.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+ '''
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
diff --git a/theta/sqlx/theta_sketch_scalar_intersection.sqlx 
b/theta/sqlx/theta_sketch_scalar_intersection.sqlx
index cdc15e9..24a5621 100644
--- a/theta/sqlx/theta_sketch_scalar_intersection.sqlx
+++ b/theta/sqlx/theta_sketch_scalar_intersection.sqlx
@@ -25,11 +25,21 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.js"],
   description = '''Computes a sketch that represents the scalar intersection 
of the two given sketches.
-Param sketchA: the first sketch as bytes.
-Param sketchB: the second sketch as bytes.
+
+Param sketchA: the first sketch as BYTES.
+Param sketchB: the second sketch as BYTES.
 Param seed: This is used to confirm that the given sketches were configured 
with the correct seed.
-Returns: a Compact, Compressed Theta Sketch, as bytes, from which the 
intersection cardinality can be obtained.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a Compact, Compressed Theta Sketch, as BYTES.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchSetOps.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchSetOpsAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSize.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+'''
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {
diff --git a/theta/sqlx/theta_sketch_scalar_union.sqlx 
b/theta/sqlx/theta_sketch_scalar_union.sqlx
index 6c13f09..d3fd346 100644
--- a/theta/sqlx/theta_sketch_scalar_union.sqlx
+++ b/theta/sqlx/theta_sketch_scalar_union.sqlx
@@ -25,12 +25,20 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.js"],
   description = '''Computes a sketch that represents the scalar union of the 
two given sketches.
-Param sketchA: the first sketch as bytes.
-Param sketchB: the second sketch as bytes.
+
+Param sketchA: the first sketch as BYTES.
+Param sketchB: the second sketch as BYTES.
 Param lg_k: the sketch accuracy/size parameter as an integer in the range [4, 
26].
 Param seed: This is used to confirm that the given sketches were configured 
with the correct seed.
-Returns: a Compact, Compressed Theta Sketch, as bytes, from which the union 
cardinality can be obtained.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a Compact, Compressed Theta Sketch, as BYTES.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracy.html
+ - https://datasketches.apache.org/docs/Theta/ThetaAccuracyPlots.html
+ - https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSize.html
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+'''
 ) AS R"""
 const default_lg_k = Number(Module.DEFAULT_LG_K);
 const default_seed = BigInt(Module.DEFAULT_SEED);
diff --git a/theta/sqlx/theta_sketch_to_string.sqlx 
b/theta/sqlx/theta_sketch_to_string.sqlx
index 180cbf2..abebf42 100644
--- a/theta/sqlx/theta_sketch_to_string.sqlx
+++ b/theta/sqlx/theta_sketch_to_string.sqlx
@@ -25,10 +25,14 @@ LANGUAGE js
 OPTIONS (
   library=["gs://$GCS_BUCKET/theta_sketch.js"],
   description = '''Returns a summary string that represents the state of the 
given sketch.
-Param sketch: the given sketch as bytes.
+
+Param sketch: the given sketch as BYTES.
 Param seed: This is used to confirm that the given sketch was configured with 
the correct seed.
-Returns: a string that represents the state of the given sketch.
-For more details: 
https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html'''
+Returns: a STRING that represents the state of the given sketch.
+
+For more information:
+ - https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html
+ '''
 ) AS R"""
 const default_seed = BigInt(Module.DEFAULT_SEED);
 try {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to