On 2015-07-24 01:26, Petr Jelinek wrote:
On 2015-07-24 00:39, Tom Lane wrote:
I wrote:
OK, so "InitSampleScan" for a function called at ExecInitSampleScan time
(which we might as well make optional), and then we'll use
BeginSampleScan
for the function that gets the parameters.  The restart/ReScan function
goes away since BeginSampleScan will take its place.

Here's a WIP patch implementing things this way.  I've also taken the
time
to do a complete code review, and fixed quite a large number of things,
some cosmetic and some not so much.  I have not yet touched the tsm
contrib modules (so they won't even compile...), but I'm reasonably happy
with the state of the core code now.


Eh, I was just writing email with my rewrite :)

I'll do review of this instead then.

The only major difference that I see so far and I'd like you to
incorporate that into your patch is that I renamed the SampleScanCost to
SampleScanGetRelSize because that reflects much better the use of it, it
isn't really used for costing, but for getting the pages and tuples of
the baserel.


Ok, attached are couple of cosmetic changes - what I wrote above, plus comment about why we do float8 + hashing for REPEATABLE (it's not obvious IMHO) and one additional test query.

Do you want to do the contrib changes yourself as well or should I take it?

--
 Petr Jelinek                  http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
index 97d6407..e0a1568 100644
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -46,7 +46,7 @@ typedef struct
 } BernoulliSamplerData;
 
 
-static void bernoulli_samplescancost(PlannerInfo *root,
+static void bernoulli_samplescangetrelsize(PlannerInfo *root,
 						 RelOptInfo *baserel,
 						 List *paramexprs,
 						 BlockNumber *pages,
@@ -73,7 +73,7 @@ tsm_bernoulli_handler(PG_FUNCTION_ARGS)
 	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
 	tsm->repeatable_across_queries = true;
 	tsm->repeatable_across_scans = true;
-	tsm->SampleScanCost = bernoulli_samplescancost;
+	tsm->SampleScanGetRelSize = bernoulli_samplescangetrelsize;
 	tsm->InitSampleScan = bernoulli_initsamplescan;
 	tsm->BeginSampleScan = bernoulli_beginsamplescan;
 	tsm->NextSampleBlock = NULL;
@@ -87,11 +87,11 @@ tsm_bernoulli_handler(PG_FUNCTION_ARGS)
  * Cost estimation.
  */
 static void
-bernoulli_samplescancost(PlannerInfo *root,
-						 RelOptInfo *baserel,
-						 List *paramexprs,
-						 BlockNumber *pages,
-						 double *tuples)
+bernoulli_samplescangetrelsize(PlannerInfo *root,
+							   RelOptInfo *baserel,
+							   List *paramexprs,
+							   BlockNumber *pages,
+							   double *tuples)
 {
 	Node	   *pctnode;
 	float4		samplesize;
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
index 7cece29..1cdb859 100644
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -48,7 +48,7 @@ typedef struct
 } SystemSamplerData;
 
 
-static void system_samplescancost(PlannerInfo *root,
+static void system_samplescangetrelsize(PlannerInfo *root,
 					  RelOptInfo *baserel,
 					  List *paramexprs,
 					  BlockNumber *pages,
@@ -76,7 +76,7 @@ tsm_system_handler(PG_FUNCTION_ARGS)
 	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
 	tsm->repeatable_across_queries = true;
 	tsm->repeatable_across_scans = true;
-	tsm->SampleScanCost = system_samplescancost;
+	tsm->SampleScanGetRelSize = system_samplescangetrelsize;
 	tsm->InitSampleScan = system_initsamplescan;
 	tsm->BeginSampleScan = system_beginsamplescan;
 	tsm->NextSampleBlock = system_nextsampleblock;
@@ -90,11 +90,11 @@ tsm_system_handler(PG_FUNCTION_ARGS)
  * Cost estimation.
  */
 static void
-system_samplescancost(PlannerInfo *root,
-					  RelOptInfo *baserel,
-					  List *paramexprs,
-					  BlockNumber *pages,
-					  double *tuples)
+system_samplescangetrelsize(PlannerInfo *root,
+							RelOptInfo *baserel,
+							List *paramexprs,
+							BlockNumber *pages,
+							double *tuples)
 {
 	Node	   *pctnode;
 	float4		samplesize;
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 32db05e..144d2db 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -325,6 +325,12 @@ tablesample_init(SampleScanState *scanstate)
 		 * suitable seed.  For regression-testing purposes, that has the
 		 * convenient property that REPEATABLE(0) gives a machine-independent
 		 * result.
+		 *
+		 * The reson for using float8 for REPEATABLE at the SQL level is that
+		 * it will work as expected both for users used to databases which
+		 * accept only integers in REPEATABLE clause and for those who might
+		 * expect that REPEATABLE works same way as setseed() (float in the
+		 * range from -1 to 1).
 		 */
 		seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
 	}
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 187db96..5c114f4 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -503,8 +503,8 @@ set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 * we assume the function returns sane values.)
 	 */
 	tsm = GetTsmRoutine(tsc->tsmhandler);
-	tsm->SampleScanCost(root, rel, tsc->args,
-						&pages, &tuples);
+	tsm->SampleScanGetRelSize(root, rel, tsc->args,
+							  &pages, &tuples);
 
 	/*
 	 * If the sampling method does not support repeatable scans, we cannot
diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h
index c3b0cab..90f2395 100644
--- a/src/include/access/tsmapi.h
+++ b/src/include/access/tsmapi.h
@@ -20,7 +20,7 @@
  * Callback function signatures --- see tablesample-method.sgml for more info.
  */
 
-typedef void (*SampleScanCost_function) (PlannerInfo *root,
+typedef void (*SampleScanGetRelSize_function) (PlannerInfo *root,
 													 RelOptInfo *baserel,
 													 List *paramexprs,
 													 BlockNumber *pages,
@@ -64,7 +64,7 @@ typedef struct TsmRoutine
 	bool		repeatable_across_scans;
 
 	/* Functions for planning a SampleScan on a physical table */
-	SampleScanCost_function SampleScanCost;
+	SampleScanGetRelSize_function SampleScanGetRelSize;
 
 	/* Functions for executing a SampleScan on a physical table */
 	InitSampleScan_function InitSampleScan;		/* can be NULL */
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out
index 23ea8cb..55f89bd 100644
--- a/src/test/regress/expected/tablesample.out
+++ b/src/test/regress/expected/tablesample.out
@@ -232,6 +232,8 @@ SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
 ERROR:  tablesample method foobar does not exist
 LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
                                                     ^
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
+ERROR:  TABLESAMPLE parameter cannot be null
 SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
 ERROR:  TABLESAMPLE REPEATABLE parameter cannot be null
 SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql
index e4b5636..9406a19 100644
--- a/src/test/regress/sql/tablesample.sql
+++ b/src/test/regress/sql/tablesample.sql
@@ -68,6 +68,7 @@ select pct, count(unique1) from
 -- errors
 SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
 
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
 SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
 
 SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to