Le ven. 24 sept. 2021 à 13:03, Ranier Vilela <ranier...@gmail.com> a écrit :

>
> Comments are more than welcome!
>>
> 1. Would be better to add this test-and-error before tsvector_bsearch call.
>
> + if (lex_len == 0)
> + ereport(ERROR,
> + (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
> + errmsg("lexeme array may not contain empty strings")));
> +
>
> If lex_len is equal to zero, better get out soon.
>
> 2. The second test-and-error can use lex_len, just like the first test,
> I don't see the point in recalculating the size of lex_len if that's
> already done.
>
> + if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
> + ereport(ERROR,
> + (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
> + errmsg("lexeme array may not contain empty strings")));
> +
>

Hello Ranier,
Thank you for your comments.
Here's a new patch file taking your comments into account.

I was just wondering whether the empty-string rejection is done in the right
place. As you rightly pointed out, lex_len is calculated later (again, for a
good reason), whereas my code checks for empty strings as soon as possible.
To me, that seems to be the right thing to do (it prevents any further
processing of the lexemes as early as possible), but I might be missing
something.

Regards


Jean-Christophe Arnu
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 9236ebcc8f..00f80ffcbc 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -329,6 +329,12 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+		if (lex_len == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+					 errmsg("lexeme array may not contain empty strings")));
+
 		lex_pos = tsvector_bsearch(tsout, lex, lex_len);
 
 		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
@@ -609,6 +615,12 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
 
 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+		if (lex_len == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+					 errmsg("lexeme array may not contain empty strings")));
+
 		lex_pos = tsvector_bsearch(tsin, lex, lex_len);
 
 		if (lex_pos >= 0)
@@ -761,13 +773,18 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 
 	deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
 
-	/* Reject nulls (maybe we should just ignore them, instead?) */
+	/* Reject nulls and zero length strings (maybe we should just ignore them, instead?) */
 	for (i = 0; i < nitems; i++)
 	{
 		if (nulls[i])
 			ereport(ERROR,
 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
 					 errmsg("lexeme array may not contain nulls")));
+
+		if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+					 errmsg("lexeme array may not contain empty strings")));
 	}
 
 	/* Sort and de-dup, because this is required for a valid tsvector. */
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index 2601e312df..a258179ef0 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -1260,6 +1260,8 @@ SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceshi
 
 SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
 ERROR:  lexeme array may not contain nulls
+SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '']);
+ERROR:  lexeme array may not contain empty strings
 SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
                    unnest                    
 ---------------------------------------------
@@ -1330,6 +1332,8 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
 
 SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
 ERROR:  lexeme array may not contain nulls
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
+ERROR:  lexeme array may not contain empty strings
 -- array_to_tsvector must sort and de-dup
 SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
  array_to_tsvector 
@@ -1375,6 +1379,8 @@ SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
 
 SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
 ERROR:  lexeme array may not contain nulls
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '']);
+ERROR:  lexeme array may not contain empty strings
 SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
                           ts_filter                          
 -------------------------------------------------------------
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index 30c8c702f0..6be0b26117 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -240,6 +240,7 @@ SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3':
 SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
 SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
 SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
+SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '']);
 
 SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
 SELECT unnest('base hidden rebel spaceship strike'::tsvector);
@@ -252,6 +253,7 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
 
 SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
 SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
 -- array_to_tsvector must sort and de-dup
 SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
 
@@ -262,6 +264,7 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '
 SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
 SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
 SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '']);
 
 SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
 SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a}');

Reply via email to