Le dim. 26 sept. 2021 à 15:55, Artur Zakirov <[email protected]> a écrit :
> Nice catch! The patch looks good to me.
> Can you also add a more general test case:
>
> =# SELECT $$'' '1' '2'$$::tsvector;
> ERROR: syntax error in tsvector: "'' '1' '2'"
> LINE 1: SELECT $$'' '1' '2'$$::tsvector;
>
>
Thank you, Artur, for suggesting this test case.
It is now included in this patch.
--
Jean-Christophe Arnu
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 9236ebcc8f..00f80ffcbc 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -329,6 +329,12 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
lex = VARDATA(dlexemes[i]);
lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+ if (lex_len == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+ errmsg("lexeme array may not contain empty strings")));
+
lex_pos = tsvector_bsearch(tsout, lex, lex_len);
if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
@@ -609,6 +615,12 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
lex = VARDATA(dlexemes[i]);
lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+ if (lex_len == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+ errmsg("lexeme array may not contain empty strings")));
+
lex_pos = tsvector_bsearch(tsin, lex, lex_len);
if (lex_pos >= 0)
@@ -761,13 +773,18 @@ array_to_tsvector(PG_FUNCTION_ARGS)
deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
- /* Reject nulls (maybe we should just ignore them, instead?) */
+ /* Reject nulls and zero length strings (maybe we should just ignore them, instead?) */
for (i = 0; i < nitems; i++)
{
if (nulls[i])
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("lexeme array may not contain nulls")));
+
+ if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+ errmsg("lexeme array may not contain empty strings")));
}
/* Sort and de-dup, because this is required for a valid tsvector. */
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index 2601e312df..f8bf9c6051 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -85,6 +85,10 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
'a':3A,4B 'b':2A 'ba':1237
(1 row)
+SELECT $$'' '1' '2'$$::tsvector;
+ERROR: syntax error in tsvector: "'' '1' '2'"
+LINE 1: SELECT $$'' '1' '2'$$::tsvector;
+ ^
--Base tsquery test
SELECT '1'::tsquery;
tsquery
@@ -1260,6 +1264,8 @@ SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceshi
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
ERROR: lexeme array may not contain nulls
+SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '']);
+ERROR: lexeme array may not contain empty strings
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
unnest
---------------------------------------------
@@ -1330,6 +1336,8 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
ERROR: lexeme array may not contain nulls
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
+ERROR: lexeme array may not contain empty strings
-- array_to_tsvector must sort and de-dup
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
array_to_tsvector
@@ -1375,6 +1383,8 @@ SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
ERROR: lexeme array may not contain nulls
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '']);
+ERROR: lexeme array may not contain empty strings
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
ts_filter
-------------------------------------------------------------
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index 30c8c702f0..79a7777407 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -17,6 +17,7 @@ SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
SELECT '''w'':4A,3B,2C,1D,5 a:8';
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+SELECT $$'' '1' '2'$$::tsvector;
--Base tsquery test
SELECT '1'::tsquery;
@@ -240,6 +241,7 @@ SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3':
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
+SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '']);
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
SELECT unnest('base hidden rebel spaceship strike'::tsvector);
@@ -252,6 +254,7 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
-- array_to_tsvector must sort and de-dup
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
@@ -262,6 +265,7 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', '']);
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a}');