This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 16e778389 fix(search): support escaped character in tags (#3110)
16e778389 is described below
commit 16e7783895e5b910cf9026a2be1ef062d93421b6
Author: zhenghaoz <[email protected]>
AuthorDate: Wed Aug 13 17:23:58 2025 +0800
fix(search): support escaped character in tags (#3110)
Co-authored-by: hulk <[email protected]>
Co-authored-by: Twice <[email protected]>
---
src/search/common_parser.h | 9 +++++++++
src/search/common_transformer.h | 19 +++++++++++++++++++
src/search/redis_query_parser.h | 2 +-
src/search/redis_query_transformer.h | 6 +++---
tests/gocase/unit/search/search_test.go | 20 ++++++++++++++++++++
5 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/src/search/common_parser.h b/src/search/common_parser.h
index 7d617dbdc..ae13dd43e 100644
--- a/src/search/common_parser.h
+++ b/src/search/common_parser.h
@@ -53,4 +53,13 @@ struct WSPad : peg::pad<T, WhiteSpace> {};
struct UnsignedInteger : Digits {};
struct Integer : peg::seq<peg::opt<peg::one<'-'>>, Digits> {};
+struct Cntrl : peg::ranges<'\0', '\x1F', '\x7F'> {};  // ASCII control characters: the range '\0'..'\x1F' plus, assuming PEGTL `ranges` treats a trailing odd argument as a single character, '\x7F' (DEL) -- TODO confirm
+struct Escape : peg::one<'\\'> {};  // a single backslash, the escape introducer
+struct Punct : peg::ranges<'!', '/', ':', '@', '[', '`', '{', '~'> {};  // the four ASCII punctuation ranges '!'..'/', ':'..'@', '['..'`' and '{'..'~'
+struct EscapedCharacter : peg::seq<Escape, peg::sor<Punct, peg::space,
Escape>> {};  // a backslash followed by a punctuation character, a whitespace character, or another backslash
+struct Term  // one or more term characters; alternatives are tried in the order listed below
+ : peg::plus<
+ peg::sor<peg::minus<peg::any, peg::sor<Punct, Cntrl, WhiteSpace,
Escape>>, EscapedCharacter, peg::one<'_'>>> {  // (1) any character that is not punctuation/control/whitespace/backslash, (2) an escaped character, (3) a bare '_' (which lies inside the '['..'`' Punct range and would otherwise need escaping)
+};
+
} // namespace kqir
diff --git a/src/search/common_transformer.h b/src/search/common_transformer.h
index 18b2626d7..6a62a205c 100644
--- a/src/search/common_transformer.h
+++ b/src/search/common_transformer.h
@@ -106,6 +106,25 @@ struct TreeTransformer {
return result;
}
+ // Converts the raw text of a Term into its literal value by dropping the
+ // backslash of every escape sequence.
+ //
+ // A backslash must be followed by a punctuation character, a whitespace
+ // character, or another backslash; that following character is copied to the
+ // result verbatim. Every other character is copied unchanged.
+ //
+ // Returns Status::NotOK when a backslash is the last character of `str` or is
+ // followed by a character that may not be escaped.
+ static StatusOr<std::string> UnescapeTerm(std::string_view str) {
+   std::string result;
+   result.reserve(str.size());
+   while (!str.empty()) {
+     if (str[0] == '\\') {
+       // A lone trailing backslash has nothing to escape; reject it instead of
+       // indexing past the end of the view.
+       if (str.size() < 2) {
+         return {Status::NotOK, fmt::format("invalid escape sequence in term: {}", str)};
+       }
+       str.remove_prefix(1);
+       // The <cctype> predicates are undefined for negative arguments, so the
+       // byte is widened through unsigned char before classification.
+       const auto escaped = static_cast<unsigned char>(str[0]);
+       if (!(ispunct(escaped) || isspace(escaped) || str[0] == '\\')) {
+         return {Status::NotOK, fmt::format("invalid escape sequence in term: {}", str)};
+       }
+     }
+     // Either an ordinary character or the (validated) escaped character.
+     result.push_back(str[0]);
+     str.remove_prefix(1);
+   }
+   return result;
+ }
+
template <typename T = double>
static StatusOr<std::vector<T>> Binary2Vector(std::string_view str) {
if (str.size() % sizeof(T) != 0) {
diff --git a/src/search/redis_query_parser.h b/src/search/redis_query_parser.h
index b5786950e..2e6c32cb9 100644
--- a/src/search/redis_query_parser.h
+++ b/src/search/redis_query_parser.h
@@ -39,7 +39,7 @@ struct Field : seq<one<'@'>, Identifier> {};
struct Param : seq<one<'$'>, Identifier> {};
-struct Tag : sor<Identifier, StringL, Param, Number> {};
+struct Tag : sor<StringL, Param, Number, Term> {};
struct TagList : seq<one<'{'>, WSPad<Tag>, star<seq<one<'|'>, WSPad<Tag>>>,
one<'}'>> {};
struct NumberOrParam : sor<Number, Param> {};
diff --git a/src/search/redis_query_transformer.h b/src/search/redis_query_transformer.h
index 4ff0726f4..08e7f63c1 100644
--- a/src/search/redis_query_transformer.h
+++ b/src/search/redis_query_transformer.h
@@ -36,7 +36,7 @@ namespace ir = kqir;
template <typename Rule>
using TreeSelector = parse_tree::selector<
- Rule, parse_tree::store_content::on<Number, UnsignedInteger, StringL,
Param, Identifier, Inf>,
+ Rule, parse_tree::store_content::on<Number, UnsignedInteger, StringL,
Param, Identifier, Inf, Term>,
parse_tree::remove_content::on<TagList, NumericRange, VectorRange,
ExclusiveNumber, FieldQuery, NotExpr, AndExpr,
OrExpr, PrefilterExpr, KnnSearch, Wildcard,
VectorRangeToken, KnnToken, ArrowOp>>;
@@ -93,8 +93,8 @@ struct Transformer : ir::TreeTransformer {
for (const auto& tag : query->children) {
std::string tag_str;
- if (Is<Identifier>(tag)) {
- tag_str = tag->string();
+ if (Is<Term>(tag)) {
+ tag_str = GET_OR_RET(UnescapeTerm(tag->string()));
} else if (Is<StringL>(tag)) {
tag_str = GET_OR_RET(UnescapeString(tag->string()));
} else if (Is<Param>(tag)) {
diff --git a/tests/gocase/unit/search/search_test.go b/tests/gocase/unit/search/search_test.go
index 905094d4e..184bf3152 100644
--- a/tests/gocase/unit/search/search_test.go
+++ b/tests/gocase/unit/search/search_test.go
@@ -253,4 +253,24 @@ func TestSearchTag(t *testing.T) {
require.Equal(t, int64(1), res.Val().([]interface{})[0])
require.Equal(t, "testidx_number:k1",
res.Val().([]interface{})[1])
})
+
+ // Verifies that punctuation and whitespace inside a TAG query term can be
+ // matched when they are escaped with a backslash.
+ t.Run("FT.SEARCH with escaped characters in tags", func(t *testing.T) {
+     require.NoError(t, rdb.Do(ctx, "FT.CREATE", "testidx_escape", "ON", "HASH", "PREFIX", "1", "testidx_escape:", "SCHEMA", "a", "TAG").Err())
+     require.NoError(t, rdb.Do(ctx, "HSET", "testidx_escape:k1", "a", "email@example.com").Err())
+     require.NoError(t, rdb.Do(ctx, "HSET", "testidx_escape:k2", "a", "Hello World").Err())
+
+     // Escaped punctuation: '@' and '.' are written as `\@` and `\.` in the query.
+     res := rdb.Do(ctx, "FT.SEARCH", "testidx_escape", `@a:{email\@example\.com}`)
+     require.NoError(t, res.Err())
+     // result should be [1 testidx_escape:k1 [a email@example.com]]
+     require.Equal(t, 3, len(res.Val().([]interface{})))
+     require.Equal(t, int64(1), res.Val().([]interface{})[0])
+     require.Equal(t, "testidx_escape:k1", res.Val().([]interface{})[1])
+
+     // Escaped whitespace: the space is written as `\ ` and must stay on one
+     // line, because the query is a raw string literal.
+     res = rdb.Do(ctx, "FT.SEARCH", "testidx_escape", `@a:{Hello\ World}`)
+     require.NoError(t, res.Err())
+     // result should be [1 testidx_escape:k2 [a Hello World]]
+     require.Equal(t, 3, len(res.Val().([]interface{})))
+     require.Equal(t, int64(1), res.Val().([]interface{})[0])
+     require.Equal(t, "testidx_escape:k2", res.Val().([]interface{})[1])
+ })
}