This is an automated email from the ASF dual-hosted git repository.

twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 16e778389 fix(search): support escaped character in tags (#3110)
16e778389 is described below

commit 16e7783895e5b910cf9026a2be1ef062d93421b6
Author: zhenghaoz <[email protected]>
AuthorDate: Wed Aug 13 17:23:58 2025 +0800

    fix(search): support escaped character in tags (#3110)
    
    Co-authored-by: hulk <[email protected]>
    Co-authored-by: Twice <[email protected]>
---
 src/search/common_parser.h              |  9 +++++++++
 src/search/common_transformer.h         | 19 +++++++++++++++++++
 src/search/redis_query_parser.h         |  2 +-
 src/search/redis_query_transformer.h    |  6 +++---
 tests/gocase/unit/search/search_test.go | 20 ++++++++++++++++++++
 5 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/src/search/common_parser.h b/src/search/common_parser.h
index 7d617dbdc..ae13dd43e 100644
--- a/src/search/common_parser.h
+++ b/src/search/common_parser.h
@@ -53,4 +53,13 @@ struct WSPad : peg::pad<T, WhiteSpace> {};
 struct UnsignedInteger : Digits {};
 struct Integer : peg::seq<peg::opt<peg::one<'-'>>, Digits> {};
 
+struct Cntrl : peg::ranges<'\0', '\x1F', '\x7F'> {};  // control characters: the range 0x00-0x1F plus the single character 0x7F
+struct Escape : peg::one<'\\'> {};  // the escape introducer: a single backslash
+struct Punct : peg::ranges<'!', '/', ':', '@', '[', '`', '{', '~'> {};  // the four ranges '!'-'/', ':'-'@', '['-'`', '{'-'~' (this covers both '\\' and '_')
+struct EscapedCharacter : peg::seq<Escape, peg::sor<Punct, peg::space, 
Escape>> {};  // a backslash followed by one Punct, peg::space, or backslash character
+struct Term  // one or more plain characters, escaped characters, or underscores
+    : peg::plus<
+          peg::sor<peg::minus<peg::any, peg::sor<Punct, Cntrl, WhiteSpace, 
Escape>>, EscapedCharacter, peg::one<'_'>>> {  // plain = anything outside Punct/Cntrl/WhiteSpace/Escape; '_' lies inside Punct's '['-'`' range, so it is re-admitted explicitly
+};
+
 }  // namespace kqir
diff --git a/src/search/common_transformer.h b/src/search/common_transformer.h
index 18b2626d7..6a62a205c 100644
--- a/src/search/common_transformer.h
+++ b/src/search/common_transformer.h
@@ -106,6 +106,25 @@ struct TreeTransformer {
     return result;
   }
 
+  /// Strips the escaping backslashes from a Term token and returns the literal text.
+  ///
+  /// Every character is copied through unchanged, except that a backslash is dropped and the
+  /// character after it is copied verbatim. Only punctuation, whitespace and the backslash
+  /// itself may be escaped.
+  ///
+  /// @param str the raw term text
+  /// @return the unescaped text, or Status::NotOK if a backslash is followed by a character
+  ///         that may not be escaped or by nothing at all
+  static StatusOr<std::string> UnescapeTerm(std::string_view str) {
+    std::string result;
+    result.reserve(str.size());
+    while (!str.empty()) {
+      if (str[0] == '\\') {
+        str.remove_prefix(1);
+        // A trailing backslash leaves str empty here, so test for that before reading str[0].
+        // The <cctype> classifiers are undefined for negative values, hence the unsigned char.
+        const bool escapable = !str.empty() && (ispunct(static_cast<unsigned char>(str[0])) ||
+                                                isspace(static_cast<unsigned char>(str[0])) || str[0] == '\\');
+        if (!escapable) {
+          return {Status::NotOK, fmt::format("invalid escape sequence in term: {}", str)};
+        }
+      }
+      // Reached for a plain character and for the character that follows a valid backslash.
+      result.push_back(str[0]);
+      str.remove_prefix(1);
+    }
+    return result;
+  }
+
   template <typename T = double>
   static StatusOr<std::vector<T>> Binary2Vector(std::string_view str) {
     if (str.size() % sizeof(T) != 0) {
diff --git a/src/search/redis_query_parser.h b/src/search/redis_query_parser.h
index b5786950e..2e6c32cb9 100644
--- a/src/search/redis_query_parser.h
+++ b/src/search/redis_query_parser.h
@@ -39,7 +39,7 @@ struct Field : seq<one<'@'>, Identifier> {};
 
 struct Param : seq<one<'$'>, Identifier> {};
 
-struct Tag : sor<Identifier, StringL, Param, Number> {};
+struct Tag : sor<StringL, Param, Number, Term> {};
 struct TagList : seq<one<'{'>, WSPad<Tag>, star<seq<one<'|'>, WSPad<Tag>>>, 
one<'}'>> {};
 
 struct NumberOrParam : sor<Number, Param> {};
diff --git a/src/search/redis_query_transformer.h 
b/src/search/redis_query_transformer.h
index 4ff0726f4..08e7f63c1 100644
--- a/src/search/redis_query_transformer.h
+++ b/src/search/redis_query_transformer.h
@@ -36,7 +36,7 @@ namespace ir = kqir;
 
 template <typename Rule>
 using TreeSelector = parse_tree::selector<
-    Rule, parse_tree::store_content::on<Number, UnsignedInteger, StringL, 
Param, Identifier, Inf>,
+    Rule, parse_tree::store_content::on<Number, UnsignedInteger, StringL, 
Param, Identifier, Inf, Term>,
     parse_tree::remove_content::on<TagList, NumericRange, VectorRange, 
ExclusiveNumber, FieldQuery, NotExpr, AndExpr,
                                    OrExpr, PrefilterExpr, KnnSearch, Wildcard, 
VectorRangeToken, KnnToken, ArrowOp>>;
 
@@ -93,8 +93,8 @@ struct Transformer : ir::TreeTransformer {
 
         for (const auto& tag : query->children) {
           std::string tag_str;
-          if (Is<Identifier>(tag)) {
-            tag_str = tag->string();
+          if (Is<Term>(tag)) {
+            tag_str = GET_OR_RET(UnescapeTerm(tag->string()));
           } else if (Is<StringL>(tag)) {
             tag_str = GET_OR_RET(UnescapeString(tag->string()));
           } else if (Is<Param>(tag)) {
diff --git a/tests/gocase/unit/search/search_test.go 
b/tests/gocase/unit/search/search_test.go
index 905094d4e..184bf3152 100644
--- a/tests/gocase/unit/search/search_test.go
+++ b/tests/gocase/unit/search/search_test.go
@@ -253,4 +253,24 @@ func TestSearchTag(t *testing.T) {
                require.Equal(t, int64(1), res.Val().([]interface{})[0])
                require.Equal(t, "testidx_number:k1", 
res.Val().([]interface{})[1])
        })
+
+       t.Run("FT.SEARCH with escaped characters in tags", func(t *testing.T) {
+               require.NoError(t, rdb.Do(ctx, "FT.CREATE", "testidx_escape", 
"ON", "HASH", "PREFIX", "1", "testidx_escape:", "SCHEMA", "a", "TAG").Err())
+               require.NoError(t, rdb.Do(ctx, "HSET", "testidx_escape:k1", 
"a", "[email protected]").Err())
+               require.NoError(t, rdb.Do(ctx, "HSET", "testidx_escape:k2", 
"a", "Hello World").Err())
+
+               res := rdb.Do(ctx, "FT.SEARCH", "testidx_escape", 
`@a:{email\@example\.com}`)
+               require.NoError(t, res.Err())
+               // result should be [1 testidx_escape:k1 [a email@example.com]]
+               require.Equal(t, 3, len(res.Val().([]interface{})))
+               require.Equal(t, int64(1), res.Val().([]interface{})[0])
+               require.Equal(t, "testidx_escape:k1", 
res.Val().([]interface{})[1])
+
+               res = rdb.Do(ctx, "FT.SEARCH", "testidx_escape", `@a:{Hello\ 
World}`)
+               require.NoError(t, res.Err())
+               // result should be [1 testidx_escape:k2 [a Hello World]]
+               require.Equal(t, 3, len(res.Val().([]interface{})))
+               require.Equal(t, int64(1), res.Val().([]interface{})[0])
+               require.Equal(t, "testidx_escape:k2", 
res.Val().([]interface{})[1])
+       })
 }

Reply via email to