This is an automated email from the ASF dual-hosted git repository.

twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 20bb15d1 feat(search): trim whitespaces in tag values (#2702)
20bb15d1 is described below

commit 20bb15d1a1bb80a76a2756574c4433165e86a950
Author: Twice <[email protected]>
AuthorDate: Wed Jan 1 08:55:06 2025 +0800

    feat(search): trim whitespaces in tag values (#2702)
    
    Signed-off-by: PragmaTwice <[email protected]>
---
 src/common/string_util.h      | 8 ++++++++
 src/search/indexer.cc         | 4 ++++
 tests/cppunit/indexer_test.cc | 6 +++---
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/common/string_util.h b/src/common/string_util.h
index 619d95d5..b8d57ef7 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -30,6 +30,8 @@
 
 namespace util {
 
+inline const char ASCII_WHITESPACES[] = " \t\r\n\v\f";
+
 std::string Float2String(double d);
 std::string ToLower(std::string in);
 std::string ToUpper(std::string in);
@@ -65,4 +67,10 @@ std::string StringJoin(const T &con, F &&f, std::string_view sep = ", ") {
   return res;
 }
 
+template <typename T>
+std::string StringJoin(const T &con, std::string_view sep = ", ") {
+  return StringJoin(
+      con, [](const auto &v) -> decltype(auto) { return v; }, sep);
+}
+
 }  // namespace util
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
index 630e4c1e..34acdc43 100644
--- a/src/search/indexer.cc
+++ b/src/search/indexer.cc
@@ -69,6 +69,8 @@ StatusOr<kqir::Value> FieldValueRetriever::ParseFromJson(const jsoncons::json &v
     if (val.is_string()) {
       const char delim[] = {tag->separator, '\0'};
       auto vec = util::Split(val.as_string(), delim);
+      std::transform(vec.begin(), vec.end(), vec.begin(),
+                     [](const std::string &s) { return util::Trim(s, util::ASCII_WHITESPACES); });
       return kqir::MakeValue<kqir::StringArray>(vec);
     } else if (val.is_array()) {
       std::vector<std::string> strs;
@@ -105,6 +107,8 @@ StatusOr<kqir::Value> FieldValueRetriever::ParseFromHash(const std::string &valu
   } else if (auto tag = dynamic_cast<const redis::TagFieldMetadata *>(type)) {
     const char delim[] = {tag->separator, '\0'};
     auto vec = util::Split(value, delim);
+    std::transform(vec.begin(), vec.end(), vec.begin(),
+                   [](const std::string &s) { return util::Trim(s, util::ASCII_WHITESPACES); });
     return kqir::MakeValue<kqir::StringArray>(vec);
   } else if (auto vector = dynamic_cast<const redis::HnswVectorFieldMetadata *>(type)) {
     const auto dim = vector->dim;
diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc
index 5e7ecf1a..80746f81 100644
--- a/tests/cppunit/indexer_test.cc
+++ b/tests/cppunit/indexer_test.cc
@@ -125,7 +125,7 @@ TEST_F(IndexerTest, HashTag) {
     ASSERT_EQ(s->fields["x"], T("food,kitChen,Beauty"));
 
     uint64_t cnt = 0;
-    auto s_set = db.Set(*ctx_, key1, "x", "Clothing,FOOD,sport", &cnt);
+    auto s_set = db.Set(*ctx_, key1, "x", "Clothing,FOOD,sport ", &cnt);
     ASSERT_EQ(cnt, 0);
     ASSERT_TRUE(s_set.ok());
 
@@ -214,7 +214,7 @@ TEST_F(IndexerTest, JsonTag) {
     ASSERT_EQ(s->fields.size(), 1);
     ASSERT_EQ(s->fields["$.x"], T("food,kitChen,Beauty"));
 
-    auto s_set = db.Set(*ctx_, key1, "$.x", "\"Clothing,FOOD,sport\"");
+    auto s_set = db.Set(*ctx_, key1, "$.x", "\"  Clothing, FOOD  ,sport\"");
     ASSERT_TRUE(s_set.ok());
 
     auto s2 = indexer.Update(*ctx_, *s);
@@ -259,7 +259,7 @@ TEST_F(IndexerTest, JsonTagBuildIndex) {
   auto idxname = "jsontest";
 
   {
-    auto s_set = db.Set(*ctx_, key1, "$", R"({"x": "food,kitChen,Beauty"})");
+    auto s_set = db.Set(*ctx_, key1, "$", R"({"x": "food , \nkitChen,Beauty\t "})");
     ASSERT_TRUE(s_set.ok());
 
     auto s2 = indexer.updater_list[1]->Build(*ctx_);

Reply via email to