This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 20bb15d1 feat(search): trim whitespaces in tag values (#2702)
20bb15d1 is described below
commit 20bb15d1a1bb80a76a2756574c4433165e86a950
Author: Twice <[email protected]>
AuthorDate: Wed Jan 1 08:55:06 2025 +0800
feat(search): trim whitespaces in tag values (#2702)
Signed-off-by: PragmaTwice <[email protected]>
---
src/common/string_util.h | 8 ++++++++
src/search/indexer.cc | 4 ++++
tests/cppunit/indexer_test.cc | 6 +++---
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/src/common/string_util.h b/src/common/string_util.h
index 619d95d5..b8d57ef7 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -30,6 +30,8 @@
namespace util {
+inline const char ASCII_WHITESPACES[] = " \t\r\n\v\f";
+
std::string Float2String(double d);
std::string ToLower(std::string in);
std::string ToUpper(std::string in);
@@ -65,4 +67,10 @@ std::string StringJoin(const T &con, F &&f, std::string_view sep = ", ") {
return res;
}
+template <typename T>
+std::string StringJoin(const T &con, std::string_view sep = ", ") {
+ return StringJoin(
+ con, [](const auto &v) -> decltype(auto) { return v; }, sep);
+}
+
} // namespace util
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
index 630e4c1e..34acdc43 100644
--- a/src/search/indexer.cc
+++ b/src/search/indexer.cc
@@ -69,6 +69,8 @@ StatusOr<kqir::Value> FieldValueRetriever::ParseFromJson(const jsoncons::json &v
if (val.is_string()) {
const char delim[] = {tag->separator, '\0'};
auto vec = util::Split(val.as_string(), delim);
+ std::transform(vec.begin(), vec.end(), vec.begin(),
+ [](const std::string &s) { return util::Trim(s, util::ASCII_WHITESPACES); });
return kqir::MakeValue<kqir::StringArray>(vec);
} else if (val.is_array()) {
std::vector<std::string> strs;
@@ -105,6 +107,8 @@ StatusOr<kqir::Value> FieldValueRetriever::ParseFromHash(const std::string &valu
} else if (auto tag = dynamic_cast<const redis::TagFieldMetadata *>(type)) {
const char delim[] = {tag->separator, '\0'};
auto vec = util::Split(value, delim);
+ std::transform(vec.begin(), vec.end(), vec.begin(),
+ [](const std::string &s) { return util::Trim(s, util::ASCII_WHITESPACES); });
return kqir::MakeValue<kqir::StringArray>(vec);
} else if (auto vector = dynamic_cast<const redis::HnswVectorFieldMetadata *>(type)) {
const auto dim = vector->dim;
diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc
index 5e7ecf1a..80746f81 100644
--- a/tests/cppunit/indexer_test.cc
+++ b/tests/cppunit/indexer_test.cc
@@ -125,7 +125,7 @@ TEST_F(IndexerTest, HashTag) {
ASSERT_EQ(s->fields["x"], T("food,kitChen,Beauty"));
uint64_t cnt = 0;
- auto s_set = db.Set(*ctx_, key1, "x", "Clothing,FOOD,sport", &cnt);
+ auto s_set = db.Set(*ctx_, key1, "x", "Clothing,FOOD,sport ", &cnt);
ASSERT_EQ(cnt, 0);
ASSERT_TRUE(s_set.ok());
@@ -214,7 +214,7 @@ TEST_F(IndexerTest, JsonTag) {
ASSERT_EQ(s->fields.size(), 1);
ASSERT_EQ(s->fields["$.x"], T("food,kitChen,Beauty"));
- auto s_set = db.Set(*ctx_, key1, "$.x", "\"Clothing,FOOD,sport\"");
+ auto s_set = db.Set(*ctx_, key1, "$.x", "\" Clothing, FOOD ,sport\"");
ASSERT_TRUE(s_set.ok());
auto s2 = indexer.Update(*ctx_, *s);
@@ -259,7 +259,7 @@ TEST_F(IndexerTest, JsonTagBuildIndex) {
auto idxname = "jsontest";
{
- auto s_set = db.Set(*ctx_, key1, "$", R"({"x": "food,kitChen,Beauty"})");
+ auto s_set = db.Set(*ctx_, key1, "$", R"({"x": "food , \nkitChen,Beauty\t "})");
ASSERT_TRUE(s_set.ok());
auto s2 = indexer.updater_list[1]->Build(*ctx_);