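The goal is to have the s-expression query (subject foo-bar) match the
same messages as the infix query subject:foo-bar. To that end, the
argument of a phrase field is split into runs of word characters,
each of which becomes a separate lowercased term of the phrase.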
---
 lib/parse-sexp.cc         | 28 ++++++++++++++++++++++++----
 test/T081-sexpr-search.sh |  8 ++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 4a2fac8b..26d4ee1f 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -66,13 +66,33 @@ _sexp_combine_field (const char *prefix,
 
     for (sexp_t *cur = sx; cur; cur = cur->next) {
        std::string pref_str = prefix;
-       std::string word = cur->val;
 
-       if (operation == Xapian::Query::OP_PHRASE)
-           word = Xapian::Unicode::tolower (word);
+       if (operation == Xapian::Query::OP_PHRASE) {
+           Xapian::Utf8Iterator p (cur->val);
+           Xapian::Utf8Iterator end;
 
+           while (p != end) {
+               Xapian::Utf8Iterator start;
+               while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+                   p++;
 
-       terms.push_back (pref_str + word);
+               if (p == end)
+                   break;
+
+               start = p;
+
+               while (p != end && Xapian::Unicode::is_wordchar (*p))
+                   p++;
+
+               if (p != start) {
+                   std::string word (start, p);
+                   word = Xapian::Unicode::tolower (word);
+                   terms.push_back (pref_str + word);
+               }
+           }
+       } else {
+           terms.push_back (pref_str + cur->val);
+       }
     }
     return Xapian::Query (operation, terms.begin (), terms.end ());
 }
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 1a80a133..6369e483 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -34,4 +34,12 @@ add_message [subject]=utf8-sübjéct '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
 output=$(notmuch search --query-syntax=sexp '(subject utf8 sübjéct)' | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
 
+test_begin_subtest "Search by 'subject' (utf-8, phrase-token):"
+output=$(notmuch search --query-syntax=sexp '(subject utf8-sübjéct)' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
+test_begin_subtest "Search by 'subject' (utf-8, quoted string):"
+output=$(notmuch search --query-syntax=sexp '(subject "utf8 sübjéct")' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-sübjéct (inbox unread)"
+
 test_done
-- 
2.30.2
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org

Reply via email to