This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch mango-beginswith-fixes
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 5296a029d9fe4be6939da113f6bb04194cdbfb41
Author: Will Holley <[email protected]>
AuthorDate: Thu Nov 2 09:26:11 2023 +0000

    mango: fix $beginsWith range
    
    In the initial implementation of $beginsWith, the range calculation
    for view indexes mistakenly appends an integer with the size of
    8 bits which gets maxed out at FF, rather than building a binary
    with an extra 3 bytes at the end.
    
    Additionally, ICU defines the maximum sortable code point as
    `U+FFFF`. This is a more correct suffix when calculating the
    key range and is supported by older ICU versions (required
    for e.g. CentOS 7).
    
    This commit fixes the range calculation by correctly appending
    the `U+FFFF` code point to the prefix. Additionally,
    we use the Erlang `utf8` binary type to verify that the result
    is a valid utf8 string.
---
 src/mango/src/mango_idx_view.erl     |  2 +-
 src/mango/test/25-beginswith-test.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index d1650e987..29895fbf9 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -417,7 +417,7 @@ range(_, _, LCmp, Low, HCmp, High) ->
 % beginsWith requires both a high and low bound
 range({[{<<"$beginsWith">>, Arg}]}, LCmp, Low, HCmp, High) ->
     {LCmp0, Low0, HCmp0, High0} = range({[{<<"$gte">>, Arg}]}, LCmp, Low, 
HCmp, High),
-    range({[{<<"$lte">>, <<Arg/binary, 16#10FFFF>>}]}, LCmp0, Low0, HCmp0, 
High0);
+    range({[{<<"$lte">>, <<Arg/binary, 16#FFFF/utf8>>}]}, LCmp0, Low0, HCmp0, 
High0);
 range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
     case range_pos(Low, Arg, High) of
         min ->
diff --git a/src/mango/test/25-beginswith-test.py 
b/src/mango/test/25-beginswith-test.py
index 3b5134b65..df96560b4 100644
--- a/src/mango/test/25-beginswith-test.py
+++ b/src/mango/test/25-beginswith-test.py
@@ -54,7 +54,7 @@ class BeginsWithOperator(mango.DbPerClass):
 
         self.assertEqual(mrargs["start_key"], ["A"])
         end_key_bytes = to_utf8_bytes(mrargs["end_key"])
-        self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbd", b"<MAX>"])
+        self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbf", b"<MAX>"])
 
     def test_compound_key(self):
         selector = {"name": "Eddie", "location": {"$beginsWith": "A"}}
@@ -62,7 +62,7 @@ class BeginsWithOperator(mango.DbPerClass):
 
         self.assertEqual(mrargs["start_key"], ["Eddie", "A"])
         end_key_bytes = to_utf8_bytes(mrargs["end_key"])
-        self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbd", b"<MAX>"])
+        self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbf", b"<MAX>"])
 
         docs = self.db.find(selector)
         self.assertEqual(len(docs), 1)
@@ -74,12 +74,12 @@ class BeginsWithOperator(mango.DbPerClass):
             {
                 "sort": ["location"],
                 "start_key": [b"A"],
-                "end_key": [b"A\xef\xbf\xbd", b"<MAX>"],
+                "end_key": [b"A\xef\xbf\xbf", b"<MAX>"],
                 "direction": "fwd",
             },
             {
                 "sort": [{"location": "desc"}],
-                "start_key": [b"A\xef\xbf\xbd", b"<MAX>"],
+                "start_key": [b"A\xef\xbf\xbf", b"<MAX>"],
                 "end_key": [b"A"],
                 "direction": "rev",
             },
@@ -97,7 +97,7 @@ class BeginsWithOperator(mango.DbPerClass):
 
         self.assertEqual(mrargs["start_key"], "a")
         end_key_bytes = to_utf8_bytes(mrargs["end_key"])
-        self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbd"])
+        self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbf"])
 
     def test_no_index(self):
         selector = {"foo": {"$beginsWith": "a"}}

Reply via email to