This is an automated email from the ASF dual-hosted git repository. willholley pushed a commit to branch mango-beginswith-fixes in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 5296a029d9fe4be6939da113f6bb04194cdbfb41 Author: Will Holley <[email protected]> AuthorDate: Thu Nov 2 09:26:11 2023 +0000 mango: fix $beginsWith range In the initial implementation of $beginsWith, the range calculation for view indexes mistakenly appends an integer with the size of 8 bits which gets maxed out at FF, rather than building a binary with an extra 3 bytes at the end. Additionally, ICU defines the maximum sortable code point as `U+FFFF`. This is a more correct suffix when calculating the key range and is supported by older ICU versions (required for e.g. CentOS 7). This commit fixes the range calculation by correctly appending the `U+FFFF` code point in the range calculation. Additionally, we use the Erlang `utf8` binary type to verify that the result is a valid utf8 string. --- src/mango/src/mango_idx_view.erl | 2 +- src/mango/test/25-beginswith-test.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index d1650e987..29895fbf9 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -417,7 +417,7 @@ range(_, _, LCmp, Low, HCmp, High) -> % beginsWith requires both a high and low bound range({[{<<"$beginsWith">>, Arg}]}, LCmp, Low, HCmp, High) -> {LCmp0, Low0, HCmp0, High0} = range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High), - range({[{<<"$lte">>, <<Arg/binary, 16#10FFFF>>}]}, LCmp0, Low0, HCmp0, High0); + range({[{<<"$lte">>, <<Arg/binary, 16#FFFF/utf8>>}]}, LCmp0, Low0, HCmp0, High0); range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) -> case range_pos(Low, Arg, High) of min -> diff --git a/src/mango/test/25-beginswith-test.py b/src/mango/test/25-beginswith-test.py index 3b5134b65..df96560b4 100644 --- a/src/mango/test/25-beginswith-test.py +++ b/src/mango/test/25-beginswith-test.py @@ -54,7 +54,7 @@ class BeginsWithOperator(mango.DbPerClass): self.assertEqual(mrargs["start_key"], ["A"]) end_key_bytes = 
to_utf8_bytes(mrargs["end_key"]) - self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbd", b"<MAX>"]) + self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbf", b"<MAX>"]) def test_compound_key(self): selector = {"name": "Eddie", "location": {"$beginsWith": "A"}} @@ -62,7 +62,7 @@ class BeginsWithOperator(mango.DbPerClass): self.assertEqual(mrargs["start_key"], ["Eddie", "A"]) end_key_bytes = to_utf8_bytes(mrargs["end_key"]) - self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbd", b"<MAX>"]) + self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbf", b"<MAX>"]) docs = self.db.find(selector) self.assertEqual(len(docs), 1) @@ -74,12 +74,12 @@ class BeginsWithOperator(mango.DbPerClass): { "sort": ["location"], "start_key": [b"A"], - "end_key": [b"A\xef\xbf\xbd", b"<MAX>"], + "end_key": [b"A\xef\xbf\xbf", b"<MAX>"], "direction": "fwd", }, { "sort": [{"location": "desc"}], - "start_key": [b"A\xef\xbf\xbd", b"<MAX>"], + "start_key": [b"A\xef\xbf\xbf", b"<MAX>"], "end_key": [b"A"], "direction": "rev", }, @@ -97,7 +97,7 @@ class BeginsWithOperator(mango.DbPerClass): self.assertEqual(mrargs["start_key"], "a") end_key_bytes = to_utf8_bytes(mrargs["end_key"]) - self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbd"]) + self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbf"]) def test_no_index(self): selector = {"foo": {"$beginsWith": "a"}}
