This is an automated email from the ASF dual-hosted git repository. willholley pushed a commit to branch mango-beginswith in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit cdfb26d8d891f995ca739f448b60f8ef5ab760cb Author: Will Holley <[email protected]> AuthorDate: Thu Oct 26 12:42:02 2023 +0000 Fix lucene support --- src/docs/src/api/database/find.rst | 136 ++++++++++++++++++---------------- src/mango/src/mango_selector_text.erl | 3 +- src/mango/test/03-operator-test.py | 41 +++++++--- 3 files changed, 103 insertions(+), 77 deletions(-) diff --git a/src/docs/src/api/database/find.rst b/src/docs/src/api/database/find.rst index d25350708..153aa9a09 100644 --- a/src/docs/src/api/database/find.rst +++ b/src/docs/src/api/database/find.rst @@ -673,68 +673,74 @@ In addition, some 'meta' condition operators are available. Some condition operators accept any valid JSON content as the argument. Other condition operators require the argument to be in a specific JSON format. -+---------------+-------------+------------+-----------------------------------+ -| Operator type | Operator | Argument | Purpose | -+===============+=============+============+===================================+ -| (In)equality | ``$lt`` | Any JSON | The field is less than the | -| | | | argument. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$lte`` | Any JSON | The field is less than or equal to| -| | | | the argument. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$eq`` | Any JSON | The field is equal to the argument| -+---------------+-------------+------------+-----------------------------------+ -| | ``$ne`` | Any JSON | The field is not equal to the | -| | | | argument. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$gte`` | Any JSON | The field is greater than or equal| -| | | | to the argument. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$gt`` | Any JSON | The field is greater than the | -| | | | to the argument. 
| -+---------------+-------------+------------+-----------------------------------+ -| Object | ``$exists`` | Boolean | Check whether the field exists or | -| | | | not, regardless of its value. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$type`` | String | Check the document field's type. | -| | | | Valid values are ``"null"``, | -| | | | ``"boolean"``, ``"number"``, | -| | | | ``"string"``, ``"array"``, and | -| | | | ``"object"``. | -+---------------+-------------+------------+-----------------------------------+ -| Array | ``$in`` | Array of | The document field must exist in | -| | | JSON values| the list provided. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$nin`` | Array of | The document field not must exist | -| | | JSON values| in the list provided. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$size`` | Integer | Special condition to match the | -| | | | length of an array field in a | -| | | | document. Non-array fields cannot | -| | | | match this condition. | -+---------------+-------------+------------+-----------------------------------+ -| Miscellaneous | ``$mod`` | [Divisor, | Divisor is a non-zero integer, | -| | | Remainder] | Remainder is any integer. | -| | | | Non-integer values result in a | -| | | | 404. Matches documents where | -| | | | ``field % Divisor == Remainder`` | -| | | | is true, and only when the | -| | | | document field is an integer. | -+---------------+-------------+------------+-----------------------------------+ -| | ``$regex`` | String | A regular expression pattern to | -| | | | match against the document field. | -| | | | Only matches when the field is a | -| | | | string value and matches the | -| | | | supplied regular expression. The | -| | | | matching algorithms are based on | -| | | | the Perl Compatible Regular | -| | | | Expression (PCRE) library. 
For | -| | | | more information about what is | -| | | | implemented, see the see the | -| | | | `Erlang Regular Expression | -| | | | <http://erlang.org/doc | -| | | | /man/re.html>`_. | -+---------------+-------------+------------+-----------------------------------+ ++---------------+-----------------+-------------+------------------------------------+ +| Operator type | Operator | Argument | Purpose | ++===============+=================+=============+====================================+ +| (In)equality | ``$lt`` | Any JSON | The field is less than the | +| | | | argument. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$lte`` | Any JSON | The field is less than or equal to | +| | | | the argument. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$eq`` | Any JSON | The field is equal to the argument | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$ne`` | Any JSON | The field is not equal to the | +| | | | argument. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$gte`` | Any JSON | The field is greater than or equal | +| | | | to the argument. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$gt`` | Any JSON | The field is greater than the | +| | | | argument. | ++---------------+-----------------+-------------+------------------------------------+ +| Object | ``$exists`` | Boolean | Check whether the field exists or | +| | | | not, regardless of its value. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$type`` | String | Check the document field's type. | +| | | | Valid values are ``"null"``, | +| | | | ``"boolean"``, ``"number"``, | +| | | | ``"string"``, ``"array"``, and | +| | | | ``"object"``. 
| ++---------------+-----------------+-------------+------------------------------------+ +| Array | ``$in`` | Array of | The document field must exist in | +| | | JSON values | the list provided. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$nin`` | Array of | The document field must not exist | +| | | JSON values | in the list provided. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$size`` | Integer | Special condition to match the | +| | | | length of an array field in a | +| | | | document. Non-array fields cannot | +| | | | match this condition. | ++---------------+-----------------+-------------+------------------------------------+ +| Miscellaneous | ``$mod`` | [Divisor, | Divisor is a non-zero integer, | +| | | Remainder] | Remainder is any integer. | +| | | | Non-integer values result in a | +| | | | 404. Matches documents where | +| | | | ``field % Divisor == Remainder`` | +| | | | is true, and only when the | +| | | | document field is an integer. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$regex`` | String | A regular expression pattern to | +| | | | match against the document field. | +| | | | Only matches when the field is a | +| | | | string value and matches the | +| | | | supplied regular expression. The | +| | | | matching algorithms are based on | +| | | | the Perl Compatible Regular | +| | | | Expression (PCRE) library. For | +| | | | more information about what is | +| | | | implemented, see the | +| | | | `Erlang Regular Expression | +| | | | <http://erlang.org/doc | +| | | | /man/re.html>`_. | ++---------------+-----------------+-------------+------------------------------------+ +| | ``$beginsWith`` | String | Matches where the document field | +| | | | begins with the specified prefix | +| | | | (case-sensitive). 
If the document | +| | | | field contains a non-string value, | +| | | | the document is not matched. | ++---------------+-----------------+-------------+------------------------------------+ .. warning:: Regular expressions do not work with indexes, so they should not be used to @@ -754,8 +760,10 @@ can itself be another operator with arguments of its own. This enables us to build up more complex selector expressions. However, only equality operators such as ``$eq``, ``$gt``, ``$gte``, ``$lt``, -and ``$lte`` (but not ``$ne``) can be used as the basis of a query. You should -include at least one of these in a selector. +``$lte`` and ``$beginsWith`` (but not ``$ne``) can be used as the basis +of a query that can make efficient use of a ``json`` index. You should +include at least one of these in a selector, or consider using +a ``text`` index if more flexibility is required. For example, if you try to perform a query that attempts to match all documents that have a field called `afieldname` containing a value that begins with the diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl index 4a50ff9ba..7d8f73923 100644 --- a/src/mango/src/mango_selector_text.erl +++ b/src/mango/src/mango_selector_text.erl @@ -143,8 +143,9 @@ convert(Path, {[{<<"$exists">>, ShouldExist}]}) -> false -> {op_not, {FieldExists, false}} end; convert(Path, {[{<<"$beginsWith">>, Arg}]}) when is_binary(Arg) -> + Prefix = mango_util:lucene_escape_query_value(Arg), Suffix = <<"*">>, - PrefixSearch = value_str(<<Arg/binary, Suffix/binary>>), + PrefixSearch = <<Prefix/binary, Suffix/binary>>, {op_field, {make_field(Path, Arg), PrefixSearch}}; % We're not checking the actual type here, just looking for % anything that has a possibility of matching by checking diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py index b43aacf5f..3b1a46565 100644 --- a/src/mango/test/03-operator-test.py +++ b/src/mango/test/03-operator-test.py @@ 
-10,12 +10,13 @@ # License for the specific language governing permissions and limitations under # the License. +from requests.exceptions import HTTPError import mango import unittest class BaseOperatorTests: - class Common(object): + class Common(unittest.TestCase): def assertUserIds(self, user_ids, docs): user_ids_returned = list(d["user_id"] for d in docs) user_ids.sort() @@ -142,20 +143,36 @@ class BaseOperatorTests: self.assertNotIn("twitter", d) def test_beginswith(self): - docs = self.db.find({"location.state": {"$beginsWith": "New"}}) - self.assertEqual(len(docs), 2) - self.assertUserIds([2, 10], docs) + cases = [ + {"prefix": "New", "user_ids": [2, 10]}, + { + # test escaped characters - note the space in the test string + "prefix": "New ", + "user_ids": [2, 10], + }, + { + # non-string values in documents should not match the prefix, + # but should not error + "prefix": "Foo", + "user_ids": [], + }, + {"prefix": " New", "user_ids": []}, + ] - # non-string prefixes should return an error - def test_beginswith_invalid_prefix(self): - docs = self.db.find({"location.state": {"$beginsWith": 123}}) - self.assertEqual(len(docs), 2) + for case in cases: + with self.subTest(prefix=case["prefix"]): + selector = {"location.state": {"$beginsWith": case["prefix"]}} + docs = self.db.find(selector) + self.assertEqual(len(docs), len(case["user_ids"])) + self.assertUserIds(case["user_ids"], docs) - # non-string values in documents should not match the prefix, - # but should not error + # non-string prefixes should return an error def test_beginswith_invalid_prefix(self): - docs = self.db.find({"user_id": {"$beginsWith": "Foo"}}) - self.assertEqual(len(docs), 0) + cases = [123, True, [], {}] + for prefix in cases: + with self.subTest(prefix=prefix): + with self.assertRaises(HTTPError): + self.db.find({"location.state": {"$beginsWith": prefix}}) class OperatorJSONTests(mango.UserDocsTests, BaseOperatorTests.Common):
