This is an automated email from the ASF dual-hosted git repository. vincbeck pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 647e76716e Add distinct function to MongoHook in apache-airflow-providers-mongo (#34466) 647e76716e is described below commit 647e76716e38e07b71c8b7dedc4d3955aff110fb Author: Octávio Lage <m...@octavio.dev> AuthorDate: Wed Oct 25 13:52:01 2023 -0300 Add distinct function to MongoHook in apache-airflow-providers-mongo (#34466) --- airflow/providers/mongo/hooks/mongo.py | 24 ++++++++++++++++++++++++ tests/providers/mongo/hooks/test_mongo.py | 26 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/airflow/providers/mongo/hooks/mongo.py b/airflow/providers/mongo/hooks/mongo.py index fca855a51d..928618a9dc 100644 --- a/airflow/providers/mongo/hooks/mongo.py +++ b/airflow/providers/mongo/hooks/mongo.py @@ -366,3 +366,27 @@ class MongoHook(BaseHook): collection = self.get_collection(mongo_collection, mongo_db=mongo_db) return collection.delete_many(filter_doc, **kwargs) + + def distinct( + self, + mongo_collection: str, + distinct_key: str, + filter_doc: dict | None = None, + mongo_db: str | None = None, + **kwargs, + ) -> list[Any]: + """ + Returns a list of distinct values for the given key across a collection. + + https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.distinct + + :param mongo_collection: The name of the collection to perform distinct on. + :param distinct_key: The field to return distinct values from. + :param filter_doc: A query that matches the documents to get distinct values from. + Can be omitted; then will cover the entire collection. + :param mongo_db: The name of the database to use. + Can be omitted; then the database from the connection string is used. 
+ """ + collection = self.get_collection(mongo_collection, mongo_db=mongo_db) + + return collection.distinct(distinct_key, filter=filter_doc, **kwargs) diff --git a/tests/providers/mongo/hooks/test_mongo.py b/tests/providers/mongo/hooks/test_mongo.py index 4d46a613e7..19aa4928fc 100644 --- a/tests/providers/mongo/hooks/test_mongo.py +++ b/tests/providers/mongo/hooks/test_mongo.py @@ -303,6 +303,32 @@ class TestMongoHook: results = self.hook.aggregate(collection, aggregate_query) assert len(list(results)) == 2 + def test_distinct(self): + collection = mongomock.MongoClient().db.collection + objs = [ + {"test_id": "1", "test_status": "success"}, + {"test_id": "2", "test_status": "failure"}, + {"test_id": "3", "test_status": "success"}, + ] + + collection.insert_many(objs) + + results = self.hook.distinct(collection, "test_status") + assert len(results) == 2 + + def test_distinct_with_filter(self): + collection = mongomock.MongoClient().db.collection + objs = [ + {"test_id": "1", "test_status": "success"}, + {"test_id": "2", "test_status": "failure"}, + {"test_id": "3", "test_status": "success"}, + ] + + collection.insert_many(objs) + + results = self.hook.distinct(collection, "test_id", {"test_status": "failure"}) + assert len(results) == 1 + def test_context_manager(): with MongoHook(conn_id="mongo_default", mongo_db="default") as ctx_hook: