Repository: beam Updated Branches: refs/heads/master 2982238f7 -> c3141ed99
Remove dependency on datastore Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/22f3fb89 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/22f3fb89 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/22f3fb89 Branch: refs/heads/master Commit: 22f3fb892bf0717d52ce3b971e977797dd61ba4c Parents: e6b3883 Author: Sourabh Bajaj <sourabhba...@google.com> Authored: Mon Feb 20 20:49:17 2017 -0800 Committer: Ahmet Altay <al...@google.com> Committed: Tue Feb 21 16:04:44 2017 -0800 ---------------------------------------------------------------------- .../examples/cookbook/datastore_wordcount.py | 4 +-- .../examples/snippets/snippets_test.py | 9 +++++ .../google_cloud_platform/json_value_test.py | 2 +- .../datastore/v1/datastoreio.py | 10 ++++-- .../datastore/v1/datastoreio_test.py | 16 ++++++--- .../datastore/v1/fake_datastore.py | 10 ++++-- .../datastore/v1/helper.py | 23 +++++++----- .../datastore/v1/helper_test.py | 21 +++++++---- .../datastore/v1/query_splitter.py | 26 ++++++++------ .../datastore/v1/query_splitter_test.py | 17 ++++++--- sdks/python/apache_beam/runners/__init__.py | 2 +- .../internal/apiclient_test.py | 1 + .../tests/pipeline_verifiers_test.py | 2 +- sdks/python/pom.xml | 2 +- sdks/python/setup.py | 20 ++++++----- sdks/python/tox.ini | 37 ++++++++++++++++---- 16 files changed, 145 insertions(+), 57 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py index 5d3bef6..8d8bf16 100644 --- a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py +++ b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py 
@@ -72,8 +72,8 @@ from googledatastore import helper as datastore_helper, PropertyFilter import apache_beam as beam from apache_beam.io import ReadFromText -from apache_beam.io.datastore.v1.datastoreio import ReadFromDatastore -from apache_beam.io.datastore.v1.datastoreio import WriteToDatastore +from apache_beam.io.google_cloud_platform.datastore.v1.datastoreio import ReadFromDatastore +from apache_beam.io.google_cloud_platform.datastore.v1.datastoreio import WriteToDatastore from apache_beam.metrics import Metrics from apache_beam.utils.pipeline_options import GoogleCloudOptions from apache_beam.utils.pipeline_options import PipelineOptions http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/examples/snippets/snippets_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/examples/snippets/snippets_test.py b/sdks/python/apache_beam/examples/snippets/snippets_test.py index f9f1a13..efb400d 100644 --- a/sdks/python/apache_beam/examples/snippets/snippets_test.py +++ b/sdks/python/apache_beam/examples/snippets/snippets_test.py @@ -45,6 +45,14 @@ except ImportError: base_api = None # pylint: enable=wrong-import-order, wrong-import-position +# Protect against environments where datastore library is not available. 
+# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 +except ImportError: + datastore_pb2 = None +# pylint: enable=wrong-import-order, wrong-import-position + class ParDoTest(unittest.TestCase): """Tests for model/par-do.""" @@ -577,6 +585,7 @@ class SnippetsTest(unittest.TestCase): snippets.model_textio_compressed( {'read': gzip_file_name}, ['aa', 'bb', 'cc']) + @unittest.skipIf(datastore_pb2 is None, 'GCP dependencies are not installed') def test_model_datastoreio(self): # We cannot test datastoreio functionality in unit tests therefore we limit # ourselves to making sure the pipeline containing Datastore read and write http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/internal/google_cloud_platform/json_value_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/internal/google_cloud_platform/json_value_test.py b/sdks/python/apache_beam/internal/google_cloud_platform/json_value_test.py index 1a83008..72509a0 100644 --- a/sdks/python/apache_beam/internal/google_cloud_platform/json_value_test.py +++ b/sdks/python/apache_beam/internal/google_cloud_platform/json_value_test.py @@ -27,7 +27,7 @@ from apache_beam.internal.google_cloud_platform.json_value import to_json_value # pylint: disable=wrong-import-order, wrong-import-position try: from apitools.base.py.extra_types import JsonValue -except: +except ImportError: JsonValue = None # pylint: enable=wrong-import-order, wrong-import-position http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio.py index 2eac4d5..335c532 100644 --- 
a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio.py @@ -19,8 +19,14 @@ import logging -from google.cloud.proto.datastore.v1 import datastore_pb2 -from googledatastore import helper as datastore_helper +# Protect against environments where datastore library is not available. +# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from googledatastore import helper as datastore_helper +except ImportError: + pass +# pylint: enable=wrong-import-order, wrong-import-position from apache_beam.io.google_cloud_platform.datastore.v1 import helper from apache_beam.io.google_cloud_platform.datastore.v1 import query_splitter http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio_test.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio_test.py index 1dd7779..5afb146 100644 --- a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio_test.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/datastoreio_test.py @@ -17,10 +17,6 @@ import unittest -from google.cloud.proto.datastore.v1 import datastore_pb2 -from google.cloud.proto.datastore.v1 import query_pb2 -from google.protobuf import timestamp_pb2 -from googledatastore import helper as datastore_helper from mock import MagicMock, call, patch from apache_beam.io.google_cloud_platform.datastore.v1 import fake_datastore @@ -30,7 +26,19 @@ from apache_beam.io.google_cloud_platform.datastore.v1.datastoreio import _Mutat from apache_beam.io.google_cloud_platform.datastore.v1.datastoreio import ReadFromDatastore from 
apache_beam.io.google_cloud_platform.datastore.v1.datastoreio import WriteToDatastore +# Protect against environments where datastore library is not available. +# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from google.cloud.proto.datastore.v1 import query_pb2 + from google.protobuf import timestamp_pb2 + from googledatastore import helper as datastore_helper +except ImportError: + datastore_pb2 = None +# pylint: enable=wrong-import-order, wrong-import-position + +@unittest.skipIf(datastore_pb2 is None, 'GCP dependencies are not installed') class DatastoreioTest(unittest.TestCase): _PROJECT = 'project' _KIND = 'kind' http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/fake_datastore.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/fake_datastore.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/fake_datastore.py index ac8e0e0..bc4d07f 100644 --- a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/fake_datastore.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/fake_datastore.py @@ -18,8 +18,14 @@ """Fake datastore used for unit testing.""" import uuid -from google.cloud.proto.datastore.v1 import datastore_pb2 -from google.cloud.proto.datastore.v1 import query_pb2 +# Protect against environments where datastore library is not available. 
+# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from google.cloud.proto.datastore.v1 import query_pb2 +except ImportError: + pass +# pylint: enable=wrong-import-order, wrong-import-position def create_run_query(entities, batch_size): http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper.py index 1497862..e15e43b 100644 --- a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper.py @@ -18,13 +18,20 @@ """Cloud Datastore helper functions.""" import sys -from google.cloud.proto.datastore.v1 import datastore_pb2 -from google.cloud.proto.datastore.v1 import entity_pb2 -from google.cloud.proto.datastore.v1 import query_pb2 -from googledatastore import PropertyFilter, CompositeFilter -from googledatastore import helper as datastore_helper -from googledatastore.connection import Datastore -from googledatastore.connection import RPCError +# Protect against environments where datastore library is not available. 
+# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from google.cloud.proto.datastore.v1 import entity_pb2 + from google.cloud.proto.datastore.v1 import query_pb2 + from googledatastore import PropertyFilter, CompositeFilter + from googledatastore import helper as datastore_helper + from googledatastore.connection import Datastore + from googledatastore.connection import RPCError + QUERY_NOT_FINISHED = query_pb2.QueryResultBatch.NOT_FINISHED +except ImportError: + QUERY_NOT_FINISHED = None +# pylint: enable=wrong-import-order, wrong-import-position from apache_beam.internal import auth from apache_beam.utils import retry @@ -220,7 +227,7 @@ class QueryIterator(object): Entities are read in batches. Retries on failures. """ - _NOT_FINISHED = query_pb2.QueryResultBatch.NOT_FINISHED + _NOT_FINISHED = QUERY_NOT_FINISHED # Maximum number of results to request per query. _BATCH_SIZE = 500 http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper_test.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper_test.py index 689c462..6fa3eed 100644 --- a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper_test.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/helper_test.py @@ -19,12 +19,6 @@ import sys import unittest -from google.cloud.proto.datastore.v1 import datastore_pb2 -from google.cloud.proto.datastore.v1 import entity_pb2 -from google.cloud.proto.datastore.v1 import query_pb2 -from google.cloud.proto.datastore.v1.entity_pb2 import Key -from googledatastore.connection import RPCError -from googledatastore import helper as datastore_helper from mock import MagicMock from 
apache_beam.io.google_cloud_platform.datastore.v1 import fake_datastore @@ -32,6 +26,21 @@ from apache_beam.io.google_cloud_platform.datastore.v1 import helper from apache_beam.tests.test_utils import patch_retry +# Protect against environments where datastore library is not available. +# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from google.cloud.proto.datastore.v1 import entity_pb2 + from google.cloud.proto.datastore.v1 import query_pb2 + from google.cloud.proto.datastore.v1.entity_pb2 import Key + from googledatastore.connection import RPCError + from googledatastore import helper as datastore_helper +except ImportError: + datastore_helper = None +# pylint: enable=wrong-import-order, wrong-import-position + + +@unittest.skipIf(datastore_helper is None, 'GCP dependencies are not installed') class HelperTest(unittest.TestCase): def setUp(self): http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter.py index b101ad9..71590d3 100644 --- a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter.py @@ -18,11 +18,22 @@ """Implements a Cloud Datastore query splitter.""" from apache_beam.io.google_cloud_platform.datastore.v1 import helper -from google.cloud.proto.datastore.v1 import datastore_pb2 -from google.cloud.proto.datastore.v1 import query_pb2 -from google.cloud.proto.datastore.v1.query_pb2 import PropertyFilter -from google.cloud.proto.datastore.v1.query_pb2 import CompositeFilter -from googledatastore import helper as datastore_helper + +# Protect against 
environments where datastore library is not available. +# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from google.cloud.proto.datastore.v1 import query_pb2 + from google.cloud.proto.datastore.v1.query_pb2 import PropertyFilter + from google.cloud.proto.datastore.v1.query_pb2 import CompositeFilter + from googledatastore import helper as datastore_helper + UNSUPPORTED_OPERATORS = [PropertyFilter.LESS_THAN, + PropertyFilter.LESS_THAN_OR_EQUAL, + PropertyFilter.GREATER_THAN, + PropertyFilter.GREATER_THAN_OR_EQUAL] +except ImportError: + UNSUPPORTED_OPERATORS = None +# pylint: enable=wrong-import-order, wrong-import-position __all__ = [ @@ -34,11 +45,6 @@ KEY_PROPERTY_NAME = '__key__' # The number of keys to sample for each split. KEYS_PER_SPLIT = 32 -UNSUPPORTED_OPERATORS = [PropertyFilter.LESS_THAN, - PropertyFilter.LESS_THAN_OR_EQUAL, - PropertyFilter.GREATER_THAN, - PropertyFilter.GREATER_THAN_OR_EQUAL] - def get_splits(datastore, query, num_splits, partition=None): """Returns a list of sharded queries for the given Cloud Datastore query. 
http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter_test.py b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter_test.py index 676f311..9e87dec 100644 --- a/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter_test.py +++ b/sdks/python/apache_beam/io/google_cloud_platform/datastore/v1/query_splitter_test.py @@ -25,11 +25,18 @@ from mock import call from apache_beam.io.google_cloud_platform.datastore.v1 import fake_datastore from apache_beam.io.google_cloud_platform.datastore.v1 import query_splitter -from google.cloud.proto.datastore.v1 import datastore_pb2 -from google.cloud.proto.datastore.v1 import query_pb2 -from google.cloud.proto.datastore.v1.query_pb2 import PropertyFilter - - +# Protect against environments where datastore library is not available. 
+# pylint: disable=wrong-import-order, wrong-import-position +try: + from google.cloud.proto.datastore.v1 import datastore_pb2 + from google.cloud.proto.datastore.v1 import query_pb2 + from google.cloud.proto.datastore.v1.query_pb2 import PropertyFilter +except ImportError: + datastore_pb2 = None +# pylint: enable=wrong-import-order, wrong-import-position + + +@unittest.skipIf(datastore_pb2 is None, 'GCP dependencies are not installed') class QuerySplitterTest(unittest.TestCase): def test_get_splits_query_with_multiple_kinds(self): http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/runners/__init__.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/__init__.py b/sdks/python/apache_beam/runners/__init__.py index 8464992..a77c928 100644 --- a/sdks/python/apache_beam/runners/__init__.py +++ b/sdks/python/apache_beam/runners/__init__.py @@ -27,4 +27,4 @@ from apache_beam.runners.runner import PipelineState from apache_beam.runners.runner import create_runner from apache_beam.runners.google_cloud_dataflow.dataflow_runner import DataflowRunner -from apache_beam.runners.test.test_dataflow_runner import TestDataflowRunner +from apache_beam.runners.google_cloud_dataflow.test_dataflow_runner import TestDataflowRunner http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/runners/google_cloud_dataflow/internal/apiclient_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/google_cloud_dataflow/internal/apiclient_test.py b/sdks/python/apache_beam/runners/google_cloud_dataflow/internal/apiclient_test.py index 7adcf8b..2c53e37 100644 --- a/sdks/python/apache_beam/runners/google_cloud_dataflow/internal/apiclient_test.py +++ b/sdks/python/apache_beam/runners/google_cloud_dataflow/internal/apiclient_test.py @@ -34,6 +34,7 @@ except ImportError: apiclient = None # pylint: 
enable=wrong-import-order, wrong-import-position + @unittest.skipIf(apiclient is None, 'GCP dependencies are not installed') class UtilTest(unittest.TestCase): http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/apache_beam/tests/pipeline_verifiers_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/tests/pipeline_verifiers_test.py b/sdks/python/apache_beam/tests/pipeline_verifiers_test.py index 1801624..586af82 100644 --- a/sdks/python/apache_beam/tests/pipeline_verifiers_test.py +++ b/sdks/python/apache_beam/tests/pipeline_verifiers_test.py @@ -32,7 +32,7 @@ from apache_beam.tests.test_utils import patch_retry try: from apitools.base.py.exceptions import HttpError -except: +except ImportError: HttpError = None http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/pom.xml ---------------------------------------------------------------------- diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml index 86d28e8..98b7fa3 100644 --- a/sdks/python/pom.xml +++ b/sdks/python/pom.xml @@ -171,7 +171,7 @@ <executable>${python.user.base}/bin/tox</executable> <arguments> <argument>-e</argument> - <argument>py27</argument> + <argument>ALL</argument> <argument>-c</argument> <argument>tox.ini</argument> </arguments> http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/setup.py ---------------------------------------------------------------------- diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 935cb90..1dd0031 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -82,13 +82,11 @@ else: except ImportError: cythonize = lambda *args, **kwargs: [] + REQUIRED_PACKAGES = [ 'avro>=1.7.7,<2.0.0', 'crcmod>=1.7,<2.0', 'dill>=0.2.5,<0.3', - 'google-apitools>=0.5.6,<1.0.0', - 'proto-google-cloud-datastore-v1==0.90.0', - 'googledatastore==7.0.0', 'httplib2>=0.8,<0.10', 'mock>=1.0.1,<3.0.0', 'oauth2client>=2.0.1,<4.0.0', @@ -98,15 +96,17 @@ 
REQUIRED_PACKAGES = [ 'pyyaml>=3.10,<4.0.0', ] - REQUIRED_TEST_PACKAGES = [ 'nose-parameterized>=0.5.0,<0.6.0', 'pyhamcrest>=1.9,<2.0', ] -EXTRA_REQUIRES = { - 'docs': ['Sphinx>=1.5.2,<2.0'], -} +GCP_REQUIREMENTS = [ + 'google-apitools>=0.5.6,<1.0.0', + 'proto-google-cloud-datastore-v1==0.90.0', + 'googledatastore==7.0.0', +] + setuptools.setup( name=PACKAGE_NAME, @@ -132,7 +132,11 @@ setuptools.setup( install_requires=REQUIRED_PACKAGES, test_suite='nose.collector', tests_require=REQUIRED_TEST_PACKAGES, - extras_require=EXTRA_REQUIRES, + extras_require={ + 'docs': ['Sphinx>=1.5.2,<2.0'], + 'test': REQUIRED_TEST_PACKAGES, + 'gcp': GCP_REQUIREMENTS + }, zip_safe=False, # PyPI package information. classifiers=[ http://git-wip-us.apache.org/repos/asf/beam/blob/22f3fb89/sdks/python/tox.ini ---------------------------------------------------------------------- diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 2f5a7b5..521f106 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -16,7 +16,7 @@ ; [tox] -envlist = py27 +envlist = py27,py27gcp,lint,docs [pep8] # Disable all errors and warnings except for the ones related to blank lines. @@ -25,16 +25,41 @@ select = E3 [testenv:py27] # autocomplete_test depends on nose when invoked directly. -# run_pylint.sh depends on pep8 and pylint. -deps= +deps = nose - pep8 - pylint - Sphinx commands = python --version + pip install -e .[test] python apache_beam/examples/complete/autocomplete_test.py python setup.py test +passenv = TRAVIS* + +[testenv:py27gcp] +# autocomplete_test depends on nose when invoked directly. 
+deps = + nose +commands = + pip install -e .[test,gcp] + python --version + python apache_beam/examples/complete/autocomplete_test.py + python setup.py test +passenv = TRAVIS* + +[testenv:lint] +deps= + nose + pep8 + pylint +commands = + pip install -e .[test] {toxinidir}/run_pylint.sh +passenv = TRAVIS* + +[testenv:docs] +deps= + nose + Sphinx +commands = + pip install -e .[test,gcp,docs] {toxinidir}/generate_pydoc.sh passenv = TRAVIS*