Repository: incubator-beam Updated Branches: refs/heads/python-sdk 8eae855d6 -> b265dceaa
Add more documentation to datastore_wordcount example Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/62b8095e Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/62b8095e Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/62b8095e Branch: refs/heads/python-sdk Commit: 62b8095e7164a316b8ae93c7fefa41d38ee255a8 Parents: 8eae855 Author: Vikas Kedigehalli <vika...@google.com> Authored: Wed Dec 7 14:14:41 2016 -0800 Committer: Robert Bradshaw <rober...@gmail.com> Committed: Fri Dec 9 11:29:02 2016 -0800 ---------------------------------------------------------------------- .../examples/cookbook/datastore_wordcount.py | 46 +++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/62b8095e/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py index eb62614..9613402 100644 --- a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py +++ b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py @@ -15,7 +15,49 @@ # limitations under the License. # -"""A word-counting workflow that uses Google Cloud Datastore.""" +"""A word-counting workflow that uses Google Cloud Datastore. + +This example shows how to use ``datastoreio`` to read from and write to +Google Cloud Datastore. Note that running this example may incur charges for +Cloud Datastore operations. + +See https://developers.google.com/datastore/ for more details on Google Cloud +Datastore. +See http://beam.incubator.apache.org/get-started/quickstart on +how to run a Beam pipeline.
+ +Read-only Mode: In this mode, this example reads Cloud Datastore entities using +the ``datastoreio.ReadFromDatastore`` transform, extracts the words, +counts them and writes the output to a set of files. + +The following options must be provided to run this pipeline in read-only mode: +`` +--project YOUR_PROJECT_ID +--kind YOUR_DATASTORE_KIND +--output [YOUR_LOCAL_FILE *or* gs://YOUR_OUTPUT_PATH] +--read-only +`` + +Read-write Mode: In this mode, this example reads words from an input file, +converts them to Cloud Datastore ``Entity`` objects and writes them to +Cloud Datastore using the ``datastoreio.Write`` transform. The second pipeline +will then read these Cloud Datastore entities using the +``datastoreio.ReadFromDatastore`` transform, extract the words, count them and +write the output to a set of files. + +The following options must be provided to run this pipeline in read-write mode: +`` +--project YOUR_PROJECT_ID +--kind YOUR_DATASTORE_KIND +--output [YOUR_LOCAL_FILE *or* gs://YOUR_OUTPUT_PATH] +`` + +Note: We are using the Cloud Datastore protobuf objects directly because +that is the interface that the ``datastoreio`` exposes. +See the following links for more information about these protobuf messages. +https://cloud.google.com/datastore/docs/reference/rpc/google.datastore.v1 and +https://github.com/googleapis/googleapis/tree/master/google/datastore/v1 +""" from __future__ import absolute_import @@ -196,7 +238,7 @@ def run(argv=None): if not known_args.read_only: write_to_datastore(gcloud_options.project, known_args, pipeline_options) - # Read from Datastore. + # Read entities from Datastore. result = read_from_datastore(gcloud_options.project, known_args, pipeline_options)