[ https://issues.apache.org/jira/browse/BEAM-3041?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16299364#comment-16299364 ]
ASF GitHub Bot commented on BEAM-3041:
--------------------------------------

aaltay closed pull request #4299: [BEAM-3041] Update wordcount fnapi example to use now supported save_main_session flag
URL: https://github.com/apache/beam/pull/4299

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/python/apache_beam/examples/wordcount_fnapi.py b/sdks/python/apache_beam/examples/wordcount_fnapi.py
index 5e92a237d0c..70474755b2b 100644
--- a/sdks/python/apache_beam/examples/wordcount_fnapi.py
+++ b/sdks/python/apache_beam/examples/wordcount_fnapi.py
@@ -26,6 +26,7 @@
 
 import argparse
 import logging
+import re
 
 import apache_beam as beam
 from apache_beam.io import ReadFromText
@@ -35,6 +36,7 @@
 from apache_beam.metrics.metric import MetricsFilter
 from apache_beam.options.pipeline_options import DebugOptions
 from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
 
 
 class WordExtractingDoFn(beam.DoFn):
@@ -59,12 +61,6 @@ def process(self, element):
     Returns:
       The processed element.
     """
-
-    # TODO(BEAM-3041): Move this import to top of the file after the fix.
-    # Portable containers does not support save main session, and importing here
-    # is required. This is only needed for running experimental jobs with FnApi.
-    import re
-
     text_line = element.strip()
     if not text_line:
       self.empty_line_counter.inc(1)
@@ -89,7 +85,10 @@ def run(argv=None):
                       help='Output file to write results to.')
   known_args, pipeline_args = parser.parse_known_args(argv)
 
+  # We use the save_main_session option because one or more DoFn's in this
+  # workflow rely on global context (e.g., a module imported at module level).
   pipeline_options = PipelineOptions(pipeline_args)
+  pipeline_options.view_as(SetupOptions).save_main_session = True
   p = beam.Pipeline(options=pipeline_options)
 
   # Ensure that the experiment flag is set explicitly by the user.
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/dependency.py b/sdks/python/apache_beam/runners/dataflow/internal/dependency.py
index 1fafd243dfc..a7aee1d88b8 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/dependency.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/dependency.py
@@ -81,7 +81,7 @@
 # Update this version to the next version whenever there is a change that
 # requires changes to SDK harness container or SDK harness launcher.
 # This should be in the beam-[version]-[date] format, date is optional.
-BEAM_FNAPI_CONTAINER_VERSION = 'beam-2.3.0-20171121'
+BEAM_FNAPI_CONTAINER_VERSION = 'beam-2.3.0-20171219'
 
 # Standard file names used for staging files.
 WORKFLOW_TARBALL_FILE = 'workflow.tar.gz'

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

> Add portable Python SDK container setup support
> -----------------------------------------------
>
> Key: BEAM-3041
> URL: https://issues.apache.org/jira/browse/BEAM-3041
> Project: Beam
> Issue Type: Improvement
> Components: sdk-py-harness
> Reporter: Henning Rohde
> Assignee: Ahmet Altay
> Labels: portability
> Fix For: 2.3.0
>
>
> The minimal python container setup should be brought up to par with SDK
> features:
> - requirements.txt
> - main session
> - extra packages
> The name of the SDK package in boot.go should also not be hardcoded.

--
This message was sent by Atlassian JIRA
(v6.4.14#64029)