[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266374&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266374 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:57 Start Date: 25/Jun/19 06:57 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297032378 ## File path: sdks/python/apache_beam/io/gcp/bigtableio_it_test.py ## @@ -0,0 +1,178 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +""" Integration test for GCP Bigtable testing.""" +from __future__ import absolute_import + +import argparse +import datetime +import logging +import random +import string +import time +import unittest + +import apache_beam as beam +from apache_beam.io import Read +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.runners.runner import PipelineState +from apache_beam.testing.util import assert_that, equal_to +from apache_beam.transforms.combiners import Count + +try: + from google.cloud.bigtable import enums, row, column_family, Client + from gcp.bigtableio import BigtableSource, WriteToBigTable +except ImportError: + Client = None + + +class GenerateTestRows(beam.PTransform): + """ A PTransform to generate dummy rows to write to a Bigtable Table. + + A PTransform that generates a list of `DirectRow` and writes it to a Bigtable Table. + """ + def __init__(self): +super(self.__class__, self).__init__() +self.beam_options = {'project_id': PROJECT_ID, + 'instance_id': INSTANCE_ID, + 'table_id': TABLE_ID} + + def _generate(self): +for i in range(ROW_COUNT): + key = "key_%s" % ('{0:012}'.format(i)) + test_row = row.DirectRow(row_key=key) + value = ''.join(random.choice(LETTERS_AND_DIGITS) for _ in range(CELL_SIZE)) + for j in range(COLUMN_COUNT): +test_row.set_cell(column_family_id=COLUMN_FAMILY_ID, + column=('field%s' % j).encode('utf-8'), + value=value, + timestamp=datetime.datetime.now()) + yield test_row + + def expand(self, pvalue): +return (pvalue +| beam.Create(self._generate()) +| WriteToBigTable(project_id=self.beam_options['project_id'], + instance_id=self.beam_options['instance_id'], + table_id=self.beam_options['table_id'])) + +@unittest.skipIf(Client is None, 'GCP Bigtable dependencies are not installed') +class BigtableIOTest(unittest.TestCase): + """ Bigtable IO Connector Test + + This tests the connector both ways, first writing rows to a new table, then reading 
them and comparing the counters + """ + def setUp(self): +self.result = None +self.table = Client(project=PROJECT_ID, admin=True)\ +.instance(instance_id=INSTANCE_ID)\ +.table(TABLE_ID) + +if not self.table.exists(): + column_families = {COLUMN_FAMILY_ID: column_family.MaxVersionsGCRule(2)} + self.table.create(column_families=column_families) + logging.info('Table {} has been created!'.format(TABLE_ID)) + + def test_bigtable_io(self): +print('Project ID: {}'.format(PROJECT_ID)) +print('Instance ID:{}'.format(INSTANCE_ID)) +print('Table ID: {}'.format(TABLE_ID)) + +pipeline_options = PipelineOptions(PIPELINE_PARAMETERS) +p = beam.Pipeline(options=pipeline_options) +_ = (p | 'Write Test Rows' >> GenerateTestRows()) + +self.result = p.run() +self.result.wait_until_finish() + +assert self.result.state == PipelineState.DONE + +if not hasattr(self.result, 'has_job') or self.result.has_job: + query_result = self.result.metrics().query(MetricsFilter().with_name('Written Row')) + if query_result['counters']: +read_counter = query_result['counters'][0] +logging.info('Number of Rows
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266372&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266372 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:55 Start Date: 25/Jun/19 06:55 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297031653 ## File path: sdks/python/apache_beam/io/gcp/bigtableio.py ## @@ -141,3 +148,129 @@ def expand(self, pvalue): | beam.ParDo(_BigTableWriteFn(beam_options['project_id'], beam_options['instance_id'], beam_options['table_id']))) + + +class BigtableSource(iobase.BoundedSource): + def __init__(self, project_id, instance_id, table_id, filter_=None): +""" Constructor of the Read connector of Bigtable + +Args: + project_id: [string] GCP Project of to write the Rows + instance_id: [string] GCP Instance to write the Rows + table_id: [string] GCP Table to write the `DirectRows` + filter_: [RowFilter] Filter to apply to columns in a row. +""" +super(self.__class__, self).__init__() +self._init({'project_id': project_id, +'instance_id': instance_id, +'table_id': table_id, +'filter_': filter_}) + + def __getstate__(self): +return self.beam_options + + def __setstate__(self, options): +self._init(options) + + def _init(self, options): +self.beam_options = options +self.table = None +self.sample_row_keys = None +self.row_count = Metrics.counter(self.__class__.__name__, 'Row count') + + def _get_table(self): +if self.table is None: + self.table = Client(project=self.beam_options['project_id'])\ + .instance(self.beam_options['instance_id'])\ + .table(self.beam_options['table_id']) +return self.table + + def get_sample_row_keys(self): +""" Get a sample of row keys in the table. 
+ +The returned row keys will delimit contiguous sections of the table of +approximately equal size, which can be used to break up the data for +distributed tasks like mapreduces. +:returns: A cancel-able iterator. Can be consumed by calling ``next()`` + or by casting to a :class:`list` and can be cancelled by + calling ``cancel()``. + +* NOTE: For unclear reasons, the function returns generator even +after wrapping the result as a list. In order to be used as a list, the +result should be wrapped as a list AGAIN! E.g., see 'estimate_size()' +""" +if self.sample_row_keys is None: + self.sample_row_keys = list(self._get_table().sample_row_keys()) +return self.sample_row_keys + + def get_range_tracker(self, start_position=b'', stop_position=b''): +if stop_position == b'': + return LexicographicKeyRangeTracker(start_position) +else: + return LexicographicKeyRangeTracker(start_position, stop_position) + + def estimate_size(self): +return list(self.get_sample_row_keys())[-1].offset_bytes + + def split(self, desired_bundle_size, start_position=None, stop_position=None): +""" Splits the source into a set of bundles, using the row_set if it is set. + +*** At this point, only splitting an entire table into samples based on the sample row keys is supported *** + +:param desired_bundle_size: the desired size (in bytes) of the bundles returned. +:param start_position: if specified, the position must be used as the starting position of the first bundle. +:param stop_position: if specified, the position must be used as the ending position of the last bundle. +Returns: + an iterator of objects of type 'SourceBundle' that gives information about the generated bundles. 
+""" + +if start_position is not None or stop_position is not None: + raise NotImplementedError + +# TODO: Use the desired bundle size to split accordingly +# TODO: Allow users to provide their own row sets + +sample_row_keys = list(self.get_sample_row_keys()) + +if len(sample_row_keys) > 1 and sample_row_keys[0].row_key != b'': + SampleRowKey = namedtuple("SampleRowKey", "row_key offset_bytes") + first_key = SampleRowKey(b'', 0) + sample_row_keys.insert(0, first_key) + sample_row_keys = list(sample_row_keys) + +bundles = [] +for i in range(1, len(sample_row_keys)): + key_1 = sample_row_keys[i - 1].row_key + key_2 = sample_row_keys[i].row_key + size = sample_row_keys[i].offset_bytes - sample_row_keys[i - 1].offset_bytes + bundles.append(iobase.SourceBundle(size, self, key_1, key_2)) + +#
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266373&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266373 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:55 Start Date: 25/Jun/19 06:55 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297031764 ## File path: sdks/python/apache_beam/io/gcp/bigtableio_it_test.py ## @@ -0,0 +1,178 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +""" Integration test for GCP Bigtable testing.""" +from __future__ import absolute_import + +import argparse +import datetime +import logging +import random +import string +import time +import unittest + +import apache_beam as beam +from apache_beam.io import Read +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.runners.runner import PipelineState +from apache_beam.testing.util import assert_that, equal_to +from apache_beam.transforms.combiners import Count + +try: + from google.cloud.bigtable import enums, row, column_family, Client + from gcp.bigtableio import BigtableSource, WriteToBigTable +except ImportError: + Client = None + + +class GenerateTestRows(beam.PTransform): + """ A PTransform to generate dummy rows to write to a Bigtable Table. + + A PTransform that generates a list of `DirectRow` and writes it to a Bigtable Table. + """ + def __init__(self): +super(self.__class__, self).__init__() +self.beam_options = {'project_id': PROJECT_ID, + 'instance_id': INSTANCE_ID, + 'table_id': TABLE_ID} + + def _generate(self): +for i in range(ROW_COUNT): + key = "key_%s" % ('{0:012}'.format(i)) + test_row = row.DirectRow(row_key=key) + value = ''.join(random.choice(LETTERS_AND_DIGITS) for _ in range(CELL_SIZE)) + for j in range(COLUMN_COUNT): +test_row.set_cell(column_family_id=COLUMN_FAMILY_ID, + column=('field%s' % j).encode('utf-8'), + value=value, + timestamp=datetime.datetime.now()) + yield test_row + + def expand(self, pvalue): +return (pvalue +| beam.Create(self._generate()) +| WriteToBigTable(project_id=self.beam_options['project_id'], + instance_id=self.beam_options['instance_id'], + table_id=self.beam_options['table_id'])) + +@unittest.skipIf(Client is None, 'GCP Bigtable dependencies are not installed') +class BigtableIOTest(unittest.TestCase): + """ Bigtable IO Connector Test + + This tests the connector both ways, first writing rows to a new table, then reading 
them and comparing the counters + """ + def setUp(self): +self.result = None +self.table = Client(project=PROJECT_ID, admin=True)\ +.instance(instance_id=INSTANCE_ID)\ +.table(TABLE_ID) + +if not self.table.exists(): + column_families = {COLUMN_FAMILY_ID: column_family.MaxVersionsGCRule(2)} + self.table.create(column_families=column_families) + logging.info('Table {} has been created!'.format(TABLE_ID)) + + def test_bigtable_io(self): +print('Project ID: {}'.format(PROJECT_ID)) +print('Instance ID:{}'.format(INSTANCE_ID)) +print('Table ID: {}'.format(TABLE_ID)) + +pipeline_options = PipelineOptions(PIPELINE_PARAMETERS) +p = beam.Pipeline(options=pipeline_options) +_ = (p | 'Write Test Rows' >> GenerateTestRows()) + +self.result = p.run() +self.result.wait_until_finish() + +assert self.result.state == PipelineState.DONE + +if not hasattr(self.result, 'has_job') or self.result.has_job: + query_result = self.result.metrics().query(MetricsFilter().with_name('Written Row')) + if query_result['counters']: +read_counter = query_result['counters'][0] +logging.info('Number of Rows
[jira] [Work logged] (BEAM-7305) Add first version of Hazelcast Jet Runner
[ https://issues.apache.org/jira/browse/BEAM-7305?focusedWorklogId=266369&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266369 ] ASF GitHub Bot logged work on BEAM-7305: Author: ASF GitHub Bot Created on: 25/Jun/19 06:51 Start Date: 25/Jun/19 06:51 Worklog Time Spent: 10m Work Description: jbartok commented on pull request #8937: [BEAM-7305] Add Jet Runner to Get Started documentation page URL: https://github.com/apache/beam/pull/8937 Just updating documentation so that "Get Started" web page also mentions the Jet Runner. Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/) Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastCompletedBuild/) | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/) [![Build Status](https://builds.apache.org/job/beam_Pos
[jira] [Work logged] (BEAM-7305) Add first version of Hazelcast Jet Runner
[ https://issues.apache.org/jira/browse/BEAM-7305?focusedWorklogId=266371&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266371 ] ASF GitHub Bot logged work on BEAM-7305: Author: ASF GitHub Bot Created on: 25/Jun/19 06:51 Start Date: 25/Jun/19 06:51 Worklog Time Spent: 10m Work Description: jbartok commented on issue #8937: [BEAM-7305] Add Jet Runner to Get Started documentation page URL: https://github.com/apache/beam/pull/8937#issuecomment-505311474 R: @mxm This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266371) Time Spent: 9h (was: 8h 50m) > Add first version of Hazelcast Jet Runner > - > > Key: BEAM-7305 > URL: https://issues.apache.org/jira/browse/BEAM-7305 > Project: Beam > Issue Type: New Feature > Components: runner-jet >Reporter: Maximilian Michels >Assignee: Jozsef Bartok >Priority: Major > Fix For: 2.14.0 > > Time Spent: 9h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266368&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266368 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:49 Start Date: 25/Jun/19 06:49 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297030060 ## File path: sdks/python/apache_beam/io/gcp/bigtableio.py ## @@ -141,3 +148,129 @@ def expand(self, pvalue): | beam.ParDo(_BigTableWriteFn(beam_options['project_id'], beam_options['instance_id'], beam_options['table_id']))) + + +class BigtableSource(iobase.BoundedSource): + def __init__(self, project_id, instance_id, table_id, filter_=None): +""" Constructor of the Read connector of Bigtable + +Args: + project_id: [string] GCP Project of to write the Rows + instance_id: [string] GCP Instance to write the Rows + table_id: [string] GCP Table to write the `DirectRows` + filter_: [RowFilter] Filter to apply to columns in a row. +""" +super(self.__class__, self).__init__() +self._init({'project_id': project_id, +'instance_id': instance_id, +'table_id': table_id, +'filter_': filter_}) + + def __getstate__(self): +return self.beam_options + + def __setstate__(self, options): +self._init(options) + + def _init(self, options): +self.beam_options = options +self.table = None +self.sample_row_keys = None +self.row_count = Metrics.counter(self.__class__.__name__, 'Row count') + + def _get_table(self): +if self.table is None: + self.table = Client(project=self.beam_options['project_id'])\ + .instance(self.beam_options['instance_id'])\ + .table(self.beam_options['table_id']) +return self.table + + def get_sample_row_keys(self): +""" Get a sample of row keys in the table. 
+ +The returned row keys will delimit contiguous sections of the table of +approximately equal size, which can be used to break up the data for +distributed tasks like mapreduces. +:returns: A cancel-able iterator. Can be consumed by calling ``next()`` + or by casting to a :class:`list` and can be cancelled by + calling ``cancel()``. + +* NOTE: For unclear reasons, the function returns generator even +after wrapping the result as a list. In order to be used as a list, the +result should be wrapped as a list AGAIN! E.g., see 'estimate_size()' +""" +if self.sample_row_keys is None: + self.sample_row_keys = list(self._get_table().sample_row_keys()) +return self.sample_row_keys + + def get_range_tracker(self, start_position=b'', stop_position=b''): +if stop_position == b'': + return LexicographicKeyRangeTracker(start_position) +else: + return LexicographicKeyRangeTracker(start_position, stop_position) + + def estimate_size(self): +return list(self.get_sample_row_keys())[-1].offset_bytes + + def split(self, desired_bundle_size, start_position=None, stop_position=None): +""" Splits the source into a set of bundles, using the row_set if it is set. + +*** At this point, only splitting an entire table into samples based on the sample row keys is supported *** + +:param desired_bundle_size: the desired size (in bytes) of the bundles returned. +:param start_position: if specified, the position must be used as the starting position of the first bundle. +:param stop_position: if specified, the position must be used as the ending position of the last bundle. +Returns: + an iterator of objects of type 'SourceBundle' that gives information about the generated bundles. 
+""" + +if start_position is not None or stop_position is not None: + raise NotImplementedError + +# TODO: Use the desired bundle size to split accordingly +# TODO: Allow users to provide their own row sets + +sample_row_keys = list(self.get_sample_row_keys()) + +if len(sample_row_keys) > 1 and sample_row_keys[0].row_key != b'': + SampleRowKey = namedtuple("SampleRowKey", "row_key offset_bytes") + first_key = SampleRowKey(b'', 0) + sample_row_keys.insert(0, first_key) + sample_row_keys = list(sample_row_keys) + +bundles = [] +for i in range(1, len(sample_row_keys)): + key_1 = sample_row_keys[i - 1].row_key + key_2 = sample_row_keys[i].row_key + size = sample_row_keys[i].offset_bytes - sample_row_keys[i - 1].offset_bytes + bundles.append(iobase.SourceBundle(size, self, key_1, key_2)) Review
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266365&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266365 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:49 Start Date: 25/Jun/19 06:49 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297029943 ## File path: sdks/python/apache_beam/io/gcp/bigtableio.py ## @@ -141,3 +148,129 @@ def expand(self, pvalue): | beam.ParDo(_BigTableWriteFn(beam_options['project_id'], beam_options['instance_id'], beam_options['table_id']))) + + +class BigtableSource(iobase.BoundedSource): + def __init__(self, project_id, instance_id, table_id, filter_=None): +""" Constructor of the Read connector of Bigtable + +Args: + project_id: [string] GCP Project of to write the Rows + instance_id: [string] GCP Instance to write the Rows + table_id: [string] GCP Table to write the `DirectRows` + filter_: [RowFilter] Filter to apply to columns in a row. +""" +super(self.__class__, self).__init__() +self._init({'project_id': project_id, +'instance_id': instance_id, +'table_id': table_id, +'filter_': filter_}) + + def __getstate__(self): +return self.beam_options + + def __setstate__(self, options): +self._init(options) + + def _init(self, options): +self.beam_options = options +self.table = None +self.sample_row_keys = None +self.row_count = Metrics.counter(self.__class__.__name__, 'Row count') + + def _get_table(self): +if self.table is None: + self.table = Client(project=self.beam_options['project_id'])\ + .instance(self.beam_options['instance_id'])\ + .table(self.beam_options['table_id']) +return self.table + + def get_sample_row_keys(self): +""" Get a sample of row keys in the table. 
+ +The returned row keys will delimit contiguous sections of the table of +approximately equal size, which can be used to break up the data for +distributed tasks like mapreduces. +:returns: A cancel-able iterator. Can be consumed by calling ``next()`` + or by casting to a :class:`list` and can be cancelled by + calling ``cancel()``. + +* NOTE: For unclear reasons, the function returns generator even +after wrapping the result as a list. In order to be used as a list, the +result should be wrapped as a list AGAIN! E.g., see 'estimate_size()' +""" +if self.sample_row_keys is None: + self.sample_row_keys = list(self._get_table().sample_row_keys()) +return self.sample_row_keys + + def get_range_tracker(self, start_position=b'', stop_position=b''): +if stop_position == b'': Review comment: As the BoundedSource [BigtableSource] has been replaced by a PTransform/DoFn, this one no longer applies. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266365) Time Spent: 23h 40m (was: 23.5h) > Create a Cloud Bigtable IO connector for Python > --- > > Key: BEAM-3342 > URL: https://issues.apache.org/jira/browse/BEAM-3342 > Project: Beam > Issue Type: Bug > Components: sdk-py-core >Reporter: Solomon Duskis >Assignee: Solomon Duskis >Priority: Major > Time Spent: 23h 40m > Remaining Estimate: 0h > > I would like to create a Cloud Bigtable python connector. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266366&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266366 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:49 Start Date: 25/Jun/19 06:49 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297030035 ## File path: sdks/python/apache_beam/io/gcp/bigtableio.py ## @@ -141,3 +148,129 @@ def expand(self, pvalue): | beam.ParDo(_BigTableWriteFn(beam_options['project_id'], beam_options['instance_id'], beam_options['table_id']))) + + +class BigtableSource(iobase.BoundedSource): + def __init__(self, project_id, instance_id, table_id, filter_=None): +""" Constructor of the Read connector of Bigtable + +Args: + project_id: [string] GCP Project of to write the Rows + instance_id: [string] GCP Instance to write the Rows + table_id: [string] GCP Table to write the `DirectRows` + filter_: [RowFilter] Filter to apply to columns in a row. +""" +super(self.__class__, self).__init__() +self._init({'project_id': project_id, +'instance_id': instance_id, +'table_id': table_id, +'filter_': filter_}) + + def __getstate__(self): +return self.beam_options + + def __setstate__(self, options): +self._init(options) + + def _init(self, options): +self.beam_options = options +self.table = None +self.sample_row_keys = None +self.row_count = Metrics.counter(self.__class__.__name__, 'Row count') + + def _get_table(self): +if self.table is None: + self.table = Client(project=self.beam_options['project_id'])\ + .instance(self.beam_options['instance_id'])\ + .table(self.beam_options['table_id']) +return self.table + + def get_sample_row_keys(self): +""" Get a sample of row keys in the table. 
+ +The returned row keys will delimit contiguous sections of the table of +approximately equal size, which can be used to break up the data for +distributed tasks like mapreduces. +:returns: A cancel-able iterator. Can be consumed by calling ``next()`` + or by casting to a :class:`list` and can be cancelled by + calling ``cancel()``. + +* NOTE: For unclear reasons, the function returns generator even +after wrapping the result as a list. In order to be used as a list, the +result should be wrapped as a list AGAIN! E.g., see 'estimate_size()' +""" +if self.sample_row_keys is None: + self.sample_row_keys = list(self._get_table().sample_row_keys()) +return self.sample_row_keys + + def get_range_tracker(self, start_position=b'', stop_position=b''): +if stop_position == b'': + return LexicographicKeyRangeTracker(start_position) +else: + return LexicographicKeyRangeTracker(start_position, stop_position) + + def estimate_size(self): +return list(self.get_sample_row_keys())[-1].offset_bytes + + def split(self, desired_bundle_size, start_position=None, stop_position=None): +""" Splits the source into a set of bundles, using the row_set if it is set. + +*** At this point, only splitting an entire table into samples based on the sample row keys is supported *** + +:param desired_bundle_size: the desired size (in bytes) of the bundles returned. +:param start_position: if specified, the position must be used as the starting position of the first bundle. +:param stop_position: if specified, the position must be used as the ending position of the last bundle. +Returns: + an iterator of objects of type 'SourceBundle' that gives information about the generated bundles. +""" + +if start_position is not None or stop_position is not None: + raise NotImplementedError + +# TODO: Use the desired bundle size to split accordingly Review comment: As the BoundedSource [BigtableSource] has been replaced by a PTransform/DoFn, this one no longer applies. 
This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266366) Time Spent: 23h 50m (was: 23h 40m) > Create a Cloud Bigtable IO connector for Python > --- > > Key: BEAM-3342 >
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266364&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266364 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:49 Start Date: 25/Jun/19 06:49 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297029891 ## File path: sdks/python/apache_beam/io/gcp/bigtableio.py ## @@ -141,3 +148,129 @@ def expand(self, pvalue): | beam.ParDo(_BigTableWriteFn(beam_options['project_id'], beam_options['instance_id'], beam_options['table_id']))) + + +class BigtableSource(iobase.BoundedSource): + def __init__(self, project_id, instance_id, table_id, filter_=None): +""" Constructor of the Read connector of Bigtable + +Args: + project_id: [string] GCP Project of to write the Rows + instance_id: [string] GCP Instance to write the Rows + table_id: [string] GCP Table to write the `DirectRows` + filter_: [RowFilter] Filter to apply to columns in a row. +""" +super(self.__class__, self).__init__() +self._init({'project_id': project_id, +'instance_id': instance_id, +'table_id': table_id, +'filter_': filter_}) + + def __getstate__(self): +return self.beam_options + + def __setstate__(self, options): +self._init(options) + + def _init(self, options): +self.beam_options = options +self.table = None +self.sample_row_keys = None +self.row_count = Metrics.counter(self.__class__.__name__, 'Row count') + + def _get_table(self): +if self.table is None: + self.table = Client(project=self.beam_options['project_id'])\ + .instance(self.beam_options['instance_id'])\ + .table(self.beam_options['table_id']) +return self.table + + def get_sample_row_keys(self): +""" Get a sample of row keys in the table. 
+ +The returned row keys will delimit contiguous sections of the table of +approximately equal size, which can be used to break up the data for +distributed tasks like mapreduces. +:returns: A cancel-able iterator. Can be consumed by calling ``next()`` + or by casting to a :class:`list` and can be cancelled by + calling ``cancel()``. + +* NOTE: For unclear reasons, the function returns generator even Review comment: As the BoundedSource [BigtableSource] has been replaced by a PTransform/DoFn, this one no longer applies. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266364) Time Spent: 23.5h (was: 23h 20m) > Create a Cloud Bigtable IO connector for Python > --- > > Key: BEAM-3342 > URL: https://issues.apache.org/jira/browse/BEAM-3342 > Project: Beam > Issue Type: Bug > Components: sdk-py-core >Reporter: Solomon Duskis >Assignee: Solomon Duskis >Priority: Major > Time Spent: 23.5h > Remaining Estimate: 0h > > I would like to create a Cloud Bigtable python connector. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3342) Create a Cloud Bigtable IO connector for Python
[ https://issues.apache.org/jira/browse/BEAM-3342?focusedWorklogId=266363&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266363 ] ASF GitHub Bot logged work on BEAM-3342: Author: ASF GitHub Bot Created on: 25/Jun/19 06:48 Start Date: 25/Jun/19 06:48 Worklog Time Spent: 10m Work Description: mf2199 commented on pull request #8457: [BEAM-3342] Create a Cloud Bigtable IO connector for Python URL: https://github.com/apache/beam/pull/8457#discussion_r297029622 ## File path: sdks/python/apache_beam/io/gcp/bigtableio.py ## @@ -37,16 +37,23 @@ """ from __future__ import absolute_import +from collections import namedtuple +from random import shuffle + import apache_beam as beam +from apache_beam.io import iobase +from apache_beam.io.range_trackers import LexicographicKeyRangeTracker from apache_beam.metrics import Metrics from apache_beam.transforms.display import DisplayDataItem try: from google.cloud.bigtable import Client + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet except ImportError: - pass + Client = None -__all__ = ['WriteToBigTable'] +__all__ = ['BigtableSource', 'WriteToBigTable'] Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266363) Time Spent: 23h 20m (was: 23h 10m) > Create a Cloud Bigtable IO connector for Python > --- > > Key: BEAM-3342 > URL: https://issues.apache.org/jira/browse/BEAM-3342 > Project: Beam > Issue Type: Bug > Components: sdk-py-core >Reporter: Solomon Duskis >Assignee: Solomon Duskis >Priority: Major > Time Spent: 23h 20m > Remaining Estimate: 0h > > I would like to create a Cloud Bigtable python connector. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7475) Add Python stateful processing example in blog
[ https://issues.apache.org/jira/browse/BEAM-7475?focusedWorklogId=266351&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266351 ] ASF GitHub Bot logged work on BEAM-7475: Author: ASF GitHub Bot Created on: 25/Jun/19 06:09 Start Date: 25/Jun/19 06:09 Worklog Time Spent: 10m Work Description: rakeshcusat commented on issue #8803: [BEAM-7475] update wordcount example URL: https://github.com/apache/beam/pull/8803#issuecomment-505298718 R: @aaltay I have taken care of your comments. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266351) Time Spent: 4.5h (was: 4h 20m) > Add Python stateful processing example in blog > -- > > Key: BEAM-7475 > URL: https://issues.apache.org/jira/browse/BEAM-7475 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rakesh Kumar >Assignee: Rakesh Kumar >Priority: Major > Time Spent: 4.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7475) Add Python stateful processing example in blog
[ https://issues.apache.org/jira/browse/BEAM-7475?focusedWorklogId=266276&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266276 ] ASF GitHub Bot logged work on BEAM-7475: Author: ASF GitHub Bot Created on: 25/Jun/19 02:05 Start Date: 25/Jun/19 02:05 Worklog Time Spent: 10m Work Description: aaltay commented on pull request #8803: [BEAM-7475] update wordcount example URL: https://github.com/apache/beam/pull/8803#discussion_r296980410 ## File path: website/src/get-started/wordcount-example.md ## @@ -1362,7 +1379,20 @@ PCollection> wordCounts = windowedWords.apply(new WordCount.Cou ``` ```py -# This feature is not yet available in the Beam SDK for Python. +class CountWordsFn(Dofn): + def __init__(self): Review comment: I think we can drop the counter and __init__ for this snippet. (and `self.words_counter.inc(len(words)) below. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266276) > Add Python stateful processing example in blog > -- > > Key: BEAM-7475 > URL: https://issues.apache.org/jira/browse/BEAM-7475 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rakesh Kumar >Assignee: Rakesh Kumar >Priority: Major > Time Spent: 4h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7475) Add Python stateful processing example in blog
[ https://issues.apache.org/jira/browse/BEAM-7475?focusedWorklogId=266275&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266275 ] ASF GitHub Bot logged work on BEAM-7475: Author: ASF GitHub Bot Created on: 25/Jun/19 02:05 Start Date: 25/Jun/19 02:05 Worklog Time Spent: 10m Work Description: aaltay commented on pull request #8803: [BEAM-7475] update wordcount example URL: https://github.com/apache/beam/pull/8803#discussion_r296980315 ## File path: website/src/get-started/wordcount-example.md ## @@ -1362,7 +1379,20 @@ PCollection> wordCounts = windowedWords.apply(new WordCount.Cou ``` ```py -# This feature is not yet available in the Beam SDK for Python. +class CountWordsFn(Dofn): + def __init__(self): + self.words_counter = Metrics.counter(self.__class__, 'words') + + def process(self, element): + words = element.split() + self.words_counter.inc(len(words)) + +class CountWords(PTransform): + +def expand(self, pcoll): + return pcoll | FlatMap(CountWordsFN()) Review comment: Typo: `CountWordsFN` -> ` CountWordsFn` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266275) Time Spent: 4h 20m (was: 4h 10m) > Add Python stateful processing example in blog > -- > > Key: BEAM-7475 > URL: https://issues.apache.org/jira/browse/BEAM-7475 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rakesh Kumar >Assignee: Rakesh Kumar >Priority: Major > Time Spent: 4h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7141) Expose kv and window parameters for on_timer
[ https://issues.apache.org/jira/browse/BEAM-7141?focusedWorklogId=266273&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266273 ] ASF GitHub Bot logged work on BEAM-7141: Author: ASF GitHub Bot Created on: 25/Jun/19 01:59 Start Date: 25/Jun/19 01:59 Worklog Time Spent: 10m Work Description: aaltay commented on pull request #8739: BEAM-7141: add key value timer callback URL: https://github.com/apache/beam/pull/8739 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266273) Time Spent: 6h 20m (was: 6h 10m) > Expose kv and window parameters for on_timer > > > Key: BEAM-7141 > URL: https://issues.apache.org/jira/browse/BEAM-7141 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.12.0 >Reporter: Thomas Weise >Assignee: Rakesh Kumar >Priority: Major > Time Spent: 6h 20m > Remaining Estimate: 0h > > We would like to have access to key and window inside the timer callback. > Without, it is also difficult to debug. We run into this while working on > BEAM-7112 -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7018) Regex transform for Python SDK
[ https://issues.apache.org/jira/browse/BEAM-7018?focusedWorklogId=266271&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266271 ] ASF GitHub Bot logged work on BEAM-7018: Author: ASF GitHub Bot Created on: 25/Jun/19 01:53 Start Date: 25/Jun/19 01:53 Worklog Time Spent: 10m Work Description: aaltay commented on issue #8859: [BEAM-7018] Added Regex transform for PythonSDK URL: https://github.com/apache/beam/pull/8859#issuecomment-505245457 @robertwb do you want to finish reviewing this? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266271) Time Spent: 2.5h (was: 2h 20m) > Regex transform for Python SDK > -- > > Key: BEAM-7018 > URL: https://issues.apache.org/jira/browse/BEAM-7018 > Project: Beam > Issue Type: New Feature > Components: sdk-py-core >Reporter: Rose Nguyen >Assignee: Shehzaad Nakhoda >Priority: Minor > Time Spent: 2.5h > Remaining Estimate: 0h > > PTransforms to use Regular Expressions to process elements in a PCollection > It should offer the same API as its Java counterpart: > [https://github.com/apache/beam/blob/11a977b8b26eff2274d706541127c19dc93131a2/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266270&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266270 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 25/Jun/19 01:52 Start Date: 25/Jun/19 01:52 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on issue #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#issuecomment-505211244 @Robert, thanks for your comments, they are very helpful and thoughtful. I fixed all at new commit, can you please take a look when you have time? Many thanks. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266270) Time Spent: 9h 50m (was: 9h 40m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 9h 50m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (BEAM-6696) GroupIntoBatches transform for Python SDK
[ https://issues.apache.org/jira/browse/BEAM-6696?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16871915#comment-16871915 ] Ahmet Altay commented on BEAM-6696: --- https://github.com/apache/beam/pull/8914 -- added this transform. However I will keep the JIRA open. [~robertwb] suggested merging GroupIntoBatches and BatchElement transforms by adding state backing to BatchElements. (Robert's comment here: https://github.com/apache/beam/pull/8914#issuecomment-504947559 : """ I suggest we update BatchElements to be backed by state, assuming we can do so without performance degredation. (The estimator is equivalent to fixed size when min == max.) It would also likely make sense to add auto-sizing capabilities to Java. (Another difference is that GroupIntoBatches requires keyed input, and batches per (and then drops) the key. If we keep the key, we should probably still emit it as we do for Java.) """ ) This also makes sense to me. However only issues I see is state does not work for all runners. > GroupIntoBatches transform for Python SDK > - > > Key: BEAM-6696 > URL: https://issues.apache.org/jira/browse/BEAM-6696 > Project: Beam > Issue Type: New Feature > Components: sdk-py-core >Reporter: Ahmet Altay >Assignee: Shehzaad Nakhoda >Priority: Major > > Add a PTransform that batches inputs to a desired batch size. Batches will > contain only elements of a single key. > It should offer the same API as its Java counterpart: > https://github.com/apache/beam/blob/11a977b8b26eff2274d706541127c19dc93131a2/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/GroupIntoBatches.java > Unlike BatchElements transform > (https://github.com/apache/beam/blob/11a977b8b26eff2274d706541127c19dc93131a2/sdks/python/apache_beam/transforms/util.py#L461) > GroupIntoBatches will use state to batch across bundles as well. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266268&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266268 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 25/Jun/19 01:47 Start Date: 25/Jun/19 01:47 Worklog Time Spent: 10m Work Description: aaltay commented on pull request #8903: [BEAM-7389] Add Python snippet for ParDo transform URL: https://github.com/apache/beam/pull/8903 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266268) Time Spent: 17h 40m (was: 17.5h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 17h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7629) Improve DoFn method validation in core/graph/fn.go
[ https://issues.apache.org/jira/browse/BEAM-7629?focusedWorklogId=266267&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266267 ] ASF GitHub Bot logged work on BEAM-7629: Author: ASF GitHub Bot Created on: 25/Jun/19 01:35 Start Date: 25/Jun/19 01:35 Worklog Time Spent: 10m Work Description: youngoli commented on issue #8936: [BEAM-7629] Go SDK additional Validation for DoFns (1st impl) (DO NOT MERGE) URL: https://github.com/apache/beam/pull/8936#issuecomment-505241954 R: @lostluck Hoping I could get your feedback on what I have so far and help answering some questions. 1. Some of my tests currently fail because the following is expected in signatures for all DoFn methods: `side input parameters must follow main input parameter`. My understanding however is that when including side inputs in the StartBundle and FinishBundle methods (if ProcessElement has some) only the side inputs should be included, with no main inputs. So theoretically it should work, but right now all methods are treated as if they're ProcessElement. Is this correct? 2. I had trouble finding info for the expected inputs and outputs of the DoFn methods, and had to resort to checking other SDKs. Is there a place where that's documented for the Go SDK? If not, could you clarify for StartBundle, FinishBundle, Setup, and Teardown what parameters and return values they can have (in addition to the side inputs and emits that I already account for)? Right now I make an assumption that Setup/Teardown shouldn't have any parameters based on the Java SDK, but couldn't find any concrete info for return values for any of the methods. 3. Should I worry about the Go SDK "generics" when it comes to the side input and emit validation? Is it even valid for a user to use generics to replace emits or side inputs there, and if so should I be checking that they're consistent? For this first draft I just ignored the possibility of generics. 
This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266267) Time Spent: 20m (was: 10m) > Improve DoFn method validation in core/graph/fn.go > -- > > Key: BEAM-7629 > URL: https://issues.apache.org/jira/browse/BEAM-7629 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Daniel Oliveira >Assignee: Daniel Oliveira >Priority: Major > Time Spent: 20m > Remaining Estimate: 0h > > Various improvements can be made to validating the signatures and type usages > in DoFns. Some things that should probably be checked: > * Check that StartBundle and FinishBundle contain any emit parameters and > side inputs present in ProcessElement > * Check that any side inputs/emits have correctly matching types between > Start/FinishBundle and ProcessElement > * Check that parameters and return values for the various methods are valid > (for ex. Teardown/Setup should have no params) -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7629) Improve DoFn method validation in core/graph/fn.go
[ https://issues.apache.org/jira/browse/BEAM-7629?focusedWorklogId=266262&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266262 ] ASF GitHub Bot logged work on BEAM-7629: Author: ASF GitHub Bot Created on: 25/Jun/19 01:26 Start Date: 25/Jun/19 01:26 Worklog Time Spent: 10m Work Description: youngoli commented on pull request #8936: [BEAM-7629] Go SDK additional Validation for DoFns (1st impl) (DO NOT MERGE) URL: https://github.com/apache/beam/pull/8936 This is the first draft of this CL for additional DoFn validation. It adds validation for Emits and SideInputs in StartBundle and FinishBundle. Currently does not pass all tests. DO NOT MERGE Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [x] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [x] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/) Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastCompletedBuild/) | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://build
[jira] [Created] (BEAM-7629) Improve DoFn method validation in core/graph/fn.go
Daniel Oliveira created BEAM-7629: - Summary: Improve DoFn method validation in core/graph/fn.go Key: BEAM-7629 URL: https://issues.apache.org/jira/browse/BEAM-7629 Project: Beam Issue Type: Improvement Components: sdk-go Reporter: Daniel Oliveira Assignee: Daniel Oliveira Various improvements can be made to validating the signatures and type usages in DoFns. Some things that should probably be checked: * Check that StartBundle and FinishBundle contain any emit parameters and side inputs present in ProcessElement * Check that any side inputs/emits have correctly matching types between Start/FinishBundle and ProcessElement * Check that parameters and return values for the various methods are valid (for ex. Teardown/Setup should have no params) -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Updated] (BEAM-7629) Improve DoFn method validation in core/graph/fn.go
[ https://issues.apache.org/jira/browse/BEAM-7629?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Daniel Oliveira updated BEAM-7629: -- Status: Open (was: Triage Needed) > Improve DoFn method validation in core/graph/fn.go > -- > > Key: BEAM-7629 > URL: https://issues.apache.org/jira/browse/BEAM-7629 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Daniel Oliveira >Assignee: Daniel Oliveira >Priority: Major > > Various improvements can be made to validating the signatures and type usages > in DoFns. Some things that should probably be checked: > * Check that StartBundle and FinishBundle contain any emit parameters and > side inputs present in ProcessElement > * Check that any side inputs/emits have correctly matching types between > Start/FinishBundle and ProcessElement > * Check that parameters and return values for the various methods are valid > (for ex. Teardown/Setup should have no params) -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266248&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266248 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 25/Jun/19 00:56 Start Date: 25/Jun/19 00:56 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on issue #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#issuecomment-505235019 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266248) Time Spent: 9h 40m (was: 9.5h) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 9h 40m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Closed] (BEAM-7618) Revert moving mongodbio import to try block
[ https://issues.apache.org/jira/browse/BEAM-7618?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Yichi Zhang closed BEAM-7618. - Resolution: Invalid Fix Version/s: Not applicable > Revert moving mongodbio import to try block > --- > > Key: BEAM-7618 > URL: https://issues.apache.org/jira/browse/BEAM-7618 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Reporter: Yichi Zhang >Assignee: Yichi Zhang >Priority: Trivial > Fix For: Not applicable > > > revert changes in [https://github.com/apache/beam/pull/8920] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266196&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266196 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:54 Start Date: 24/Jun/19 22:54 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on issue #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#issuecomment-505211244 Robert, thanks for your comments, they are very helpful and thoughtful. I fixed all at new commit, can you please take a look when you have time? Many thanks. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266196) Time Spent: 9.5h (was: 9h 20m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 9.5h > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266195&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266195 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:53 Start Date: 24/Jun/19 22:53 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296947126 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -284,6 +322,8 @@ def run_pipeline(self, pipeline, options): pipeline.visit(DataflowRunner.group_by_key_input_visitor()) self._bundle_repeat = self._bundle_repeat or options.view_as( pipeline_options.DirectOptions).direct_runner_bundle_repeat +FnApiRunner._num_workers = max(FnApiRunner._num_workers, options.view_as( Review comment: Changed to support option only. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266195) Time Spent: 9h 20m (was: 9h 10m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 9h 20m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266194&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266194 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:53 Start Date: 24/Jun/19 22:53 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296947021 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -179,16 +197,33 @@ def __iter__(self): # May need to revise. trigger_driver = trigger.create_trigger_driver(self._windowing, True) windowed_key_values = trigger_driver.process_entire_key - coder_impl = self._post_grouped_coder.get_impl() + key_coder_impl = self._key_coder.get_impl() + coder_impl = self._post_grouped_coder.get_impl() for encoded_key, windowed_values in self._table.items(): key = key_coder_impl.decode(encoded_key) +output_stream = create_OutputStream() for wkvs in windowed_key_values(key, windowed_values): coder_impl.encode_to_stream(wkvs, output_stream, True) - self._grouped_output = [output_stream.get()] + +self._grouped_output.append(output_stream.get()) Review comment: Yeah, changed to `partition(n)` at new commit.. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266194) Time Spent: 9h 10m (was: 9h) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 9h 10m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266191&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266191 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:51 Start Date: 24/Jun/19 22:51 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296946514 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -139,6 +139,25 @@ def done(self): self._state = self.DONE_STATE +class _PartitionBuffer(object): + """This class is created to support partition(n) function.""" + def __init__(self, inputs): +self._inputs = inputs + + def partition(self, n): +v = list(self._inputs.values())[0] +if isinstance(v, list): Review comment: I added `_ListBuffer` class to support partitioning for list. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266191) Time Spent: 8h 40m (was: 8.5h) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 8h 40m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266193&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266193 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:52 Start Date: 24/Jun/19 22:52 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296946744 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -139,6 +139,25 @@ def done(self): self._state = self.DONE_STATE +class _PartitionBuffer(object): + """This class is created to support partition(n) function.""" + def __init__(self, inputs): +self._inputs = inputs + + def partition(self, n): +v = list(self._inputs.values())[0] +if isinstance(v, list): + return [self._inputs] Review comment: It turns out that we cannot split inputs with a timer. This is fixed at new commit. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266193) Time Spent: 9h (was: 8h 50m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 9h > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266192&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266192 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:51 Start Date: 24/Jun/19 22:51 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296946547 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -139,6 +139,25 @@ def done(self): self._state = self.DONE_STATE +class _PartitionBuffer(object): + """This class is created to support partition(n) function.""" + def __init__(self, inputs): +self._inputs = inputs + + def partition(self, n): +v = list(self._inputs.values())[0] Review comment: This is removed now. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266192) Time Spent: 8h 50m (was: 8h 40m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 8h 50m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266188&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266188 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:50 Start Date: 24/Jun/19 22:50 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296946300 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -691,13 +737,14 @@ def input_for(ptransform_id, input_id): if other_input not in deferred_inputs: deferred_inputs[other_input] = [] # TODO(robertwb): merge results -last_result, splits = BundleManager( +last_result, splits = ParallelBundleManager( Review comment: I changed it to always use `ParallelBundleManager`, when num_workers = 1, inputs will not be split. Does this sound good? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266188) Time Spent: 8h 20m (was: 8h 10m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 8h 20m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266189&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266189 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:50 Start Date: 24/Jun/19 22:50 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296946421 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -139,6 +139,25 @@ def done(self): self._state = self.DONE_STATE +class _PartitionBuffer(object): Review comment: This class is removed now. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266189) Time Spent: 8.5h (was: 8h 20m) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-3645) Support multi-process execution on the FnApiRunner
[ https://issues.apache.org/jira/browse/BEAM-3645?focusedWorklogId=266186&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266186 ] ASF GitHub Bot logged work on BEAM-3645: Author: ASF GitHub Bot Created on: 24/Jun/19 22:49 Start Date: 24/Jun/19 22:49 Worklog Time Spent: 10m Work Description: Hannah-Jiang commented on pull request #8872: [BEAM-3645] add ParallelBundleManager URL: https://github.com/apache/beam/pull/8872#discussion_r296946002 ## File path: sdks/python/apache_beam/runners/portability/fn_api_runner.py ## @@ -141,13 +141,18 @@ def done(self): class _GroupingBuffer(object): """Used to accumulate groupded (shuffled) results.""" - def __init__(self, pre_grouped_coder, post_grouped_coder, windowing): + def __init__(self, pre_grouped_coder, post_grouped_coder, windowing, + num_workers=1): self._key_coder = pre_grouped_coder.key_coder() self._pre_grouped_coder = pre_grouped_coder self._post_grouped_coder = post_grouped_coder self._table = collections.defaultdict(list) +self._table_count = 0 Review comment: It is not used and removed at the new commit. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266186) Time Spent: 8h 10m (was: 8h) > Support multi-process execution on the FnApiRunner > -- > > Key: BEAM-3645 > URL: https://issues.apache.org/jira/browse/BEAM-3645 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Affects Versions: 2.2.0, 2.3.0 >Reporter: Charles Chen >Assignee: Hannah Jiang >Priority: Major > Time Spent: 8h 10m > Remaining Estimate: 0h > > https://issues.apache.org/jira/browse/BEAM-3644 gave us a 15x performance > gain over the previous DirectRunner. We can do even better in multi-core > environments by supporting multi-process execution in the FnApiRunner, to > scale past Python GIL limitations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7437) Integration Test for BQ streaming inserts for streaming pipelines
[ https://issues.apache.org/jira/browse/BEAM-7437?focusedWorklogId=266181&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266181 ] ASF GitHub Bot logged work on BEAM-7437: Author: ASF GitHub Bot Created on: 24/Jun/19 22:37 Start Date: 24/Jun/19 22:37 Worklog Time Spent: 10m Work Description: ttanay commented on issue #8934: [BEAM-7437] Add streaming flag to BQ streaming inserts IT test URL: https://github.com/apache/beam/pull/8934#issuecomment-505207401 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266181) Time Spent: 2h 20m (was: 2h 10m) > Integration Test for BQ streaming inserts for streaming pipelines > - > > Key: BEAM-7437 > URL: https://issues.apache.org/jira/browse/BEAM-7437 > Project: Beam > Issue Type: Test > Components: io-python-gcp >Affects Versions: 2.12.0 >Reporter: Tanay Tummalapalli >Assignee: Tanay Tummalapalli >Priority: Minor > Labels: test > Fix For: 2.14.0 > > Time Spent: 2h 20m > Remaining Estimate: 0h > > Integration Test for BigQuery Sink using Streaming Inserts for streaming > pipelines. > Integration tests currently exist for batch pipelines, it can also be added > for streaming pipelines using TestStream. This will be a precursor to the > failing integration test to be added for [BEAM-6611| > https://issues.apache.org/jira/browse/BEAM-6611]. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-6611) A Python Sink for BigQuery with File Loads in Streaming
[ https://issues.apache.org/jira/browse/BEAM-6611?focusedWorklogId=266171&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266171 ] ASF GitHub Bot logged work on BEAM-6611: Author: ASF GitHub Bot Created on: 24/Jun/19 22:25 Start Date: 24/Jun/19 22:25 Worklog Time Spent: 10m Work Description: ttanay commented on issue #8871: [BEAM-6611] BigQuery file loads in Streaming for Python SDK URL: https://github.com/apache/beam/pull/8871#issuecomment-505204603 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266171) Time Spent: 50m (was: 40m) > A Python Sink for BigQuery with File Loads in Streaming > --- > > Key: BEAM-6611 > URL: https://issues.apache.org/jira/browse/BEAM-6611 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Reporter: Pablo Estrada >Assignee: Tanay Tummalapalli >Priority: Major > Labels: gsoc, gsoc2019, mentor > Time Spent: 50m > Remaining Estimate: 0h > > The Java SDK supports a bunch of methods for writing data into BigQuery, > while the Python SDK supports the following: > - Streaming inserts for streaming pipelines [As seen in [bigquery.py and > BigQueryWriteFn|https://github.com/apache/beam/blob/master/sdks/python/apache_beam/io/gcp/bigquery.py#L649-L813]] > - File loads for batch pipelines [As implemented in [PR > 7655|https://github.com/apache/beam/pull/7655]] > Quick and dirty early design doc: https://s.apache.org/beam-bqfl-py-streaming > The Java SDK also supports File Loads for Streaming pipelines [see BatchLoads > application|https://github.com/apache/beam/blob/master/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java#L1709-L1776]. 
> File loads have the advantage of being much cheaper than streaming inserts > (although they also are slower for the records to show up in the table). -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Comment Edited] (BEAM-7013) A new count distinct transform based on BigQuery compatible HyperLogLog++ implementation
[ https://issues.apache.org/jira/browse/BEAM-7013?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16871825#comment-16871825 ] Yueyang Qiu edited comment on BEAM-7013 at 6/24/19 10:08 PM: - Update: A BigQuery-compatible implementation of HyperLogLog++ has been open-sourced in [https://github.com/google/zetasketch], and a design doc ([https://docs.google.com/document/d/1U5aXdC9lDSOqT6FPHRulp-EutYiQ9KeHpgu-19CIfEI]) about integrating it into Beam has been sent out to d...@beam.apache.org. was (Author: robinyqiu): Update: Google's implementation of HyperLogLog++ has been open-sourced in [https://github.com/google/zetasketch], and a design doc ([https://docs.google.com/document/d/1U5aXdC9lDSOqT6FPHRulp-EutYiQ9KeHpgu-19CIfEI]) about integrating it into Beam has been sent out to d...@beam.apache.org. > A new count distinct transform based on BigQuery compatible HyperLogLog++ > implementation > > > Key: BEAM-7013 > URL: https://issues.apache.org/jira/browse/BEAM-7013 > Project: Beam > Issue Type: New Feature > Components: extensions-java-sketching, sdk-java-core >Reporter: Yueyang Qiu >Assignee: Yueyang Qiu >Priority: Major > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (BEAM-7013) A new count distinct transform based on BigQuery compatible HyperLogLog++ implementation
[ https://issues.apache.org/jira/browse/BEAM-7013?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16871825#comment-16871825 ] Yueyang Qiu commented on BEAM-7013: --- Update: Google's implementation of HyperLogLog++ has been open-sourced in [https://github.com/google/zetasketch], and a [design doc |[https://docs.google.com/document/d/1U5aXdC9lDSOqT6FPHRulp-EutYiQ9KeHpgu-19CIfEI]]about integrating it into Beam has been sent out to d...@beam.apache.org. > A new count distinct transform based on BigQuery compatible HyperLogLog++ > implementation > > > Key: BEAM-7013 > URL: https://issues.apache.org/jira/browse/BEAM-7013 > Project: Beam > Issue Type: New Feature > Components: extensions-java-sketching, sdk-java-core >Reporter: Yueyang Qiu >Assignee: Yueyang Qiu >Priority: Major > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Comment Edited] (BEAM-7013) A new count distinct transform based on BigQuery compatible HyperLogLog++ implementation
[ https://issues.apache.org/jira/browse/BEAM-7013?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16871825#comment-16871825 ] Yueyang Qiu edited comment on BEAM-7013 at 6/24/19 10:07 PM: - Update: Google's implementation of HyperLogLog++ has been open-sourced in [https://github.com/google/zetasketch], and a design doc ([https://docs.google.com/document/d/1U5aXdC9lDSOqT6FPHRulp-EutYiQ9KeHpgu-19CIfEI]) about integrating it into Beam has been sent out to d...@beam.apache.org. was (Author: robinyqiu): Update: Google's implementation of HyperLogLog++ has been open-sourced in [https://github.com/google/zetasketch], and a [design doc |[https://docs.google.com/document/d/1U5aXdC9lDSOqT6FPHRulp-EutYiQ9KeHpgu-19CIfEI]]about integrating it into Beam has been sent out to d...@beam.apache.org. > A new count distinct transform based on BigQuery compatible HyperLogLog++ > implementation > > > Key: BEAM-7013 > URL: https://issues.apache.org/jira/browse/BEAM-7013 > Project: Beam > Issue Type: New Feature > Components: extensions-java-sketching, sdk-java-core >Reporter: Yueyang Qiu >Assignee: Yueyang Qiu >Priority: Major > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7606) SchemaUtilTest.testBeamRowMapper_datetime is broken
[ https://issues.apache.org/jira/browse/BEAM-7606?focusedWorklogId=266122&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266122 ] ASF GitHub Bot logged work on BEAM-7606: Author: ASF GitHub Bot Created on: 24/Jun/19 21:15 Start Date: 24/Jun/19 21:15 Worklog Time Spent: 10m Work Description: iemejia commented on issue #8924: [BEAM-7606] Fix JDBC time conversion tests URL: https://github.com/apache/beam/pull/8924#issuecomment-505184214 More info in my execution environment (I could reproduce this with both debian and ubuntu). ``` $ java -version openjdk version "1.8.0_212" OpenJDK Runtime Environment (build 1.8.0_212-8u212-b01-1-b01) OpenJDK 64-Bit Server VM (build 25.212-b01, mixed mode) $ uname -a Linux myhost 4.19.0-5-amd64 #1 SMP Debian 4.19.37-5 (2019-06-19) x86_64 GNU/Linux $ env | grep TZ TZ=Europe/Paris ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266122) Time Spent: 50m (was: 40m) > SchemaUtilTest.testBeamRowMapper_datetime is broken > --- > > Key: BEAM-7606 > URL: https://issues.apache.org/jira/browse/BEAM-7606 > Project: Beam > Issue Type: Bug > Components: io-java-jdbc >Reporter: Ismaël Mejía >Assignee: Charith Ellawala >Priority: Critical > Fix For: 2.14.0 > > Time Spent: 50m > Remaining Estimate: 0h > > It seems that after merging the support for Beam Rows in JdbcIO (BEAM-6674) > the _Test is broken, can somebody please take a look. > CC: [~reuvenlax] > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Updated] (BEAM-7628) Retry createJob requests in Dataflow Runner for retriable errors.
[ https://issues.apache.org/jira/browse/BEAM-7628?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Valentyn Tymofieiev updated BEAM-7628: -- Issue Type: Improvement (was: Bug) > Retry createJob requests in Dataflow Runner for retriable errors. > - > > Key: BEAM-7628 > URL: https://issues.apache.org/jira/browse/BEAM-7628 > Project: Beam > Issue Type: Improvement > Components: runner-dataflow >Reporter: Valentyn Tymofieiev >Priority: Minor > > When Dataflow Runner is sending a job for remote execution, such requests in > rare cases might fail with retriable errors. Dataflow Runner could recognize > a class of retriable errors and attempt to resubmit the job again when such > errors are encountered. Sample retriable error encountered by Beam Java SDK: > ``` > java.lang.RuntimeException: Failed to create a workflow job: The operation > was cancelled. > 11:32:14 at > org.apache.beam.runners.dataflow.DataflowRunner.run(DataflowRunner.java:869) > 11:32:14 at > org.apache.beam.runners.dataflow.DataflowRunner.run(DataflowRunner.java:178) > 11:32:14 at org.apache.beam.sdk.Pipeline.run(Pipeline.java:313) > 11:32:14 at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > ... 
> 11:32:14 Caused by: > com.google.api.client.googleapis.json.GoogleJsonResponseException: 499 Client > Closed Request > 11:32:14 { > 11:32:14 "code" : 499, > 11:32:14 "errors" : [ { > 11:32:14 "domain" : "global", > 11:32:14 "message" : "The operation was cancelled.", > 11:32:14 "reason" : "backendError" > 11:32:14 } ], > 11:32:14 "message" : "The operation was cancelled.", > 11:32:14 "status" : "CANCELLED" > 11:32:14 } > 11:32:14 at > com.google.api.client.googleapis.json.GoogleJsonResponseException.from(GoogleJsonResponseException.java:146) > 11:32:14 at > com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:113) > 11:32:14 at > com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:40) > 11:32:14 at > com.google.api.client.googleapis.services.AbstractGoogleClientRequest$1.interceptResponse(AbstractGoogleClientRequest.java:321) > 11:32:14 at > com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1067) > 11:32:14 at > com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:419) > 11:32:14 at > com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:352) > 11:32:14 at > com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:469) > 11:32:14 at > org.apache.beam.runners.dataflow.DataflowClient.createJob(DataflowClient.java:61) > 11:32:14 at > org.apache.beam.runners.dataflow.DataflowRunner.run(DataflowRunner.java:855) > 11:32:14 ... 41 more' > ``` -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266118&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266118 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 21:10 Start Date: 24/Jun/19 21:10 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505182616 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266118) Time Spent: 3h 10m (was: 3h) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 3h 10m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Created] (BEAM-7628) Retry createJob requests in Dataflow Runner for retriable errors.
Valentyn Tymofieiev created BEAM-7628: - Summary: Retry createJob requests in Dataflow Runner for retriable errors. Key: BEAM-7628 URL: https://issues.apache.org/jira/browse/BEAM-7628 Project: Beam Issue Type: Bug Components: runner-dataflow Reporter: Valentyn Tymofieiev When Dataflow Runner is sending a job for remote execution, such requests in rare cases might fail with retriable errors. Dataflow Runner could recognize a class of retriable errors and attempt to resubmit the job again when such errors are encountered. Sample retriable error encountered by Beam Java SDK: ``` java.lang.RuntimeException: Failed to create a workflow job: The operation was cancelled. 11:32:14at org.apache.beam.runners.dataflow.DataflowRunner.run(DataflowRunner.java:869) 11:32:14at org.apache.beam.runners.dataflow.DataflowRunner.run(DataflowRunner.java:178) 11:32:14at org.apache.beam.sdk.Pipeline.run(Pipeline.java:313) 11:32:14at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) ... 11:32:14 Caused by: com.google.api.client.googleapis.json.GoogleJsonResponseException: 499 Client Closed Request 11:32:14 { 11:32:14 "code" : 499, 11:32:14 "errors" : [ { 11:32:14 "domain" : "global", 11:32:14 "message" : "The operation was cancelled.", 11:32:14 "reason" : "backendError" 11:32:14 } ], 11:32:14 "message" : "The operation was cancelled.", 11:32:14 "status" : "CANCELLED" 11:32:14 } 11:32:14at com.google.api.client.googleapis.json.GoogleJsonResponseException.from(GoogleJsonResponseException.java:146) 11:32:14at com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:113) 11:32:14at com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:40) 11:32:14at com.google.api.client.googleapis.services.AbstractGoogleClientRequest$1.interceptResponse(AbstractGoogleClientRequest.java:321) 11:32:14at 
com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1067) 11:32:14at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:419) 11:32:14at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:352) 11:32:14at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:469) 11:32:14at org.apache.beam.runners.dataflow.DataflowClient.createJob(DataflowClient.java:61) 11:32:14at org.apache.beam.runners.dataflow.DataflowRunner.run(DataflowRunner.java:855) 11:32:14... 41 more' ``` -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266106&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266106 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:58 Start Date: 24/Jun/19 20:58 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505178797 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266106) Time Spent: 3h (was: 2h 50m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 3h > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7437) Integration Test for BQ streaming inserts for streaming pipelines
[ https://issues.apache.org/jira/browse/BEAM-7437?focusedWorklogId=266105&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266105 ] ASF GitHub Bot logged work on BEAM-7437: Author: ASF GitHub Bot Created on: 24/Jun/19 20:52 Start Date: 24/Jun/19 20:52 Worklog Time Spent: 10m Work Description: ttanay commented on issue #8934: [BEAM-7437] Add streaming flag to BQ streaming inserts IT test URL: https://github.com/apache/beam/pull/8934#issuecomment-505176503 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266105) Time Spent: 2h 10m (was: 2h) > Integration Test for BQ streaming inserts for streaming pipelines > - > > Key: BEAM-7437 > URL: https://issues.apache.org/jira/browse/BEAM-7437 > Project: Beam > Issue Type: Test > Components: io-python-gcp >Affects Versions: 2.12.0 >Reporter: Tanay Tummalapalli >Assignee: Tanay Tummalapalli >Priority: Minor > Labels: test > Fix For: 2.14.0 > > Time Spent: 2h 10m > Remaining Estimate: 0h > > Integration Test for BigQuery Sink using Streaming Inserts for streaming > pipelines. > Integration tests currently exist for batch pipelines, it can also be added > for streaming pipelines using TestStream. This will be a precursor to the > failing integration test to be added for [BEAM-6611| > https://issues.apache.org/jira/browse/BEAM-6611]. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7455) Improve Avro IO integration test coverage on Python 3.
[ https://issues.apache.org/jira/browse/BEAM-7455?focusedWorklogId=266102&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266102 ] ASF GitHub Bot logged work on BEAM-7455: Author: ASF GitHub Bot Created on: 24/Jun/19 20:44 Start Date: 24/Jun/19 20:44 Worklog Time Spent: 10m Work Description: chamikaramj commented on issue #8818: [BEAM-7455] Improve Avro IO integration test coverage on Python 3. URL: https://github.com/apache/beam/pull/8818#issuecomment-505173482 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266102) Time Spent: 3h (was: 2h 50m) > Improve Avro IO integration test coverage on Python 3. > -- > > Key: BEAM-7455 > URL: https://issues.apache.org/jira/browse/BEAM-7455 > Project: Beam > Issue Type: Sub-task > Components: io-python-avro >Reporter: Valentyn Tymofieiev >Assignee: Frederik Bode >Priority: Major > Time Spent: 3h > Remaining Estimate: 0h > > It seems that we don't have an integration test for Avro IO on Python 3: > fastavro_it_test [1] depends on both avro and fastavro, however avro package > currently does not work with Beam on Python 3, so we don't have an > integration test that exercises Avro IO on Python 3. > We should add an integration test for Avro IO that does not need both > libraries at the same time, and instead can run using either library. > [~frederik] is this something you could help with? > cc: [~chamikara] [~Juta] > [1] > https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/fastavro_it_test.py -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7455) Improve Avro IO integration test coverage on Python 3.
[ https://issues.apache.org/jira/browse/BEAM-7455?focusedWorklogId=266100&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266100 ] ASF GitHub Bot logged work on BEAM-7455: Author: ASF GitHub Bot Created on: 24/Jun/19 20:43 Start Date: 24/Jun/19 20:43 Worklog Time Spent: 10m Work Description: chamikaramj commented on issue #8818: [BEAM-7455] Improve Avro IO integration test coverage on Python 3. URL: https://github.com/apache/beam/pull/8818#issuecomment-505173276 Thanks LGTM. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266100) Time Spent: 2h 40m (was: 2.5h) > Improve Avro IO integration test coverage on Python 3. > -- > > Key: BEAM-7455 > URL: https://issues.apache.org/jira/browse/BEAM-7455 > Project: Beam > Issue Type: Sub-task > Components: io-python-avro >Reporter: Valentyn Tymofieiev >Assignee: Frederik Bode >Priority: Major > Time Spent: 2h 40m > Remaining Estimate: 0h > > It seems that we don't have an integration test for Avro IO on Python 3: > fastavro_it_test [1] depends on both avro and fastavro, however avro package > currently does not work with Beam on Python 3, so we don't have an > integration test that exercises Avro IO on Python 3. > We should add an integration test for Avro IO that does not need both > libraries at the same time, and instead can run using either library. > [~frederik] is this something you could help with? > cc: [~chamikara] [~Juta] > [1] > https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/fastavro_it_test.py -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7455) Improve Avro IO integration test coverage on Python 3.
[ https://issues.apache.org/jira/browse/BEAM-7455?focusedWorklogId=266101&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266101 ] ASF GitHub Bot logged work on BEAM-7455: Author: ASF GitHub Bot Created on: 24/Jun/19 20:43 Start Date: 24/Jun/19 20:43 Worklog Time Spent: 10m Work Description: chamikaramj commented on issue #8818: [BEAM-7455] Improve Avro IO integration test coverage on Python 3. URL: https://github.com/apache/beam/pull/8818#issuecomment-505173391 Run Python PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266101) Time Spent: 2h 50m (was: 2h 40m) > Improve Avro IO integration test coverage on Python 3. > -- > > Key: BEAM-7455 > URL: https://issues.apache.org/jira/browse/BEAM-7455 > Project: Beam > Issue Type: Sub-task > Components: io-python-avro >Reporter: Valentyn Tymofieiev >Assignee: Frederik Bode >Priority: Major > Time Spent: 2h 50m > Remaining Estimate: 0h > > It seems that we don't have an integration test for Avro IO on Python 3: > fastavro_it_test [1] depends on both avro and fastavro, however avro package > currently does not work with Beam on Python 3, so we don't have an > integration test that exercises Avro IO on Python 3. > We should add an integration test for Avro IO that does not need both > libraries at the same time, and instead can run using either library. > [~frederik] is this something you could help with? > cc: [~chamikara] [~Juta] > [1] > https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/fastavro_it_test.py -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266099&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266099 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:42 Start Date: 24/Jun/19 20:42 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505172958 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266099) Time Spent: 2h 50m (was: 2h 40m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 2h 50m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266097&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266097 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:35 Start Date: 24/Jun/19 20:35 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505170423 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266097) Time Spent: 2h 40m (was: 2.5h) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 2h 40m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=266095&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266095 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 20:32 Start Date: 24/Jun/19 20:32 Worklog Time Spent: 10m Work Description: ihji commented on issue #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933#issuecomment-505169180 run python postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266095) Time Spent: 1h 10m (was: 1h) > Retry HTTP 429 errors from GCS w/ exponential backoff when reading data > --- > > Key: BEAM-7424 > URL: https://issues.apache.org/jira/browse/BEAM-7424 > Project: Beam > Issue Type: Bug > Components: io-java-gcp, io-python-gcp, sdk-py-core >Reporter: Chamikara Jayalath >Assignee: Heejong Lee >Priority: Blocker > Fix For: 2.14.0 > > Time Spent: 1h 10m > Remaining Estimate: 0h > > This has to be done for both Java and Python SDKs. > Seems like Java SDK already retries 429 errors w/o backoff (please verify): > [https://github.com/apache/beam/blob/master/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializer.java#L185] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=266092&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266092 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 20:32 Start Date: 24/Jun/19 20:32 Worklog Time Spent: 10m Work Description: ihji commented on issue #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933#issuecomment-505169180 run python postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266092) Time Spent: 40m (was: 0.5h) > Retry HTTP 429 errors from GCS w/ exponential backoff when reading data > --- > > Key: BEAM-7424 > URL: https://issues.apache.org/jira/browse/BEAM-7424 > Project: Beam > Issue Type: Bug > Components: io-java-gcp, io-python-gcp, sdk-py-core >Reporter: Chamikara Jayalath >Assignee: Heejong Lee >Priority: Blocker > Fix For: 2.14.0 > > Time Spent: 40m > Remaining Estimate: 0h > > This has to be done for both Java and Python SDKs. > Seems like Java SDK already retries 429 errors w/o backoff (please verify): > [https://github.com/apache/beam/blob/master/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializer.java#L185] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=266091&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266091 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 20:32 Start Date: 24/Jun/19 20:32 Worklog Time Spent: 10m Work Description: ihji commented on issue #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933#issuecomment-505136412 run python postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266091) Time Spent: 0.5h (was: 20m) > Retry HTTP 429 errors from GCS w/ exponential backoff when reading data > --- > > Key: BEAM-7424 > URL: https://issues.apache.org/jira/browse/BEAM-7424 > Project: Beam > Issue Type: Bug > Components: io-java-gcp, io-python-gcp, sdk-py-core >Reporter: Chamikara Jayalath >Assignee: Heejong Lee >Priority: Blocker > Fix For: 2.14.0 > > Time Spent: 0.5h > Remaining Estimate: 0h > > This has to be done for both Java and Python SDKs. > Seems like Java SDK already retries 429 errors w/o backoff (please verify): > [https://github.com/apache/beam/blob/master/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializer.java#L185] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=266094&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266094 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 20:32 Start Date: 24/Jun/19 20:32 Worklog Time Spent: 10m Work Description: ihji commented on issue #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933#issuecomment-505169238 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266094) Time Spent: 1h (was: 50m) > Retry HTTP 429 errors from GCS w/ exponential backoff when reading data > --- > > Key: BEAM-7424 > URL: https://issues.apache.org/jira/browse/BEAM-7424 > Project: Beam > Issue Type: Bug > Components: io-java-gcp, io-python-gcp, sdk-py-core >Reporter: Chamikara Jayalath >Assignee: Heejong Lee >Priority: Blocker > Fix For: 2.14.0 > > Time Spent: 1h > Remaining Estimate: 0h > > This has to be done for both Java and Python SDKs. > Seems like Java SDK already retries 429 errors w/o backoff (please verify): > [https://github.com/apache/beam/blob/master/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializer.java#L185] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=266093&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266093 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 20:32 Start Date: 24/Jun/19 20:32 Worklog Time Spent: 10m Work Description: ihji commented on issue #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933#issuecomment-505169238 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266093) Time Spent: 50m (was: 40m) > Retry HTTP 429 errors from GCS w/ exponential backoff when reading data > --- > > Key: BEAM-7424 > URL: https://issues.apache.org/jira/browse/BEAM-7424 > Project: Beam > Issue Type: Bug > Components: io-java-gcp, io-python-gcp, sdk-py-core >Reporter: Chamikara Jayalath >Assignee: Heejong Lee >Priority: Blocker > Fix For: 2.14.0 > > Time Spent: 50m > Remaining Estimate: 0h > > This has to be done for both Java and Python SDKs. > Seems like Java SDK already retries 429 errors w/o backoff (please verify): > [https://github.com/apache/beam/blob/master/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializer.java#L185] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266089&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266089 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-504605024 run java postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266089) Time Spent: 2h 20m (was: 2h 10m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 2h 20m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266090&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266090 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505159598 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266090) Time Spent: 2.5h (was: 2h 20m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 2.5h > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266088&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266088 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505083016 run java postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266088) Time Spent: 2h 10m (was: 2h) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 2h 10m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266085&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266085 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505152050 run java postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266085) Time Spent: 1h 40m (was: 1.5h) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 1h 40m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map.
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then wee need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them.Β > Β -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266086&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266086 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505105463 Run Java PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266086) Time Spent: 1h 50m (was: 1h 40m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 1h 50m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266087&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266087 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505104605 run java precommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266087) Time Spent: 2h (was: 1h 50m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 2h > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266084&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266084 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:30 Start Date: 24/Jun/19 20:30 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505168287 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266084) Time Spent: 1.5h (was: 1h 20m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 1.5h > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266082&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266082 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:29 Start Date: 24/Jun/19 20:29 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505168114 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266082) Time Spent: 1h 20m (was: 1h 10m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 1h 20m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266080&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266080 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 20:23 Start Date: 24/Jun/19 20:23 Worklog Time Spent: 10m Work Description: y1chi commented on issue #8903: [BEAM-7389] Add Python snippet for ParDo transform URL: https://github.com/apache/beam/pull/8903#issuecomment-505165503 @aaltay @chamikaramj could you help merge the PR? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266080) Time Spent: 17.5h (was: 17h 20m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 17.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7598) failure in postCommitIT
[ https://issues.apache.org/jira/browse/BEAM-7598?focusedWorklogId=266079&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266079 ] ASF GitHub Bot logged work on BEAM-7598: Author: ASF GitHub Bot Created on: 24/Jun/19 20:23 Start Date: 24/Jun/19 20:23 Worklog Time Spent: 10m Work Description: markflyhigh commented on pull request #8916: [BEAM-7598] Do not build Python tar file in run_integration_test.sh URL: https://github.com/apache/beam/pull/8916 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266079) Time Spent: 3h (was: 2h 50m) > failure in postCommitIT > --- > > Key: BEAM-7598 > URL: https://issues.apache.org/jira/browse/BEAM-7598 > Project: Beam > Issue Type: Bug > Components: sdk-py-core, testing >Reporter: Udi Meiri >Assignee: Mark Liu >Priority: Major > Time Spent: 3h > Remaining Estimate: 0h > > 02:27:21 > Task :sdks:python:test-suites:direct:py35:postCommitIT FAILED > 02:27:21 setup.py:176: UserWarning: Python 3 support for the Apache Beam SDK > is not yet fully supported. You may encounter buggy behavior or missing > features. > 02:27:21 'Python 3 support for the Apache Beam SDK is not yet fully > supported. 
' > 02:27:21 > /home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python3_Verify_PR/src/build/gradleenv/1398941889/lib/python3.5/site-packages/setuptools/dist.py:472: > UserWarning: Normalizing '2.14.0.dev' to '2.14.0.dev0' > 02:27:21 normalized_version, > 02:27:21 warning: no files found matching 'README.md' > 02:27:21 warning: no files found matching 'NOTICE' > 02:27:21 warning: no files found matching 'LICENSE' > 02:27:21 warning: cmd: standard file not found: should have one of README, > README.rst, README.txt, README.md > 02:27:21 > 02:27:21 error: [Errno 2] No such file or directory: > 'apache-beam-2.14.0.dev0/apache_beam/io/gcp/tests' > This is from: > https://builds.apache.org/job/beam_PostCommit_Python3_Verify_PR/409/consoleFull > Another example: > https://builds.apache.org/job/beam_PostCommit_Python3_Verify_PR/423/consoleFull > [~tvalentyn][~markflyhigh] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7598) failure in postCommitIT
[ https://issues.apache.org/jira/browse/BEAM-7598?focusedWorklogId=266078&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266078 ] ASF GitHub Bot logged work on BEAM-7598: Author: ASF GitHub Bot Created on: 24/Jun/19 20:22 Start Date: 24/Jun/19 20:22 Worklog Time Spent: 10m Work Description: markflyhigh commented on issue #8916: [BEAM-7598] Do not build Python tar file in run_integration_test.sh URL: https://github.com/apache/beam/pull/8916#issuecomment-505165180 The failure job `Portable_Python` is caused by infra and not related to this change. Python Precommit has good coverage on this script. I'll go ahead and merge it. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266078) Time Spent: 2h 50m (was: 2h 40m) > failure in postCommitIT > --- > > Key: BEAM-7598 > URL: https://issues.apache.org/jira/browse/BEAM-7598 > Project: Beam > Issue Type: Bug > Components: sdk-py-core, testing >Reporter: Udi Meiri >Assignee: Mark Liu >Priority: Major > Time Spent: 2h 50m > Remaining Estimate: 0h > > 02:27:21 > Task :sdks:python:test-suites:direct:py35:postCommitIT FAILED > 02:27:21 setup.py:176: UserWarning: Python 3 support for the Apache Beam SDK > is not yet fully supported. You may encounter buggy behavior or missing > features. > 02:27:21 'Python 3 support for the Apache Beam SDK is not yet fully > supported. 
' > 02:27:21 > /home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python3_Verify_PR/src/build/gradleenv/1398941889/lib/python3.5/site-packages/setuptools/dist.py:472: > UserWarning: Normalizing '2.14.0.dev' to '2.14.0.dev0' > 02:27:21 normalized_version, > 02:27:21 warning: no files found matching 'README.md' > 02:27:21 warning: no files found matching 'NOTICE' > 02:27:21 warning: no files found matching 'LICENSE' > 02:27:21 warning: cmd: standard file not found: should have one of README, > README.rst, README.txt, README.md > 02:27:21 > 02:27:21 error: [Errno 2] No such file or directory: > 'apache-beam-2.14.0.dev0/apache_beam/io/gcp/tests' > This is from: > https://builds.apache.org/job/beam_PostCommit_Python3_Verify_PR/409/consoleFull > Another example: > https://builds.apache.org/job/beam_PostCommit_Python3_Verify_PR/423/consoleFull > [~tvalentyn][~markflyhigh] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Updated] (BEAM-7437) Integration Test for BQ streaming inserts for streaming pipelines
[ https://issues.apache.org/jira/browse/BEAM-7437?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Tanay Tummalapalli updated BEAM-7437: - Status: Open (was: Triage Needed) > Integration Test for BQ streaming inserts for streaming pipelines > - > > Key: BEAM-7437 > URL: https://issues.apache.org/jira/browse/BEAM-7437 > Project: Beam > Issue Type: Test > Components: io-python-gcp >Affects Versions: 2.12.0 >Reporter: Tanay Tummalapalli >Assignee: Tanay Tummalapalli >Priority: Minor > Labels: test > Fix For: 2.14.0 > > Time Spent: 2h > Remaining Estimate: 0h > > Integration Test for BigQuery Sink using Streaming Inserts for streaming > pipelines. > Integration tests currently exist for batch pipelines, it can also be added > for streaming pipelines using TestStream. This will be a precursor to the > failing integration test to be added for [BEAM-6611| > https://issues.apache.org/jira/browse/BEAM-6611]. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Reopened] (BEAM-7437) Integration Test for BQ streaming inserts for streaming pipelines
[ https://issues.apache.org/jira/browse/BEAM-7437?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Tanay Tummalapalli reopened BEAM-7437: -- Missed adding streaming=True to PipelineOptions > Integration Test for BQ streaming inserts for streaming pipelines > - > > Key: BEAM-7437 > URL: https://issues.apache.org/jira/browse/BEAM-7437 > Project: Beam > Issue Type: Test > Components: io-python-gcp >Affects Versions: 2.12.0 >Reporter: Tanay Tummalapalli >Assignee: Tanay Tummalapalli >Priority: Minor > Labels: test > Fix For: 2.14.0 > > Time Spent: 2h > Remaining Estimate: 0h > > Integration Test for BigQuery Sink using Streaming Inserts for streaming > pipelines. > Integration tests currently exist for batch pipelines, it can also be added > for streaming pipelines using TestStream. This will be a precursor to the > failing integration test to be added for [BEAM-6611| > https://issues.apache.org/jira/browse/BEAM-6611]. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Created] (BEAM-7627) Jenkins build failed on "No space left on device"
Mark Liu created BEAM-7627: -- Summary: Jenkins build failed on "No space left on device" Key: BEAM-7627 URL: https://issues.apache.org/jira/browse/BEAM-7627 Project: Beam Issue Type: Bug Components: build-system Reporter: Mark Liu https://builds.apache.org/job/beam_PreCommit_Portable_Python_Commit/4382/console ``` 00:02:19.095 java.lang.IllegalStateException: java.io.IOException: No space left on device 00:02:19.095 at com.gradle.scan.a.e.b.flush(SourceFile:233) 00:02:19.095 at com.gradle.scan.a.e.b.close(SourceFile:244) 00:02:19.095 at com.gradle.scan.a.e.c.close(SourceFile:68) 00:02:19.095 at com.gradle.scan.plugin.internal.k.a(SourceFile:88) 00:02:19.095 at com.gradle.scan.plugin.internal.e.c.c.b(SourceFile:121) 00:02:19.095 at com.gradle.scan.plugin.internal.e.c.c.a(SourceFile:113) 00:02:19.095 at com.gradle.scan.plugin.internal.o.a$a.a(SourceFile:30) 00:02:19.095 at com.gradle.scan.plugin.internal.o.a$a.a(SourceFile:19) 00:02:19.095 at com.gradle.scan.plugin.internal.o.a.c(SourceFile:60) 00:02:19.095 Caused by: java.io.IOException: No space left on device 00:02:19.095 at com.gradle.scan.a.e.b.flush(SourceFile:231) 00:02:19.095 ... 8 more ``` Looks like there is not enough space (disk?) on the Jenkins worker for a build. If I remember correctly, a worker will run a maximum of two build jobs concurrently. Not sure if it's caused by huge resource utilization at runtime, parallel execution or a resource leak. For the third case, we should clean up the worker and find where the leak is. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266059&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266059 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 20:12 Start Date: 24/Jun/19 20:12 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#issuecomment-505161268 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266059) Time Spent: 17h 10m (was: 17h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 17h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266060&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266060 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 20:12 Start Date: 24/Jun/19 20:12 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#issuecomment-505161268 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266060) Time Spent: 17h 20m (was: 17h 10m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 17h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266053&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266053 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 20:07 Start Date: 24/Jun/19 20:07 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505159598 Run Java_Examples_Dataflow PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266053) Time Spent: 1h 10m (was: 1h) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 1h 10m > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-6611) A Python Sink for BigQuery with File Loads in Streaming
[ https://issues.apache.org/jira/browse/BEAM-6611?focusedWorklogId=266048&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266048 ] ASF GitHub Bot logged work on BEAM-6611: Author: ASF GitHub Bot Created on: 24/Jun/19 20:00 Start Date: 24/Jun/19 20:00 Worklog Time Spent: 10m Work Description: ttanay commented on issue #8871: [BEAM-6611] BigQuery file loads in Streaming for Python SDK URL: https://github.com/apache/beam/pull/8871#issuecomment-505157403 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266048) Time Spent: 40m (was: 0.5h) > A Python Sink for BigQuery with File Loads in Streaming > --- > > Key: BEAM-6611 > URL: https://issues.apache.org/jira/browse/BEAM-6611 > Project: Beam > Issue Type: Improvement > Components: sdk-py-core >Reporter: Pablo Estrada >Assignee: Tanay Tummalapalli >Priority: Major > Labels: gsoc, gsoc2019, mentor > Time Spent: 40m > Remaining Estimate: 0h > > The Java SDK supports a bunch of methods for writing data into BigQuery, > while the Python SDK supports the following: > - Streaming inserts for streaming pipelines [As seen in [bigquery.py and > BigQueryWriteFn|https://github.com/apache/beam/blob/master/sdks/python/apache_beam/io/gcp/bigquery.py#L649-L813]] > - File loads for batch pipelines [As implemented in [PR > 7655|https://github.com/apache/beam/pull/7655]] > Quick and dirty early design doc: https://s.apache.org/beam-bqfl-py-streaming > The Java SDK also supports File Loads for Streaming pipelines [see BatchLoads > application|https://github.com/apache/beam/blob/master/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java#L1709-L1776]. 
> File loads have the advantage of being much cheaper than streaming inserts > (although they also are slower for the records to show up in the table). -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7437) Integration Test for BQ streaming inserts for streaming pipelines
[ https://issues.apache.org/jira/browse/BEAM-7437?focusedWorklogId=266041&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266041 ] ASF GitHub Bot logged work on BEAM-7437: Author: ASF GitHub Bot Created on: 24/Jun/19 19:52 Start Date: 24/Jun/19 19:52 Worklog Time Spent: 10m Work Description: ttanay commented on issue #8934: [BEAM-7437] Add streaming flag to BQ streaming inserts IT test URL: https://github.com/apache/beam/pull/8934#issuecomment-505154590 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266041) Time Spent: 2h (was: 1h 50m) > Integration Test for BQ streaming inserts for streaming pipelines > - > > Key: BEAM-7437 > URL: https://issues.apache.org/jira/browse/BEAM-7437 > Project: Beam > Issue Type: Test > Components: io-python-gcp >Affects Versions: 2.12.0 >Reporter: Tanay Tummalapalli >Assignee: Tanay Tummalapalli >Priority: Minor > Labels: test > Fix For: 2.14.0 > > Time Spent: 2h > Remaining Estimate: 0h > > Integration Test for BigQuery Sink using Streaming Inserts for streaming > pipelines. > Integration tests currently exist for batch pipelines, it can also be added > for streaming pipelines using TestStream. This will be a precursor to the > failing integration test to be added for [BEAM-6611| > https://issues.apache.org/jira/browse/BEAM-6611]. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Created] (BEAM-7626) ExecutableStage should be able to accept multiple input PCollection
Boyuan Zhang created BEAM-7626: -- Summary: ExecutableStage should be able to accept multiple input PCollection Key: BEAM-7626 URL: https://issues.apache.org/jira/browse/BEAM-7626 Project: Beam Issue Type: New Feature Components: java-fn-execution Reporter: Boyuan Zhang The current implementation of ExecutableStage only accepts one input PCollection: https://github.com/apache/beam/blob/5ee4cf4e4880782492ec26f2b454a6df9b25f1e2/model/pipeline/src/main/proto/beam_runner_api.proto#L1247. But it's possible that an ExecutableStage has multiple inputs. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7590) Convert PipelineOptionsMap to PipelineOption
[ https://issues.apache.org/jira/browse/BEAM-7590?focusedWorklogId=266034&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266034 ] ASF GitHub Bot logged work on BEAM-7590: Author: ASF GitHub Bot Created on: 24/Jun/19 19:44 Start Date: 24/Jun/19 19:44 Worklog Time Spent: 10m Work Description: riazela commented on issue #8928: [DO NOT MERGE] [BEAM-7590] Converting JDBC Pipeline Options Map to PipelineOptions. URL: https://github.com/apache/beam/pull/8928#issuecomment-505152050 run java postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266034) Time Spent: 1h (was: 50m) > Convert PipelineOptionsMap to PipelineOption > > > Key: BEAM-7590 > URL: https://issues.apache.org/jira/browse/BEAM-7590 > Project: Beam > Issue Type: Improvement > Components: dsl-sql >Reporter: Alireza Samadianzakaria >Assignee: Alireza Samadianzakaria >Priority: Minor > Time Spent: 1h > Remaining Estimate: 0h > > Currently, BeamCalciteTable keeps a map version of PipelineOptions and that > map version is used in JDBCConnection and RelNodes as well. This map is empty > when the pipeline is constructed from SQLTransform and it will have the > parameters passed from JDBC Client when the pipeline is started by JDBC path. > Since for Row-Count estimation we need to use PipelineOptions (or its > sub-classes) and we cannot convert a map that is created from a > pipelineOptions Subclasses back to PipelineOptions, it is better to keep > PipelineOptions object itself. > Another thing that will be changed as a result is set command. Currently, if > in JDBC we use Set Command for a pipeline option, it will only change that > option in the map. 
This means even if the option is incorrect, it does not > throw exception until it creates the actual Pipeline Options. However, if we > are keeping the PipelineOptions class itself, then we need to actually set > the passed parameters (using reflection) which will throw exception at the > time of setting them. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7437) Integration Test for BQ streaming inserts for streaming pipelines
[ https://issues.apache.org/jira/browse/BEAM-7437?focusedWorklogId=266033&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266033 ] ASF GitHub Bot logged work on BEAM-7437: Author: ASF GitHub Bot Created on: 24/Jun/19 19:43 Start Date: 24/Jun/19 19:43 Worklog Time Spent: 10m Work Description: ttanay commented on pull request #8934: [BEAM-7437] Add streaming flag to BQ streaming inserts IT test URL: https://github.com/apache/beam/pull/8934 I missed adding the streaming=True option to the PipelineOptions earlier. Since TestStream ends once watermark advances to infinity, this test passed. But, after adding the streaming flag, it failed, because the BigqueryFullResultMatcher did not poll for changes in a streaming test pipeline. Added BigqueryFullResultStreamingMatcher to poll BigQuery before matching. **Please** add a meaningful description for your change here Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/) Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastC
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266032&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266032 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:35 Start Date: 24/Jun/19 19:35 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8907: [BEAM-7389] Add Python snippet for WithTimestamps transform URL: https://github.com/apache/beam/pull/8907#issuecomment-504243460 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266032) Time Spent: 17h (was: 16h 50m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 17h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266031&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266031 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:35 Start Date: 24/Jun/19 19:35 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8907: [BEAM-7389] Add Python snippet for WithTimestamps transform URL: https://github.com/apache/beam/pull/8907#issuecomment-504220109 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266031) Time Spent: 16h 50m (was: 16h 40m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 16h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266029&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266029 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:35 Start Date: 24/Jun/19 19:35 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8907: [BEAM-7389] Add Python snippet for WithTimestamps transform URL: https://github.com/apache/beam/pull/8907#issuecomment-504124922 Run RAT PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266029) Time Spent: 16.5h (was: 16h 20m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 16.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266027&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266027 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:35 Start Date: 24/Jun/19 19:35 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8907: [BEAM-7389] Add Python snippet for WithTimestamps transform URL: https://github.com/apache/beam/pull/8907#issuecomment-504124090 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266027) Time Spent: 16h 10m (was: 16h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 16h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266028&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266028 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:35 Start Date: 24/Jun/19 19:35 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8907: [BEAM-7389] Add Python snippet for WithTimestamps transform URL: https://github.com/apache/beam/pull/8907#issuecomment-504124777 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266028) Time Spent: 16h 20m (was: 16h 10m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 16h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266030&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266030 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:35 Start Date: 24/Jun/19 19:35 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8907: [BEAM-7389] Add Python snippet for WithTimestamps transform URL: https://github.com/apache/beam/pull/8907#issuecomment-504182908 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266030) Time Spent: 16h 40m (was: 16.5h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 16h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266024&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266024 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504124655 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266024) Time Spent: 15h 50m (was: 15h 40m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 15h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266019&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266019 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504182082 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266019) Time Spent: 15h (was: 14h 50m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 15h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266023&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266023 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504124682 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266023) Time Spent: 15h 40m (was: 15.5h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 15h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266022&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266022 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504182108 Run Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266022) Time Spent: 15.5h (was: 15h 20m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 15.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266020&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266020 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504197179 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266020) Time Spent: 15h 10m (was: 15h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 15h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266025&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266025 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504124128 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266025) Time Spent: 16h (was: 15h 50m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 16h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266021&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266021 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:34 Start Date: 24/Jun/19 19:34 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8902: [BEAM-7389] Add Python snippet for Map transform URL: https://github.com/apache/beam/pull/8902#issuecomment-504220124 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266021) Time Spent: 15h 20m (was: 15h 10m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 15h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266017&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266017 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:33 Start Date: 24/Jun/19 19:33 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#issuecomment-505145347 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266017) Time Spent: 14h 40m (was: 14.5h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266018&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266018 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:33 Start Date: 24/Jun/19 19:33 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8903: [BEAM-7389] Add Python snippet for ParDo transform URL: https://github.com/apache/beam/pull/8903#issuecomment-505148633 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266018) Time Spent: 14h 50m (was: 14h 40m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266015&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266015 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:33 Start Date: 24/Jun/19 19:33 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8903: [BEAM-7389] Add Python snippet for ParDo transform URL: https://github.com/apache/beam/pull/8903#issuecomment-505148633 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266015) Time Spent: 14.5h (was: 14h 20m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266014&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266014 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:32 Start Date: 24/Jun/19 19:32 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8903: [BEAM-7389] Add Python snippet for ParDo transform URL: https://github.com/apache/beam/pull/8903#issuecomment-505148290 Pushed a new version where I removed most examples (which are already in FlatMap). I only left the one for a custom `DoFn`, and the one for the `DoFn` parameters (timestamp and window). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266014) Time Spent: 14h 20m (was: 14h 10m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=266001&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-266001 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:23 Start Date: 24/Jun/19 19:23 Worklog Time Spent: 10m Work Description: davidcavazos commented on issue #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#issuecomment-505145347 Run Portable_Python PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 266001) Time Spent: 14h 10m (was: 14h) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=265994&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-265994 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:20 Start Date: 24/Jun/19 19:20 Worklog Time Spent: 10m Work Description: davidcavazos commented on pull request #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#discussion_r296874128 ## File path: sdks/python/apache_beam/examples/snippets/transforms/element_wise/regex.py ## @@ -0,0 +1,161 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import absolute_import +from __future__ import print_function + + +def regex_matches(test=None): + # [START regex_matches] + import apache_beam as beam + import re + + def parse_plant(text): +m = re.match(r'^([^\s-]+)\s*-\s*(\w+)\s*-\s*(?P\w+)$', text) +if m: + yield { + 'match': m.group(0), # contains the entire matched text + 'icon': m.group(1), # ([^\s-]+) - group + 'name': m.group(2), # (\w+) - group + 'duration': m.group('duration'), # (?P\w+) - named group + } + + with beam.Pipeline() as pipeline: +plant_matches = ( +pipeline +| 'Garden plants' >> beam.Create([ +'π - Strawberry - perennial', +'π₯ - Carrot - biennial', +'# π - invalid - format', +'π\t-\tEggplant\t-\tperennial', +'π - Tomato - annual', +'π - invalid - format with trailing words', +'π₯-Potato-perennial', +]) +| 'Parse plants' >> beam.FlatMap(parse_plant) +| beam.Map(print) +) +# [END regex_all_matches] Review comment: Good catch, fixed This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 265994) Time Spent: 14h (was: 13h 50m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=265995&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-265995 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:20 Start Date: 24/Jun/19 19:20 Worklog Time Spent: 10m Work Description: davidcavazos commented on pull request #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#discussion_r296874037 ## File path: sdks/python/apache_beam/examples/snippets/transforms/element_wise/regex.py ## @@ -0,0 +1,161 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import absolute_import +from __future__ import print_function + + +def regex_matches(test=None): + # [START regex_matches] + import apache_beam as beam + import re + + def parse_plant(text): +m = re.match(r'^([^\s-]+)\s*-\s*(\w+)\s*-\s*(?P<duration>\w+)$', text) +if m: + yield { + 'match': m.group(0), # contains the entire matched text + 'icon': m.group(1), # ([^\s-]+) - group + 'name': m.group(2), # (\w+) - group + 'duration': m.group('duration'), # (?P<duration>\w+) - named group + } + + with beam.Pipeline() as pipeline: +plant_matches = ( +pipeline +| 'Garden plants' >> beam.Create([ +'🍓 - Strawberry - perennial', +'🥕 - Carrot - biennial', +'# 🍌 - invalid - format', +'🍆\t-\tEggplant\t-\tperennial', +'🍅 - Tomato - annual', +'🍉 - invalid - format with trailing words', +'🥔-Potato-perennial', +]) +| 'Parse plants' >> beam.FlatMap(parse_plant) +| beam.Map(print) Review comment: This is a sample, when people copy-paste this and run it they should see the results in their machines. It's using the `print` function which prints to `stdout` and returns `None`. This is just an easier way to demonstrate the transforms without actually going to the filesystem and explaining entire (potentially new) concepts. It only works on the DirectRunner, but we will eventually generate Jupyter Notebooks from these files too. Since the notebooks use the DirectRunner, it's actually a great experience to interactively see the results with a simple print function. For testing, the `print` function is replaced by `lambda elem: elem` which makes it a NoOp and just returns the same element so we can then test the resulting PCollection This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 265995) Time Spent: 14h (was: 13h 50m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 14h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7389) Colab examples for element-wise transforms (Python)
[ https://issues.apache.org/jira/browse/BEAM-7389?focusedWorklogId=265993&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-265993 ] ASF GitHub Bot logged work on BEAM-7389: Author: ASF GitHub Bot Created on: 24/Jun/19 19:20 Start Date: 24/Jun/19 19:20 Worklog Time Spent: 10m Work Description: davidcavazos commented on pull request #8905: [BEAM-7389] Add Python snippet for Regex transform URL: https://github.com/apache/beam/pull/8905#discussion_r296874453 ## File path: sdks/python/apache_beam/examples/snippets/transforms/element_wise/regex.py ## @@ -0,0 +1,161 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import absolute_import +from __future__ import print_function + + +def regex_matches(test=None): + # [START regex_matches] + import apache_beam as beam + import re + + def parse_plant(text): +m = re.match(r'^([^\s-]+)\s*-\s*(\w+)\s*-\s*(?P<duration>\w+)$', text) +if m: + yield { + 'match': m.group(0), # contains the entire matched text + 'icon': m.group(1), # ([^\s-]+) - group + 'name': m.group(2), # (\w+) - group + 'duration': m.group('duration'), # (?P<duration>\w+) - named group + } + + with beam.Pipeline() as pipeline: +plant_matches = ( +pipeline +| 'Garden plants' >> beam.Create([ +'🍓 - Strawberry - perennial', +'🥕 - Carrot - biennial', +'# 🍌 - invalid - format', +'🍆\t-\tEggplant\t-\tperennial', +'🍅 - Tomato - annual', +'🍉 - invalid - format with trailing words', +'🥔-Potato-perennial', +]) +| 'Parse plants' >> beam.FlatMap(parse_plant) +| beam.Map(print) +) +# [END regex_all_matches] +if test: + test(plant_matches) + + +def regex_find(test=None): Review comment: Sure, I was following the Java version conventions, but it makes more sense to make this more Python-specific. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 265993) Time Spent: 13h 50m (was: 13h 40m) > Colab examples for element-wise transforms (Python) > --- > > Key: BEAM-7389 > URL: https://issues.apache.org/jira/browse/BEAM-7389 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Rose Nguyen >Assignee: David Cavazos >Priority: Minor > Time Spent: 13h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=265967&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-265967 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 18:57 Start Date: 24/Jun/19 18:57 Worklog Time Spent: 10m Work Description: ihji commented on issue #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933#issuecomment-505136412 run python postcommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 265967) Time Spent: 20m (was: 10m) > Retry HTTP 429 errors from GCS w/ exponential backoff when reading data > --- > > Key: BEAM-7424 > URL: https://issues.apache.org/jira/browse/BEAM-7424 > Project: Beam > Issue Type: Bug > Components: io-java-gcp, io-python-gcp, sdk-py-core >Reporter: Chamikara Jayalath >Assignee: Heejong Lee >Priority: Blocker > Fix For: 2.14.0 > > Time Spent: 20m > Remaining Estimate: 0h > > This has to be done for both Java and Python SDKs. > Seems like Java SDK already retries 429 errors w/o backoff (please verify): > [https://github.com/apache/beam/blob/master/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/util/RetryHttpRequestInitializer.java#L185] -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Work logged] (BEAM-7424) Retry HTTP 429 errors from GCS w/ exponential backoff when reading data
[ https://issues.apache.org/jira/browse/BEAM-7424?focusedWorklogId=265966&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-265966 ] ASF GitHub Bot logged work on BEAM-7424: Author: ASF GitHub Bot Created on: 24/Jun/19 18:57 Start Date: 24/Jun/19 18:57 Worklog Time Spent: 10m Work Description: ihji commented on pull request #8933: [BEAM-7424] Retry HTTP 429 errors from GCS URL: https://github.com/apache/beam/pull/8933 Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/) Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python3_Verify/lastCompletedBuild/) | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/) [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/la
[jira] [Work logged] (BEAM-7598) failure in postCommitIT
[ https://issues.apache.org/jira/browse/BEAM-7598?focusedWorklogId=265956&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-265956 ] ASF GitHub Bot logged work on BEAM-7598: Author: ASF GitHub Bot Created on: 24/Jun/19 18:43 Start Date: 24/Jun/19 18:43 Worklog Time Spent: 10m Work Description: markflyhigh commented on issue #8916: [BEAM-7598] Do not build Python tar file in run_integration_test.sh URL: https://github.com/apache/beam/pull/8916#issuecomment-505131361 PTAL @tvalentyn This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 265956) Time Spent: 2h 40m (was: 2.5h) > failure in postCommitIT > --- > > Key: BEAM-7598 > URL: https://issues.apache.org/jira/browse/BEAM-7598 > Project: Beam > Issue Type: Bug > Components: sdk-py-core, testing >Reporter: Udi Meiri >Assignee: Mark Liu >Priority: Major > Time Spent: 2h 40m > Remaining Estimate: 0h > > 02:27:21 > Task :sdks:python:test-suites:direct:py35:postCommitIT FAILED > 02:27:21 setup.py:176: UserWarning: Python 3 support for the Apache Beam SDK > is not yet fully supported. You may encounter buggy behavior or missing > features. > 02:27:21 'Python 3 support for the Apache Beam SDK is not yet fully > supported. 
' > 02:27:21 > /home/jenkins/jenkins-slave/workspace/beam_PostCommit_Python3_Verify_PR/src/build/gradleenv/1398941889/lib/python3.5/site-packages/setuptools/dist.py:472: > UserWarning: Normalizing '2.14.0.dev' to '2.14.0.dev0' > 02:27:21 normalized_version, > 02:27:21 warning: no files found matching 'README.md' > 02:27:21 warning: no files found matching 'NOTICE' > 02:27:21 warning: no files found matching 'LICENSE' > 02:27:21 warning: cmd: standard file not found: should have one of README, > README.rst, README.txt, README.md > 02:27:21 > 02:27:21 error: [Errno 2] No such file or directory: > 'apache-beam-2.14.0.dev0/apache_beam/io/gcp/tests' > This is from: > https://builds.apache.org/job/beam_PostCommit_Python3_Verify_PR/409/consoleFull > Another example: > https://builds.apache.org/job/beam_PostCommit_Python3_Verify_PR/423/consoleFull > [~tvalentyn][~markflyhigh] -- This message was sent by Atlassian JIRA (v7.6.3#76005)