Use beam.Map rather than beam.ParDo for PubSub encoding.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fb61c540
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fb61c540
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fb61c540

Branch: refs/heads/master
Commit: fb61c540bc15bafb959d7accb7c08f6a681f62ef
Parents: 97957ea
Author: Robert Bradshaw <rober...@google.com>
Authored: Thu Jun 8 15:01:53 2017 -0700
Committer: Robert Bradshaw <rober...@google.com>
Committed: Fri Jun 9 13:00:55 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/gcp/pubsub.py      | 16 +++-------------
 sdks/python/apache_beam/io/gcp/pubsub_test.py | 10 ----------
 2 files changed, 3 insertions(+), 23 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/beam/blob/fb61c540/sdks/python/apache_beam/io/gcp/pubsub.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/pubsub.py b/sdks/python/apache_beam/io/gcp/pubsub.py
index 40326e1..6dc1528 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub.py
@@ -29,7 +29,7 @@ from apache_beam.io.iobase import Read
 from apache_beam.io.iobase import Write
 from apache_beam.runners.dataflow.native_io import iobase as dataflow_io
 from apache_beam.transforms import PTransform
-from apache_beam.transforms import ParDo
+from apache_beam.transforms import Map
 from apache_beam.transforms.display import DisplayDataItem
 
 
@@ -71,7 +71,7 @@ class ReadStringsFromPubSub(PTransform):
   def expand(self, pvalue):
     pcoll = pvalue.pipeline | Read(self._source)
     pcoll.element_type = bytes
-    pcoll = pcoll | 'decode string' >> ParDo(_decodeUtf8String)
+    pcoll = pcoll | 'DecodeString' >> Map(lambda b: b.decode('utf-8'))
     pcoll.element_type = unicode
     return pcoll
 
@@ -89,7 +89,7 @@ class WriteStringsToPubSub(PTransform):
     self._sink = _PubSubPayloadSink(topic)
 
   def expand(self, pcoll):
-    pcoll = pcoll | 'encode string' >> ParDo(_encodeUtf8String)
+    pcoll = pcoll | 'EncodeString' >> Map(lambda s: s.encode('utf-8'))
     pcoll.element_type = bytes
     return pcoll | Write(self._sink)
 
@@ -162,16 +162,6 @@ class _PubSubPayloadSink(dataflow_io.NativeSink):
         'PubSubPayloadSink is not supported in local execution.')
 
 
-def _decodeUtf8String(encoded_value):
-  """Decodes a string in utf-8 format from bytes"""
-  return encoded_value.decode('utf-8')
-
-
-def _encodeUtf8String(value):
-  """Encodes a string in utf-8 format to bytes"""
-  return value.encode('utf-8')
-
-
 class PubSubSource(dataflow_io.NativeSource):
   """Deprecated: do not use.
 

http://git-wip-us.apache.org/repos/asf/beam/blob/fb61c540/sdks/python/apache_beam/io/gcp/pubsub_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_test.py b/sdks/python/apache_beam/io/gcp/pubsub_test.py
index cf14e8c..5d3e985 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_test.py
@@ -22,8 +22,6 @@ import unittest
 
 import hamcrest as hc
 
-from apache_beam.io.gcp.pubsub import _decodeUtf8String
-from apache_beam.io.gcp.pubsub import _encodeUtf8String
 from apache_beam.io.gcp.pubsub import _PubSubPayloadSink
 from apache_beam.io.gcp.pubsub import _PubSubPayloadSource
 from apache_beam.io.gcp.pubsub import ReadStringsFromPubSub
@@ -120,14 +118,6 @@ class TestPubSubSink(unittest.TestCase):
     hc.assert_that(dd.items, hc.contains_inanyorder(*expected_items))
 
 
-class TestEncodeDecodeUtf8String(unittest.TestCase):
-  def test_encode(self):
-    self.assertEqual(b'test_data', _encodeUtf8String('test_data'))
-
-  def test_decode(self):
-    self.assertEqual('test_data', _decodeUtf8String(b'test_data'))
-
-
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.INFO)
   unittest.main()