lukecwik commented on a change in pull request #12637:
URL: https://github.com/apache/beam/pull/12637#discussion_r492359619
##########
File path: sdks/python/apache_beam/runners/worker/data_plane_test.py
##########
@@ -108,16 +106,28 @@ def send(instruction_id, transform_id, data):
])
# Multiple interleaved writes to multiple instructions.
- send('1', transform_1, b'abc')
- send('2', transform_1, b'def')
+ stream11 = from_channel.output_stream('1', transform_1)
+ stream11.write(b'abc')
+ stream21 = from_channel.output_stream('2', transform_1)
+ stream21.write(b'def')
+ if not time_based_flush:
+ stream11.close()
self.assertEqual(
list(
itertools.islice(to_channel.input_elements('1', [transform_1]),
1)),
[
beam_fn_api_pb2.Elements.Data(
instruction_id='1', transform_id=transform_1, data=b'abc')
])
- send('2', transform_2, b'ghi')
+ if time_based_flush:
Review comment:
Why do we need to wait for the flush? Shouldn't the earlier
`stream21.write(b'def')` provide the correct ordering?
##########
File path: sdks/python/apache_beam/runners/worker/data_plane_test.py
##########
@@ -108,16 +106,28 @@ def send(instruction_id, transform_id, data):
])
# Multiple interleaved writes to multiple instructions.
- send('1', transform_1, b'abc')
- send('2', transform_1, b'def')
+ stream11 = from_channel.output_stream('1', transform_1)
+ stream11.write(b'abc')
+ stream21 = from_channel.output_stream('2', transform_1)
+ stream21.write(b'def')
+ if not time_based_flush:
+ stream11.close()
self.assertEqual(
list(
itertools.islice(to_channel.input_elements('1', [transform_1]),
1)),
[
beam_fn_api_pb2.Elements.Data(
instruction_id='1', transform_id=transform_1, data=b'abc')
])
- send('2', transform_2, b'ghi')
+ if time_based_flush:
Review comment:
Please add the details as a comment.
##########
File path: sdks/python/apache_beam/runners/worker/data_plane_test.py
##########
@@ -108,16 +106,28 @@ def send(instruction_id, transform_id, data):
])
# Multiple interleaved writes to multiple instructions.
- send('1', transform_1, b'abc')
- send('2', transform_1, b'def')
+ stream11 = from_channel.output_stream('1', transform_1)
+ stream11.write(b'abc')
+ stream21 = from_channel.output_stream('2', transform_1)
+ stream21.write(b'def')
+ if not time_based_flush:
+ stream11.close()
self.assertEqual(
list(
itertools.islice(to_channel.input_elements('1', [transform_1]),
1)),
[
beam_fn_api_pb2.Elements.Data(
instruction_id='1', transform_id=transform_1, data=b'abc')
])
- send('2', transform_2, b'ghi')
+ if time_based_flush:
Review comment:
I would have expected that `from_channel.output_stream` would share the
same queue of elements when dealing with the same instruction id instead of two
different queues with separate flushing characteristics.
##########
File path: sdks/python/apache_beam/runners/worker/data_plane_test.py
##########
@@ -108,16 +106,28 @@ def send(instruction_id, transform_id, data):
])
# Multiple interleaved writes to multiple instructions.
- send('1', transform_1, b'abc')
- send('2', transform_1, b'def')
+ stream11 = from_channel.output_stream('1', transform_1)
+ stream11.write(b'abc')
+ stream21 = from_channel.output_stream('2', transform_1)
+ stream21.write(b'def')
+ if not time_based_flush:
+ stream11.close()
self.assertEqual(
list(
itertools.islice(to_channel.input_elements('1', [transform_1]),
1)),
[
beam_fn_api_pb2.Elements.Data(
instruction_id='1', transform_id=transform_1, data=b'abc')
])
- send('2', transform_2, b'ghi')
+ if time_based_flush:
Review comment:
I would have expected that `from_channel.output_stream` would share the
same queue of elements across distinct transforms when dealing with the same
instruction id instead of multiple different queues with separate flushing
characteristics.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org