Repository: beam
Updated Branches:
  refs/heads/master 2689ca43c -> 420a71860
Reduce cost of sample combine test

There's no need to run this test 300 times, which takes tens of seconds.
Also split global and per-key sampling into separate tests.

Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d1dc391d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d1dc391d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d1dc391d

Branch: refs/heads/master
Commit: d1dc391dd17345368a03187591a99ddf3d18282f
Parents: 2689ca4
Author: Robert Bradshaw <rober...@gmail.com>
Authored: Tue Jan 31 12:51:27 2017 -0800
Committer: Robert Bradshaw <rober...@google.com>
Committed: Wed Feb 1 10:56:53 2017 -0800

----------------------------------------------------------------------
 .../apache_beam/transforms/combiners_test.py | 32 ++++++++------------
 1 file changed, 12 insertions(+), 20 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/beam/blob/d1dc391d/sdks/python/apache_beam/transforms/combiners_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/combiners_test.py b/sdks/python/apache_beam/transforms/combiners_test.py
index 8a6d352..ba8ae82 100644
--- a/sdks/python/apache_beam/transforms/combiners_test.py
+++ b/sdks/python/apache_beam/transforms/combiners_test.py
@@ -218,29 +218,21 @@ class CombineTest(unittest.TestCase):
     assert_that(result_kbot, equal_to([('a', [0, 1, 1, 1])]), label='k:bot')
     pipeline.run()
 
-  def test_sample(self):
+  def test_global_sample(self):
 
-    # First test global samples (lots of them).
-    for ix in xrange(300):
-      pipeline = TestPipeline()
+    def is_good_sample(actual):
+      assert len(actual) == 1
+      assert sorted(actual[0]) in [[1, 1, 2], [1, 2, 2]], actual
+
+    with TestPipeline() as pipeline:
       pcoll = pipeline | 'start' >> Create([1, 1, 2, 2])
-      result = pcoll | combine.Sample.FixedSizeGlobally('sample-%d' % ix, 3)
-
-      def matcher():
-        def match(actual):
-          # There is always exactly one result.
-          equal_to([1])([len(actual)])
-          # There are always exactly three samples in the result.
-          equal_to([3])([len(actual[0])])
-          # Sampling is without replacement.
-          num_ones = sum(1 for x in actual[0] if x == 1)
-          num_twos = sum(1 for x in actual[0] if x == 2)
-          equal_to([1, 2])([num_ones, num_twos])
-        return match
-      assert_that(result, matcher())
-      pipeline.run()
+      for ix in xrange(30):
+        assert_that(
+            pcoll | combine.Sample.FixedSizeGlobally('sample-%d' % ix, 3),
+            is_good_sample,
+            label='check-%d' % ix)
 
-    # Now test per-key samples.
+  def test_per_key_sample(self):
     pipeline = TestPipeline()
     pcoll = pipeline | 'start-perkey' >> Create(
         sum(([(i, 1), (i, 1), (i, 2), (i, 2)] for i in xrange(300)), []))