Repository: beam
Updated Branches:
  refs/heads/master 2689ca43c -> 420a71860


Reduce cost of sample combine test

There's no need to run this test 300 times, which takes tens of seconds.

Also split global and per-key sampling into separate tests.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d1dc391d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d1dc391d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d1dc391d

Branch: refs/heads/master
Commit: d1dc391dd17345368a03187591a99ddf3d18282f
Parents: 2689ca4
Author: Robert Bradshaw <rober...@gmail.com>
Authored: Tue Jan 31 12:51:27 2017 -0800
Committer: Robert Bradshaw <rober...@google.com>
Committed: Wed Feb 1 10:56:53 2017 -0800

----------------------------------------------------------------------
 .../apache_beam/transforms/combiners_test.py    | 32 ++++++++------------
 1 file changed, 12 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d1dc391d/sdks/python/apache_beam/transforms/combiners_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/combiners_test.py b/sdks/python/apache_beam/transforms/combiners_test.py
index 8a6d352..ba8ae82 100644
--- a/sdks/python/apache_beam/transforms/combiners_test.py
+++ b/sdks/python/apache_beam/transforms/combiners_test.py
@@ -218,29 +218,21 @@ class CombineTest(unittest.TestCase):
     assert_that(result_kbot, equal_to([('a', [0, 1, 1, 1])]), label='k:bot')
     pipeline.run()
 
-  def test_sample(self):
+  def test_global_sample(self):
 
-    # First test global samples (lots of them).
-    for ix in xrange(300):
-      pipeline = TestPipeline()
+    def is_good_sample(actual):
+      assert len(actual) == 1
+      assert sorted(actual[0]) in [[1, 1, 2], [1, 2, 2]], actual
+
+    with TestPipeline() as pipeline:
       pcoll = pipeline | 'start' >> Create([1, 1, 2, 2])
-      result = pcoll | combine.Sample.FixedSizeGlobally('sample-%d' % ix, 3)
-
-      def matcher():
-        def match(actual):
-          # There is always exactly one result.
-          equal_to([1])([len(actual)])
-          # There are always exactly three samples in the result.
-          equal_to([3])([len(actual[0])])
-          # Sampling is without replacement.
-          num_ones = sum(1 for x in actual[0] if x == 1)
-          num_twos = sum(1 for x in actual[0] if x == 2)
-          equal_to([1, 2])([num_ones, num_twos])
-        return match
-      assert_that(result, matcher())
-      pipeline.run()
+      for ix in xrange(30):
+        assert_that(
+            pcoll | combine.Sample.FixedSizeGlobally('sample-%d' % ix, 3),
+            is_good_sample,
+            label='check-%d' % ix)
 
-    # Now test per-key samples.
+  def test_per_key_sample(self):
     pipeline = TestPipeline()
     pcoll = pipeline | 'start-perkey' >> Create(
         sum(([(i, 1), (i, 1), (i, 2), (i, 2)] for i in xrange(300)), []))

Reply via email to