[jira] [Assigned] (BEAM-10140) Add cross-language wrapper for Java's SpannerIO Read
[ https://issues.apache.org/jira/browse/BEAM-10140?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Michał Walenia reassigned BEAM-10140: - Assignee: Michał Walenia > Add cross-language wrapper for Java's SpannerIO Read > > > Key: BEAM-10140 > URL: https://issues.apache.org/jira/browse/BEAM-10140 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Affects Versions: Not applicable >Reporter: Piotr Szuberski >Assignee: Michał Walenia >Priority: P2 > Labels: portability > Fix For: Not applicable > > > Add cross-language wrapper for Java's SpannerIO Read -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (BEAM-10139) Add cross-language wrapper for Java's SpannerIO Write
[ https://issues.apache.org/jira/browse/BEAM-10139?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Michał Walenia reassigned BEAM-10139: - Assignee: Michał Walenia > Add cross-language wrapper for Java's SpannerIO Write > - > > Key: BEAM-10139 > URL: https://issues.apache.org/jira/browse/BEAM-10139 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Affects Versions: Not applicable >Reporter: Piotr Szuberski >Assignee: Michał Walenia >Priority: P2 > Labels: portability > Fix For: Not applicable > > > Add cross-language wrapper for Java's SpannerIO Write -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9008) Add readAll() method to CassandraIO
[ https://issues.apache.org/jira/browse/BEAM-9008?focusedWorklogId=440603&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440603 ] ASF GitHub Bot logged work on BEAM-9008: Author: ASF GitHub Bot Created on: 03/Jun/20 05:38 Start Date: 03/Jun/20 05:38 Worklog Time Spent: 10m Work Description: vmarquez commented on a change in pull request #10546: URL: https://github.com/apache/beam/pull/10546#discussion_r434318846 ## File path: sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java ## @@ -370,384 +488,16 @@ private CassandraIO() {} return autoBuild(); } } - } - - @VisibleForTesting - static class CassandraSource extends BoundedSource { -final Read spec; -final List splitQueries; -// split source ached size - can't be calculated when already split -Long estimatedSize; -private static final String MURMUR3PARTITIONER = "org.apache.cassandra.dht.Murmur3Partitioner"; - -CassandraSource(Read spec, List splitQueries) { - this(spec, splitQueries, null); -} - -private CassandraSource(Read spec, List splitQueries, Long estimatedSize) { - this.estimatedSize = estimatedSize; - this.spec = spec; - this.splitQueries = splitQueries; -} - -@Override -public Coder getOutputCoder() { - return spec.coder(); -} - -@Override -public BoundedReader createReader(PipelineOptions pipelineOptions) { - return new CassandraReader(this); -} - -@Override -public List> split( -long desiredBundleSizeBytes, PipelineOptions pipelineOptions) { - try (Cluster cluster = - getCluster( - spec.hosts(), - spec.port(), - spec.username(), - spec.password(), - spec.localDc(), - spec.consistencyLevel())) { -if (isMurmur3Partitioner(cluster)) { - LOG.info("Murmur3Partitioner detected, splitting"); - return splitWithTokenRanges( - spec, desiredBundleSizeBytes, getEstimatedSizeBytes(pipelineOptions), cluster); -} else { - LOG.warn( - "Only Murmur3Partitioner is supported for splitting, using a unique source for " - + "the read"); - return 
Collections.singletonList( - new CassandraIO.CassandraSource<>(spec, Collections.singletonList(buildQuery(spec; -} - } -} - -private static String buildQuery(Read spec) { - return (spec.query() == null) - ? String.format("SELECT * FROM %s.%s", spec.keyspace().get(), spec.table().get()) - : spec.query().get().toString(); -} - -/** - * Compute the number of splits based on the estimated size and the desired bundle size, and - * create several sources. - */ -private List> splitWithTokenRanges( -CassandraIO.Read spec, -long desiredBundleSizeBytes, -long estimatedSizeBytes, -Cluster cluster) { - long numSplits = - getNumSplits(desiredBundleSizeBytes, estimatedSizeBytes, spec.minNumberOfSplits()); - LOG.info("Number of desired splits is {}", numSplits); - - SplitGenerator splitGenerator = new SplitGenerator(cluster.getMetadata().getPartitioner()); - List tokens = - cluster.getMetadata().getTokenRanges().stream() - .map(tokenRange -> new BigInteger(tokenRange.getEnd().getValue().toString())) - .collect(Collectors.toList()); - List> splits = splitGenerator.generateSplits(numSplits, tokens); - LOG.info("{} splits were actually generated", splits.size()); - - final String partitionKey = - cluster.getMetadata().getKeyspace(spec.keyspace().get()).getTable(spec.table().get()) - .getPartitionKey().stream() - .map(ColumnMetadata::getName) - .collect(Collectors.joining(",")); - - List tokenRanges = - getTokenRanges(cluster, spec.keyspace().get(), spec.table().get()); - final long estimatedSize = getEstimatedSizeBytesFromTokenRanges(tokenRanges) / splits.size(); - - List> sources = new ArrayList<>(); - for (List split : splits) { -List queries = new ArrayList<>(); -for (RingRange range : split) { - if (range.isWrapping()) { Review comment: Hm, let me think about this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440603) Time Spent: 13h 10m (was: 13h) > Add readAll() method to CassandraIO
[jira] [Work logged] (BEAM-9008) Add readAll() method to CassandraIO
[ https://issues.apache.org/jira/browse/BEAM-9008?focusedWorklogId=440602&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440602 ] ASF GitHub Bot logged work on BEAM-9008: Author: ASF GitHub Bot Created on: 03/Jun/20 05:37 Start Date: 03/Jun/20 05:37 Worklog Time Spent: 10m Work Description: vmarquez commented on a change in pull request #10546: URL: https://github.com/apache/beam/pull/10546#discussion_r434318532 ## File path: sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java ## @@ -326,7 +371,78 @@ private CassandraIO() {} checkArgument(entity() != null, "withEntity() is required"); checkArgument(coder() != null, "withCoder() is required"); - return input.apply(org.apache.beam.sdk.io.Read.from(new CassandraSource<>(this, null))); + ReadAll readAll = CassandraIO.readAll().withCoder(this.coder()); + + return input + .apply(Create.of(this)) + .apply(ParDo.of(new SplitFn())) + .setCoder(SerializableCoder.of(new TypeDescriptor>() {})) + // .apply(Reshuffle.viaRandomKey()) + .apply(readAll); +} + +private class SplitFn extends DoFn, Read> { + + @ProcessElement + public void process( + @Element CassandraIO.Read read, OutputReceiver> outputReceiver) { + +try (Cluster cluster = +getCluster( +read.hosts(), +read.port(), +read.username(), +read.password(), +read.localDc(), +read.consistencyLevel())) { + if (isMurmur3Partitioner(cluster)) { +LOG.info("Murmur3Partitioner detected, splitting"); + +List tokens = +cluster.getMetadata().getTokenRanges().stream() +.map(tokenRange -> new BigInteger(tokenRange.getEnd().getValue().toString())) +.collect(Collectors.toList()); +Integer splitCount = cluster.getMetadata().getAllHosts().size(); +if (read.minNumberOfSplits() != null && read.minNumberOfSplits().get() != null) { + splitCount = read.minNumberOfSplits().get(); +} + +SplitGenerator splitGenerator = +new SplitGenerator(cluster.getMetadata().getPartitioner()); +splitGenerator +.generateSplits(splitCount, tokens) 
+.forEach( +rr -> +outputReceiver.output( +CassandraIO.read() Review comment: I think we still need to create a new `Read` because the `SplitGenerator` returns a `List>`, so each of the outer list will be a different Read, the inner List will be set to the RingRange. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440602) Time Spent: 13h (was: 12h 50m) > Add readAll() method to CassandraIO > --- > > Key: BEAM-9008 > URL: https://issues.apache.org/jira/browse/BEAM-9008 > Project: Beam > Issue Type: New Feature > Components: io-java-cassandra >Affects Versions: 2.16.0 >Reporter: vincent marquez >Assignee: vincent marquez >Priority: P3 > Time Spent: 13h > Remaining Estimate: 0h > > When querying a large cassandra database, it's often *much* more useful to > programatically generate the queries needed to to be run rather than reading > all partitions and attempting some filtering. > As an example: > {code:java} > public class Event { >@PartitionKey(0) public UUID accountId; >@PartitionKey(1)public String yearMonthDay; >@ClusteringKey public UUID eventId; >//other data... > }{code} > If there is ten years worth of data, you may want to only query one year's > worth. Here each token range would represent one 'token' but all events for > the day. > {code:java} > Set accounts = getRelevantAccounts(); > Set dateRange = generateDateRange("2018-01-01", "2019-01-01"); > PCollection tokens = generateTokens(accounts, dateRange); > {code} > > I propose an additional _readAll()_ PTransform that can take a PCollection > of token ranges and can return a PCollection of what the query would > return. 
> *Question: How much code should be in common between both methods?* > Currently the read connector already groups all partitions into a List of > Token Ranges, so it would be simple to refactor the current read() based > me
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440601&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440601 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 03/Jun/20 05:32 Start Date: 03/Jun/20 05:32 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11898: URL: https://github.com/apache/beam/pull/11898#issuecomment-637963899 Run Java PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440601) Time Spent: 1h 50m (was: 1h 40m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 1h 50m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:292) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at org.junit.runners.ParentRunner.run(ParentRunner.java:396) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.runTestClass(JUnitTestClassExecutor.java:110) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor
[jira] [Work logged] (BEAM-9008) Add readAll() method to CassandraIO
[ https://issues.apache.org/jira/browse/BEAM-9008?focusedWorklogId=440596&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440596 ] ASF GitHub Bot logged work on BEAM-9008: Author: ASF GitHub Bot Created on: 03/Jun/20 04:45 Start Date: 03/Jun/20 04:45 Worklog Time Spent: 10m Work Description: vmarquez commented on a change in pull request #10546: URL: https://github.com/apache/beam/pull/10546#discussion_r434304763 ## File path: sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java ## @@ -480,66 +527,22 @@ public void testCustomMapperImplDelete() { assertEquals(1, counter.intValue()); } - @Test - public void testSplit() throws Exception { -PipelineOptions options = PipelineOptionsFactory.create(); -CassandraIO.Read read = -CassandraIO.read() -.withHosts(Collections.singletonList(CASSANDRA_HOST)) -.withPort(cassandraPort) -.withKeyspace(CASSANDRA_KEYSPACE) -.withTable(CASSANDRA_TABLE) -.withEntity(Scientist.class) -.withCoder(SerializableCoder.of(Scientist.class)); - -// initialSource will be read without splitting (which does not happen in production) -// so we need to provide splitQueries to avoid NPE in source.reader.start() -String splitQuery = QueryBuilder.select().from(CASSANDRA_KEYSPACE, CASSANDRA_TABLE).toString(); -CassandraIO.CassandraSource initialSource = -new CassandraIO.CassandraSource<>(read, Collections.singletonList(splitQuery)); -int desiredBundleSizeBytes = 2048; -long estimatedSize = initialSource.getEstimatedSizeBytes(options); -List> splits = initialSource.split(desiredBundleSizeBytes, options); -SourceTestUtils.assertSourcesEqualReferenceSource(initialSource, splits, options); -float expectedNumSplitsloat = -(float) initialSource.getEstimatedSizeBytes(options) / desiredBundleSizeBytes; -long sum = 0; - -for (BoundedSource subSource : splits) { - sum += subSource.getEstimatedSizeBytes(options); -} - -// due to division and cast estimateSize != sum but will be close. 
Exact equals checked below -assertEquals((long) (estimatedSize / splits.size()) * splits.size(), sum); - -int expectedNumSplits = (int) Math.ceil(expectedNumSplitsloat); -assertEquals("Wrong number of splits", expectedNumSplits, splits.size()); -int emptySplits = 0; -for (BoundedSource subSource : splits) { - if (readFromSource(subSource, options).isEmpty()) { -emptySplits += 1; - } -} -assertThat( -"There are too many empty splits, parallelism is sub-optimal", -emptySplits, -lessThan((int) (ACCEPTABLE_EMPTY_SPLITS_PERCENTAGE * splits.size(; - } - private List getRows(String table) { ResultSet result = session.execute( String.format("select person_id,person_name from %s.%s", CASSANDRA_KEYSPACE, table)); return result.all(); } + // TEMP TEST Review comment: Ooops I think this was left in from when I was experimenting with something, I will remove. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440596) Time Spent: 12h 50m (was: 12h 40m) > Add readAll() method to CassandraIO > --- > > Key: BEAM-9008 > URL: https://issues.apache.org/jira/browse/BEAM-9008 > Project: Beam > Issue Type: New Feature > Components: io-java-cassandra >Affects Versions: 2.16.0 >Reporter: vincent marquez >Assignee: vincent marquez >Priority: P3 > Time Spent: 12h 50m > Remaining Estimate: 0h > > When querying a large cassandra database, it's often *much* more useful to > programatically generate the queries needed to to be run rather than reading > all partitions and attempting some filtering. > As an example: > {code:java} > public class Event { >@PartitionKey(0) public UUID accountId; >@PartitionKey(1)public String yearMonthDay; >@ClusteringKey public UUID eventId; >//other data... 
> }{code} > If there are ten years' worth of data, you may want to only query one year's > worth. Here each token range would represent one 'token' but all events for > the day. > {code:java} > Set accounts = getRelevantAccounts(); > Set dateRange = generateDateRange("2018-01-01", "2019-01-01"); > PCollection tokens = generateTokens(accounts, dateRange); > {code} > > I propose
[jira] [Work logged] (BEAM-9008) Add readAll() method to CassandraIO
[ https://issues.apache.org/jira/browse/BEAM-9008?focusedWorklogId=440594&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440594 ] ASF GitHub Bot logged work on BEAM-9008: Author: ASF GitHub Bot Created on: 03/Jun/20 04:41 Start Date: 03/Jun/20 04:41 Worklog Time Spent: 10m Work Description: vmarquez commented on a change in pull request #10546: URL: https://github.com/apache/beam/pull/10546#discussion_r434303784 ## File path: sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java ## @@ -326,7 +371,78 @@ private CassandraIO() {} checkArgument(entity() != null, "withEntity() is required"); checkArgument(coder() != null, "withCoder() is required"); - return input.apply(org.apache.beam.sdk.io.Read.from(new CassandraSource<>(this, null))); + ReadAll readAll = CassandraIO.readAll().withCoder(this.coder()); + + return input + .apply(Create.of(this)) + .apply(ParDo.of(new SplitFn())) + .setCoder(SerializableCoder.of(new TypeDescriptor>() {})) + // .apply(Reshuffle.viaRandomKey()) + .apply(readAll); +} + +private class SplitFn extends DoFn, Read> { + + @ProcessElement + public void process( + @Element CassandraIO.Read read, OutputReceiver> outputReceiver) { + +try (Cluster cluster = +getCluster( +read.hosts(), +read.port(), +read.username(), +read.password(), +read.localDc(), +read.consistencyLevel())) { + if (isMurmur3Partitioner(cluster)) { Review comment: Oops I did forget that, I will add. Good catch. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440594) Time Spent: 12h 40m (was: 12.5h) > Add readAll() method to CassandraIO > --- > > Key: BEAM-9008 > URL: https://issues.apache.org/jira/browse/BEAM-9008 > Project: Beam > Issue Type: New Feature > Components: io-java-cassandra >Affects Versions: 2.16.0 >Reporter: vincent marquez >Assignee: vincent marquez >Priority: P3 > Time Spent: 12h 40m > Remaining Estimate: 0h > > When querying a large Cassandra database, it's often *much* more useful to > programmatically generate the queries needed to be run rather than reading > all partitions and attempting some filtering. > As an example: > {code:java} > public class Event { >@PartitionKey(0) public UUID accountId; >@PartitionKey(1)public String yearMonthDay; >@ClusteringKey public UUID eventId; >//other data... > }{code} > If there are ten years' worth of data, you may want to only query one year's > worth. Here each token range would represent one 'token' but all events for > the day. > {code:java} > Set accounts = getRelevantAccounts(); > Set dateRange = generateDateRange("2018-01-01", "2019-01-01"); > PCollection tokens = generateTokens(accounts, dateRange); > {code} > > I propose an additional _readAll()_ PTransform that can take a PCollection > of token ranges and can return a PCollection of what the query would > return. > *Question: How much code should be in common between both methods?* > Currently the read connector already groups all partitions into a List of > Token Ranges, so it would be simple to refactor the current read() based > method to a 'ParDo' based one and have them both share the same function.
> Reasons against sharing code between read and readAll > * Not having the read based method return a BoundedSource connector would > mean losing the ability to know the size of the data returned > * Currently the CassandraReader executes all the grouped TokenRange queries > *asynchronously* which is (maybe?) fine when all that's happening is > splitting up all the partition ranges but terrible for executing potentially > millions of queries. > Reasons _for_ sharing code would be a simplified code base and that both of > the above issues would most likely have a negligible performance impact. > > > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10027) Support for Kotlin-based Beam Katas
[ https://issues.apache.org/jira/browse/BEAM-10027?focusedWorklogId=440593&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440593 ] ASF GitHub Bot logged work on BEAM-10027: - Author: ASF GitHub Bot Created on: 03/Jun/20 04:33 Start Date: 03/Jun/20 04:33 Worklog Time Spent: 10m Work Description: henryken commented on pull request #11761: URL: https://github.com/apache/beam/pull/11761#issuecomment-637946055 We've made it! 🙌 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440593) Time Spent: 15h 40m (was: 15.5h) > Support for Kotlin-based Beam Katas > --- > > Key: BEAM-10027 > URL: https://issues.apache.org/jira/browse/BEAM-10027 > Project: Beam > Issue Type: Improvement > Components: katas >Reporter: Rion Williams >Assignee: Rion Williams >Priority: P2 > Original Estimate: 8h > Time Spent: 15h 40m > Remaining Estimate: 0h > > Currently, there are a series of examples available demonstrating the use of > Apache Beam with Kotlin. It would be nice to have support for the same Beam > Katas that exist for Python, Go, and Java to also support Kotlin. > The port itself shouldn't be that involved since it can still target the JVM, > so it would likely just require the inclusion of Kotlin dependencies and a > conversion for all of the existing Java examples. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440592&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440592 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 03/Jun/20 04:32 Start Date: 03/Jun/20 04:32 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11898: URL: https://github.com/apache/beam/pull/11898#issuecomment-637945801 Run Java PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440592) Time Spent: 1h 40m (was: 1.5h) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 1h 40m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:292) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at org.junit.runners.ParentRunner.run(ParentRunner.java:396) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.runTestClass(JUnitTestClassExecutor.java:110) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.e
[jira] [Updated] (BEAM-998) Consider asking Apache to register Apache Beam trademark
[ https://issues.apache.org/jira/browse/BEAM-998?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Beam JIRA Bot updated BEAM-998: --- Labels: stale-P2 (was: ) > Consider asking Apache to register Apache Beam trademark > > > Key: BEAM-998 > URL: https://issues.apache.org/jira/browse/BEAM-998 > Project: Beam > Issue Type: Task > Components: project-management >Affects Versions: Not applicable >Reporter: Dan Halperin >Priority: P2 > Labels: stale-P2 > > "Registered Trademarks If a PMC would like to request legal registration of > their project's trademarks, please registering their marks, please follow the > REGREQUEST instructions." > http://www.apache.org/foundation/marks/pmcs#other > The link to REGREQUEST: > http://www.apache.org/foundation/marks/register#register -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (BEAM-998) Consider asking Apache to register Apache Beam trademark
[ https://issues.apache.org/jira/browse/BEAM-998?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17124586#comment-17124586 ] Beam JIRA Bot commented on BEAM-998: This issue is P2 but has been unassigned without any comment for 60 days so it has been labeled "stale-P2". If this issue is still affecting you, we care! Please comment and remove the label. Otherwise, in 14 days the issue will be moved to P3. Please see https://beam.apache.org/contribute/jira-priorities/ for a detailed explanation of what these priorities mean. > Consider asking Apache to register Apache Beam trademark > > > Key: BEAM-998 > URL: https://issues.apache.org/jira/browse/BEAM-998 > Project: Beam > Issue Type: Task > Components: project-management >Affects Versions: Not applicable >Reporter: Dan Halperin >Priority: P2 > Labels: stale-P2 > > "Registered Trademarks If a PMC would like to request legal registration of > their project's trademarks, please registering their marks, please follow the > REGREQUEST instructions." > http://www.apache.org/foundation/marks/pmcs#other > The link to REGREQUEST: > http://www.apache.org/foundation/marks/register#register -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (BEAM-2939) Fn API SDF support
[ https://issues.apache.org/jira/browse/BEAM-2939?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17124583#comment-17124583 ] Beam JIRA Bot commented on BEAM-2939: - This issue is assigned but has not received an update in 30 days so it has been labeled "stale-assigned". If you are still working on the issue, please give an update and remove the label. If you are no longer working on the issue, please unassign so someone else may work on it. In 7 days the issue will be automatically unassigned. > Fn API SDF support > -- > > Key: BEAM-2939 > URL: https://issues.apache.org/jira/browse/BEAM-2939 > Project: Beam > Issue Type: Improvement > Components: beam-model >Reporter: Henning Rohde >Assignee: Luke Cwik >Priority: P2 > Labels: portability, stale-assigned > Time Spent: 35h 50m > Remaining Estimate: 0h > > The Fn API should support streaming SDF. Detailed design TBD. > Once design is ready, expand subtasks similarly to BEAM-2822. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-2939) Fn API SDF support
[ https://issues.apache.org/jira/browse/BEAM-2939?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Beam JIRA Bot updated BEAM-2939: Labels: portability stale-assigned (was: portability) > Fn API SDF support > -- > > Key: BEAM-2939 > URL: https://issues.apache.org/jira/browse/BEAM-2939 > Project: Beam > Issue Type: Improvement > Components: beam-model >Reporter: Henning Rohde >Assignee: Luke Cwik >Priority: P2 > Labels: portability, stale-assigned > Time Spent: 35h 50m > Remaining Estimate: 0h > > The Fn API should support streaming SDF. Detailed design TBD. > Once design is ready, expand subtasks similarly to BEAM-2822. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9948) Add Firefly to website
[ https://issues.apache.org/jira/browse/BEAM-9948?focusedWorklogId=440588&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440588 ] ASF GitHub Bot logged work on BEAM-9948: Author: ASF GitHub Bot Created on: 03/Jun/20 04:00 Start Date: 03/Jun/20 04:00 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11780: URL: https://github.com/apache/beam/pull/11780#issuecomment-637938193 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440588) Time Spent: 2h (was: 1h 50m) > Add Firefly to website > -- > > Key: BEAM-9948 > URL: https://issues.apache.org/jira/browse/BEAM-9948 > Project: Beam > Issue Type: New Feature > Components: website >Reporter: Kyle Weaver >Assignee: Aizhamal Nurmamat kyzy >Priority: P2 > Time Spent: 2h > Remaining Estimate: 0h > > Beam has an adorable new mascot, the Firefly: > https://blogs.apache.org/foundation/entry/success-at-apache-bringing-the > We should add a usage guide for the Firefly to the website along with our > logos. (The blog post linked contains a model sheet for the Firefly we can > use.) -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9948) Add Firefly to website
[ https://issues.apache.org/jira/browse/BEAM-9948?focusedWorklogId=440587&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440587 ] ASF GitHub Bot logged work on BEAM-9948: Author: ASF GitHub Bot Created on: 03/Jun/20 03:55 Start Date: 03/Jun/20 03:55 Worklog Time Spent: 10m Work Description: aijamalnk commented on pull request #11780: URL: https://github.com/apache/beam/pull/11780#issuecomment-637937054 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440587) Time Spent: 1h 50m (was: 1h 40m) > Add Firefly to website > -- > > Key: BEAM-9948 > URL: https://issues.apache.org/jira/browse/BEAM-9948 > Project: Beam > Issue Type: New Feature > Components: website >Reporter: Kyle Weaver >Assignee: Aizhamal Nurmamat kyzy >Priority: P2 > Time Spent: 1h 50m > Remaining Estimate: 0h > > Beam has an adorable new mascot, the Firefly: > https://blogs.apache.org/foundation/entry/success-at-apache-bringing-the > We should add a usage guide for the Firefly to the website along with our > logos. (The blog post linked contains a model sheet for the Firefly we can > use.) -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9948) Add Firefly to website
[ https://issues.apache.org/jira/browse/BEAM-9948?focusedWorklogId=440586&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440586 ] ASF GitHub Bot logged work on BEAM-9948: Author: ASF GitHub Bot Created on: 03/Jun/20 03:55 Start Date: 03/Jun/20 03:55 Worklog Time Spent: 10m Work Description: aijamalnk commented on pull request #11780: URL: https://github.com/apache/beam/pull/11780#issuecomment-637936990 @iemejia can you try merging now? I've tried to rebase the change onto master This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440586) Time Spent: 1h 40m (was: 1.5h) > Add Firefly to website > -- > > Key: BEAM-9948 > URL: https://issues.apache.org/jira/browse/BEAM-9948 > Project: Beam > Issue Type: New Feature > Components: website >Reporter: Kyle Weaver >Assignee: Aizhamal Nurmamat kyzy >Priority: P2 > Time Spent: 1h 40m > Remaining Estimate: 0h > > Beam has an adorable new mascot, the Firefly: > https://blogs.apache.org/foundation/entry/success-at-apache-bringing-the > We should add a usage guide for the Firefly to the website along with our > logos. (The blog post linked contains a model sheet for the Firefly we can > use.) -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9948) Add Firefly to website
[ https://issues.apache.org/jira/browse/BEAM-9948?focusedWorklogId=440585&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440585 ] ASF GitHub Bot logged work on BEAM-9948: Author: ASF GitHub Bot Created on: 03/Jun/20 03:54 Start Date: 03/Jun/20 03:54 Worklog Time Spent: 10m Work Description: aijamalnk commented on pull request #11780: URL: https://github.com/apache/beam/pull/11780#issuecomment-637936845 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440585) Time Spent: 1.5h (was: 1h 20m) > Add Firefly to website > -- > > Key: BEAM-9948 > URL: https://issues.apache.org/jira/browse/BEAM-9948 > Project: Beam > Issue Type: New Feature > Components: website >Reporter: Kyle Weaver >Assignee: Aizhamal Nurmamat kyzy >Priority: P2 > Time Spent: 1.5h > Remaining Estimate: 0h > > Beam has an adorable new mascot, the Firefly: > https://blogs.apache.org/foundation/entry/success-at-apache-bringing-the > We should add a usage guide for the Firefly to the website along with our > logos. (The blog post linked contains a model sheet for the Firefly we can > use.) -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9977) Build Kafka Read on top of Java SplittableDoFn
[ https://issues.apache.org/jira/browse/BEAM-9977?focusedWorklogId=440571&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440571 ] ASF GitHub Bot logged work on BEAM-9977: Author: ASF GitHub Bot Created on: 03/Jun/20 02:54 Start Date: 03/Jun/20 02:54 Worklog Time Spent: 10m Work Description: boyuanzz commented on pull request #11749: URL: https://github.com/apache/beam/pull/11749#issuecomment-637922023 Run Java PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440571) Time Spent: 6h 40m (was: 6.5h) > Build Kafka Read on top of Java SplittableDoFn > -- > > Key: BEAM-9977 > URL: https://issues.apache.org/jira/browse/BEAM-9977 > Project: Beam > Issue Type: New Feature > Components: io-java-kafka >Reporter: Boyuan Zhang >Assignee: Boyuan Zhang >Priority: P2 > Time Spent: 6h 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-7074) FnApiRunner fails to wire multiple timer specs in single pardo
[ https://issues.apache.org/jira/browse/BEAM-7074?focusedWorklogId=440570&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440570 ] ASF GitHub Bot logged work on BEAM-7074: Author: ASF GitHub Bot Created on: 03/Jun/20 02:53 Start Date: 03/Jun/20 02:53 Worklog Time Spent: 10m Work Description: boyuanzz commented on pull request #11894: URL: https://github.com/apache/beam/pull/11894#issuecomment-637921787 @robertwb Please take another look : ) A simple clear timer support is added to FnApiRunner. Thanks! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440570) Time Spent: 1h 40m (was: 1.5h) > FnApiRunner fails to wire multiple timer specs in single pardo > -- > > Key: BEAM-7074 > URL: https://issues.apache.org/jira/browse/BEAM-7074 > Project: Beam > Issue Type: Bug > Components: sdk-py-harness >Affects Versions: 2.11.0 >Reporter: Thomas Weise >Priority: P2 > Labels: stale-P2 > Fix For: 2.21.0 > > Time Spent: 1h 40m > Remaining Estimate: 0h > > Multiple timer specs in a ParDo yield "NotImplementedError: Timers and side > inputs." -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10141) HL7v2IO read methods should assign sendTime timestamps
[ https://issues.apache.org/jira/browse/BEAM-10141?focusedWorklogId=440569&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440569 ] ASF GitHub Bot logged work on BEAM-10141: - Author: ASF GitHub Bot Created on: 03/Jun/20 02:47 Start Date: 03/Jun/20 02:47 Worklog Time Spent: 10m Work Description: jaketf commented on a change in pull request #11862: URL: https://github.com/apache/beam/pull/11862#discussion_r434278353 ## File path: sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HL7v2IO.java ## @@ -179,25 +189,51 @@ public static ListHL7v2Messages readAll(List hl7v2Stores) { return new ListHL7v2Messages(StaticValueProvider.of(hl7v2Stores), StaticValueProvider.of(null)); } + /** Read all HL7v2 Messages from multiple stores as sendTime {@link TimestampedValue}s. */ + public static ListTimestampedHL7v2Messages readAllWithTimestamps(List hl7v2Stores) { Review comment: this is a good point. Unfortunately, I think replacing the existing static methods with a `withFilter` builder would be interface breaking. I've added this method to `ListHL7v2Messages` as it might be more natural for some to use. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440569) Time Spent: 50m (was: 40m) > HL7v2IO read methods should assign sendTime timestamps > -- > > Key: BEAM-10141 > URL: https://issues.apache.org/jira/browse/BEAM-10141 > Project: Beam > Issue Type: Improvement > Components: io-java-gcp >Reporter: Jacob Ferriero >Assignee: Jacob Ferriero >Priority: P3 > Time Spent: 50m > Remaining Estimate: 0h > > Per Suggestion in this > [conversation|https://github.com/apache/beam/pull/11596#discussion_r427633240] > Add timestamped values and watermark estimate to > HL7v2IO.ListMessages. The same argument can be made for making HL7v2IO.Read > return timestamped values. > This should be optional and default to false to not be interface breaking. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9990) FhirIO should support conditional create / update methods
[ https://issues.apache.org/jira/browse/BEAM-9990?focusedWorklogId=440552&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440552 ] ASF GitHub Bot logged work on BEAM-9990: Author: ASF GitHub Bot Created on: 03/Jun/20 01:35 Start Date: 03/Jun/20 01:35 Worklog Time Spent: 10m Work Description: jaketf commented on pull request #11702: URL: https://github.com/apache/beam/pull/11702#issuecomment-637901651 @pabloem yes PTAL. I've removed one of the transforms because just in writing the tests I realized this was too difficult an interface to use. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440552) Time Spent: 50m (was: 40m) > FhirIO should support conditional create / update methods > - > > Key: BEAM-9990 > URL: https://issues.apache.org/jira/browse/BEAM-9990 > Project: Beam > Issue Type: Improvement > Components: io-java-gcp >Reporter: Jacob Ferriero >Assignee: Jacob Ferriero >Priority: P2 > Time Spent: 50m > Remaining Estimate: 0h > > There are many use cases where it is expected that calling executeBundles in > a distributed environment may fail (e.g. trying to create a resource that > already exists). 
> We should add classes to support the following methods as implementations of > FhirIO.Write to provide more robust reconciliation strategies for Dead Letter > Queues involving FhirIO.Write > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/conditionalUpdate > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/create > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/update -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10055) Add --region to 3 of the python examples
[ https://issues.apache.org/jira/browse/BEAM-10055?focusedWorklogId=440550&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440550 ] ASF GitHub Bot logged work on BEAM-10055: - Author: ASF GitHub Bot Created on: 03/Jun/20 01:34 Start Date: 03/Jun/20 01:34 Worklog Time Spent: 10m Work Description: aaltay merged pull request #11779: URL: https://github.com/apache/beam/pull/11779 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440550) Time Spent: 1h 10m (was: 1h) > Add --region to 3 of the python examples > > > Key: BEAM-10055 > URL: https://issues.apache.org/jira/browse/BEAM-10055 > Project: Beam > Issue Type: Bug > Components: sdk-py-core >Reporter: Ted Romer >Assignee: Ted Romer >Priority: P3 > Original Estimate: 1h > Time Spent: 1h 10m > Remaining Estimate: 0h > > Proposed fix: > {color:#FF}[https://github.com/tedromer/beam/compare/tedromer:ef811fe...tedromer:1f39865]{color} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9990) FhirIO should support conditional create / update methods
[ https://issues.apache.org/jira/browse/BEAM-9990?focusedWorklogId=440549&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440549 ] ASF GitHub Bot logged work on BEAM-9990: Author: ASF GitHub Bot Created on: 03/Jun/20 01:34 Start Date: 03/Jun/20 01:34 Worklog Time Spent: 10m Work Description: jaketf removed a comment on pull request #11702: URL: https://github.com/apache/beam/pull/11702#issuecomment-628285515 To highlight just the latest changes to FhirIO you can take a look at [this PR on my fork](https://github.com/jaketf/beam/pull/1/files) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440549) Time Spent: 40m (was: 0.5h) > FhirIO should support conditional create / update methods > - > > Key: BEAM-9990 > URL: https://issues.apache.org/jira/browse/BEAM-9990 > Project: Beam > Issue Type: Improvement > Components: io-java-gcp >Reporter: Jacob Ferriero >Assignee: Jacob Ferriero >Priority: P2 > Time Spent: 40m > Remaining Estimate: 0h > > There are many use cases where it is expected that calling executeBundles in > a distributed environment may fail (e.g. trying to create a resource that > already exists). 
> We should add classes to support the following methods as implementations of > FhirIO.Write to provide more robust reconciliation strategies for Dead Letter > Queues involving FhirIO.Write > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/conditionalUpdate > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/create > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/update -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9946) Enhance Partition transform to provide partitionfn with SideInputs
[ https://issues.apache.org/jira/browse/BEAM-9946?focusedWorklogId=440548&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440548 ] ASF GitHub Bot logged work on BEAM-9946: Author: ASF GitHub Bot Created on: 03/Jun/20 01:33 Start Date: 03/Jun/20 01:33 Worklog Time Spent: 10m Work Description: aaltay merged pull request #11682: URL: https://github.com/apache/beam/pull/11682 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440548) Remaining Estimate: 91h 40m (was: 91h 50m) Time Spent: 4h 20m (was: 4h 10m) > Enhance Partition transform to provide partitionfn with SideInputs > -- > > Key: BEAM-9946 > URL: https://issues.apache.org/jira/browse/BEAM-9946 > Project: Beam > Issue Type: New Feature > Components: sdk-java-core >Reporter: Darshan Jani >Assignee: Darshan Jani >Priority: P2 > Original Estimate: 96h > Time Spent: 4h 20m > Remaining Estimate: 91h 40m > > Currently _Partition_ transform can partition a collection into n collections > based on only _element_ value in _PartitionFn_ to decide on which partition a > particular element belongs to. > {code:java} > public interface PartitionFn extends Serializable { > int partitionFor(T elem, int numPartitions); > } > public static Partition of(int numPartitions, PartitionFn > partitionFn) { > return new Partition<>(new PartitionDoFn(numPartitions, partitionFn)); > } > {code} > It will be useful to introduce new API with additional _sideInputs_ provided > to partition function. User will be able to write logic to use both _element_ > value and _sideInputs_ to decide on which partition a particular element > belongs to. 
> Option-1: Proposed new API: > {code:java} > public interface PartitionWithSideInputsFn extends Serializable { > int partitionFor(T elem, int numPartitions, Context c); > } > public static Partition of(int numPartitions, > PartitionWithSideInputsFn partitionFn, Requirements requirements) { > ... > } > {code} > User can use either of the two APIs as per their partitioning function logic. > Option-2: Redesign old API with Builder Pattern which can optionally provide > a _Requirements_ with _sideInputs._ Deprecate old API. > {code:java} > // using sideviews > Partition.into(numberOfPartitions).via( > fn( > (input,c) -> { > // use c.sideInput(view) > // use input > // return partitionnumber > },requiresSideInputs(view)) > ) > // without using sideviews > Partition.into(numberOfPartitions).via( > fn((input,c) -> { > // use input > // return partitionnumber > }) > ) > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (BEAM-10168) Add Github "publish release" to release guide
[ https://issues.apache.org/jira/browse/BEAM-10168?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kyle Weaver resolved BEAM-10168. Fix Version/s: Not applicable Resolution: Fixed > Add Github "publish release" to release guide > - > > Key: BEAM-10168 > URL: https://issues.apache.org/jira/browse/BEAM-10168 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Kyle Weaver >Priority: P2 > Fix For: Not applicable > > Time Spent: 0.5h > Remaining Estimate: 0h > > Github does not recognize tags as full-fledged releases unless they are > published through the Github API/UI. We need to add this step to the release > guide. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10168) Add Github "publish release" to release guide
[ https://issues.apache.org/jira/browse/BEAM-10168?focusedWorklogId=440541&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440541 ] ASF GitHub Bot logged work on BEAM-10168: - Author: ASF GitHub Bot Created on: 03/Jun/20 01:04 Start Date: 03/Jun/20 01:04 Worklog Time Spent: 10m Work Description: ibzib merged pull request #11902: URL: https://github.com/apache/beam/pull/11902 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440541) Time Spent: 0.5h (was: 20m) > Add Github "publish release" to release guide > - > > Key: BEAM-10168 > URL: https://issues.apache.org/jira/browse/BEAM-10168 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Kyle Weaver >Priority: P2 > Time Spent: 0.5h > Remaining Estimate: 0h > > Github does not recognize tags as full-fledged releases unless they are > published through the Github API/UI. We need to add this step to the release > guide. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10168) Add Github "publish release" to release guide
[ https://issues.apache.org/jira/browse/BEAM-10168?focusedWorklogId=440540&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440540 ] ASF GitHub Bot logged work on BEAM-10168: - Author: ASF GitHub Bot Created on: 03/Jun/20 01:03 Start Date: 03/Jun/20 01:03 Worklog Time Spent: 10m Work Description: ibzib commented on pull request #11902: URL: https://github.com/apache/beam/pull/11902#issuecomment-637893319 Brian, if you want, you can try to "verify" the release by making sure the tag's signature matches whatever Github expects. But I don't think it's too important so I'm merging this as-is for now. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440540) Time Spent: 20m (was: 10m) > Add Github "publish release" to release guide > - > > Key: BEAM-10168 > URL: https://issues.apache.org/jira/browse/BEAM-10168 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Kyle Weaver >Priority: P2 > Time Spent: 20m > Remaining Estimate: 0h > > Github does not recognize tags as full-fledged releases unless they are > published through the Github API/UI. We need to add this step to the release > guide. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (BEAM-9684) Boss Level | Go SDK Code Katas
[ https://issues.apache.org/jira/browse/BEAM-9684?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Damon Douglas reassigned BEAM-9684: --- Assignee: (was: Damon Douglas) > Boss Level | Go SDK Code Katas > -- > > Key: BEAM-9684 > URL: https://issues.apache.org/jira/browse/BEAM-9684 > Project: Beam > Issue Type: Sub-task > Components: katas, sdk-go >Reporter: Damon Douglas >Priority: P2 > Labels: stale-assigned > > A challenging kata testing an individual on multiple concepts learned from > previous katas, partially patterned after > [https://github.com/apache/beam/tree/master/learning/katas/java/Examples/Word%20Count]. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (BEAM-9681) IO | Go SDK Code Katas
[ https://issues.apache.org/jira/browse/BEAM-9681?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Damon Douglas reassigned BEAM-9681: --- Assignee: (was: Damon Douglas) > IO | Go SDK Code Katas > -- > > Key: BEAM-9681 > URL: https://issues.apache.org/jira/browse/BEAM-9681 > Project: Beam > Issue Type: Sub-task > Components: katas, sdk-go >Reporter: Damon Douglas >Priority: P2 > Labels: stale-assigned > > A kata devoted to available pipeline I/O transforms, patterned after > [https://github.com/apache/beam/tree/master/learning/katas/java/IO]. The > takeaway for an individual is to master reading from and writing to common > and built-in data sources and sinks. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (BEAM-9682) Windowing | Go SDK Code Katas
[ https://issues.apache.org/jira/browse/BEAM-9682?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Damon Douglas reassigned BEAM-9682: --- Assignee: (was: Damon Douglas) > Windowing | Go SDK Code Katas > - > > Key: BEAM-9682 > URL: https://issues.apache.org/jira/browse/BEAM-9682 > Project: Beam > Issue Type: Sub-task > Components: katas, sdk-go >Reporter: Damon Douglas >Priority: P2 > Labels: stale-assigned > > A kata devoted to windowing patterned after > [https://github.com/apache/beam/tree/master/learning/katas/java/Windowing]. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (BEAM-9680) Common Transforms | Go SDK Code Katas
[ https://issues.apache.org/jira/browse/BEAM-9680?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Damon Douglas reassigned BEAM-9680: --- Assignee: (was: Damon Douglas) > Common Transforms | Go SDK Code Katas > - > > Key: BEAM-9680 > URL: https://issues.apache.org/jira/browse/BEAM-9680 > Project: Beam > Issue Type: Sub-task > Components: katas, sdk-go >Reporter: Damon Douglas >Priority: P2 > Labels: stale-assigned > > A kata devoted to common Apache Beam transforms, patterned after > [https://github.com/apache/beam/tree/master/learning/katas/java/Common%20Transforms]. > The takeaway for an individual is to master the following using the Golang > SDK. > * Aggregation > * Filter > * Key/Value (i.e. > [https://beam.apache.org/releases/javadoc/2.19.0/org/apache/beam/sdk/values/KV.html]) -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (BEAM-9683) Triggers | Go SDK Code Katas
[ https://issues.apache.org/jira/browse/BEAM-9683?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Damon Douglas reassigned BEAM-9683: --- Assignee: (was: Damon Douglas) > Triggers | Go SDK Code Katas > > > Key: BEAM-9683 > URL: https://issues.apache.org/jira/browse/BEAM-9683 > Project: Beam > Issue Type: Sub-task > Components: katas, sdk-go >Reporter: Damon Douglas >Priority: P2 > Labels: stale-assigned > > A kata devoted to triggers in Apache Beam patterned after > [https://github.com/apache/beam/tree/master/learning/katas/java/Triggers]. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (BEAM-9676) Go SDK Code Katas
[ https://issues.apache.org/jira/browse/BEAM-9676?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17124489#comment-17124489 ] Damon Douglas commented on BEAM-9676: - Hello [~kenn], Its subtasks are where the activity is happening. Should we reference this task in pull requests instead of the subtasks? > Go SDK Code Katas > - > > Key: BEAM-9676 > URL: https://issues.apache.org/jira/browse/BEAM-9676 > Project: Beam > Issue Type: Improvement > Components: katas, sdk-go >Reporter: Robert Burke >Assignee: Damon Douglas >Priority: P2 > Labels: stale-assigned > > There should be code katas for the Go SDK similar to the Java and Python SDKs. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9990) FhirIO should support conditional create / update methods
[ https://issues.apache.org/jira/browse/BEAM-9990?focusedWorklogId=440537&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440537 ] ASF GitHub Bot logged work on BEAM-9990: Author: ASF GitHub Bot Created on: 03/Jun/20 00:52 Start Date: 03/Jun/20 00:52 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11702: URL: https://github.com/apache/beam/pull/11702#issuecomment-637890525 should I take a look at the APIs for the transforms? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440537) Time Spent: 0.5h (was: 20m) > FhirIO should support conditional create / update methods > - > > Key: BEAM-9990 > URL: https://issues.apache.org/jira/browse/BEAM-9990 > Project: Beam > Issue Type: Improvement > Components: io-java-gcp >Reporter: Jacob Ferriero >Assignee: Jacob Ferriero >Priority: P2 > Time Spent: 0.5h > Remaining Estimate: 0h > > There are many use cases where it is expected that calling executeBundles in > a distributed environment may fail (e.g. trying to create a resource that > already exists). > We should add classes to support the following methods as implementations of > FhirIO.Write to provide more robust reconciliation strategies for Dead Letter > Queues involving FhirIO.Write > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/conditionalUpdate > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/create > https://cloud.google.com/healthcare/docs/reference/rest/v1beta1/projects.locations.datasets.fhirStores.fhir/update -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10132) Remove reference to apachebeam/*
[ https://issues.apache.org/jira/browse/BEAM-10132?focusedWorklogId=440529&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440529 ] ASF GitHub Bot logged work on BEAM-10132: - Author: ASF GitHub Bot Created on: 03/Jun/20 00:24 Start Date: 03/Jun/20 00:24 Worklog Time Spent: 10m Work Description: ibzib opened a new pull request #11905: URL: https://github.com/apache/beam/pull/11905 R: @mxm Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] Update `CHANGES.md` with noteworthy changes. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). See the [Contributor Guide](https://beam.apache.org/contribute) for more tips on [how to make review process smoother](https://beam.apache.org/contribute/#make-reviewers-job-easier). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunne
[jira] [Resolved] (BEAM-10167) Fix 2.21.0 downloads link in blog post
[ https://issues.apache.org/jira/browse/BEAM-10167?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kyle Weaver resolved BEAM-10167. Fix Version/s: Not applicable Resolution: Fixed > Fix 2.21.0 downloads link in blog post > -- > > Key: BEAM-10167 > URL: https://issues.apache.org/jira/browse/BEAM-10167 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Kyle Weaver >Priority: P4 > Fix For: Not applicable > > Time Spent: 20m > Remaining Estimate: 0h > > Right now it goes to > [https://beam.apache.org/get-started/downloads/#-], which is a valid > URL, but not exactly the one we want. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (BEAM-10154) Stray version number in SQL overview
[ https://issues.apache.org/jira/browse/BEAM-10154?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kyle Weaver resolved BEAM-10154. Fix Version/s: Not applicable Resolution: Fixed > Stray version number in SQL overview > > > Key: BEAM-10154 > URL: https://issues.apache.org/jira/browse/BEAM-10154 > Project: Beam > Issue Type: Bug > Components: website >Reporter: Kyle Weaver >Assignee: Kyle Weaver >Priority: P4 > Fix For: Not applicable > > Time Spent: 20m > Remaining Estimate: 0h > > Not clear what it means. Probably should just delete it. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-10177) Remove "Review Release Notes in JIRA"
[ https://issues.apache.org/jira/browse/BEAM-10177?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kyle Weaver updated BEAM-10177: --- Fix Version/s: (was: 2.22.0) Not applicable > Remove "Review Release Notes in JIRA" > - > > Key: BEAM-10177 > URL: https://issues.apache.org/jira/browse/BEAM-10177 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Brian Hulette >Priority: P3 > Fix For: Not applicable > > Time Spent: 20m > Remaining Estimate: 0h > > Release guide: "You should verify that the issues listed automatically by > JIRA are appropriate to appear in the Release Notes." > I think it's safe to remove that now since a) the volume of jiras > (>150/release) makes that infeasible and b) we have CHANGES.md which should > replace the autogenerated release notes. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (BEAM-10177) Remove "Review Release Notes in JIRA"
[ https://issues.apache.org/jira/browse/BEAM-10177?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kyle Weaver resolved BEAM-10177. Fix Version/s: 2.22.0 Resolution: Fixed > Remove "Review Release Notes in JIRA" > - > > Key: BEAM-10177 > URL: https://issues.apache.org/jira/browse/BEAM-10177 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Brian Hulette >Priority: P3 > Fix For: 2.22.0 > > Time Spent: 20m > Remaining Estimate: 0h > > Release guide: "You should verify that the issues listed automatically by > JIRA are appropriate to appear in the Release Notes." > I think it's safe to remove that now since a) the volume of jiras > (>150/release) makes that infeasible and b) we have CHANGES.md which should > replace the autogenerated release notes. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10168) Add Github "publish release" to release guide
[ https://issues.apache.org/jira/browse/BEAM-10168?focusedWorklogId=440523&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440523 ] ASF GitHub Bot logged work on BEAM-10168: - Author: ASF GitHub Bot Created on: 03/Jun/20 00:04 Start Date: 03/Jun/20 00:04 Worklog Time Spent: 10m Work Description: ibzib opened a new pull request #11902: URL: https://github.com/apache/beam/pull/11902 R: @TheNeuralBit Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] Update `CHANGES.md` with noteworthy changes. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). See the [Contributor Guide](https://beam.apache.org/contribute) for more tips on [how to make review process smoother](https://beam.apache.org/contribute/#make-reviewers-job-easier). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_Valid
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440521&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440521 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 23:57 Start Date: 02/Jun/20 23:57 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11898: URL: https://github.com/apache/beam/pull/11898#issuecomment-637875180 LGTM. I'll merge after tests pass. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440521) Time Spent: 1.5h (was: 1h 20m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 1.5h > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:292) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at org.junit.runners.ParentRunner.run(ParentRunner.java:396) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.runTestClass(JUnitTestClassExecutor.java:110) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestC
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440520&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440520 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 23:56 Start Date: 02/Jun/20 23:56 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11898: URL: https://github.com/apache/beam/pull/11898#issuecomment-637874451 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440520) Time Spent: 1h 20m (was: 1h 10m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 1h 20m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:292) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at org.junit.runners.ParentRunner.run(ParentRunner.java:396) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.runTestClass(JUnitTestClassExecutor.java:110) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor
[jira] [Work logged] (BEAM-9785) Add PostCommit suite for Python 3.8
[ https://issues.apache.org/jira/browse/BEAM-9785?focusedWorklogId=440518&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440518 ] ASF GitHub Bot logged work on BEAM-9785: Author: ASF GitHub Bot Created on: 02/Jun/20 23:40 Start Date: 02/Jun/20 23:40 Worklog Time Spent: 10m Work Description: tvalentyn commented on pull request #11788: URL: https://github.com/apache/beam/pull/11788#issuecomment-637863213 Failed with: ``` 16:39:16 16:39:16 FAILURE: Build failed with an exception. 16:39:16 16:39:16 * What went wrong: 16:39:16 Could not determine the dependencies of task ':python38PostCommit'. 16:39:16 > Task with path ':sdks:python:test-suites:portable:py38:postCommitPy38' not found in root project 'beam'. 16:39:16 ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440518) Time Spent: 3.5h (was: 3h 20m) > Add PostCommit suite for Python 3.8 > --- > > Key: BEAM-9785 > URL: https://issues.apache.org/jira/browse/BEAM-9785 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: yoshiki obata >Assignee: Ashwin Ramaswami >Priority: P2 > Labels: stale-assigned > Time Spent: 3.5h > Remaining Estimate: 0h > > Add PostCommit suites for Python 3.8. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9785) Add PostCommit suite for Python 3.8
[ https://issues.apache.org/jira/browse/BEAM-9785?focusedWorklogId=440516&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440516 ] ASF GitHub Bot logged work on BEAM-9785: Author: ASF GitHub Bot Created on: 02/Jun/20 23:39 Start Date: 02/Jun/20 23:39 Worklog Time Spent: 10m Work Description: tvalentyn commented on pull request #11788: URL: https://github.com/apache/beam/pull/11788#issuecomment-637862976 Run Python 3.6 PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440516) Time Spent: 3h 20m (was: 3h 10m) > Add PostCommit suite for Python 3.8 > --- > > Key: BEAM-9785 > URL: https://issues.apache.org/jira/browse/BEAM-9785 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: yoshiki obata >Assignee: Ashwin Ramaswami >Priority: P2 > Labels: stale-assigned > Time Spent: 3h 20m > Remaining Estimate: 0h > > Add PostCommit suites for Python 3.8. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9785) Add PostCommit suite for Python 3.8
[ https://issues.apache.org/jira/browse/BEAM-9785?focusedWorklogId=440515&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440515 ] ASF GitHub Bot logged work on BEAM-9785: Author: ASF GitHub Bot Created on: 02/Jun/20 23:38 Start Date: 02/Jun/20 23:38 Worklog Time Spent: 10m Work Description: tvalentyn commented on pull request #11788: URL: https://github.com/apache/beam/pull/11788#issuecomment-637862778 Run Python 3.8 PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440515) Time Spent: 3h 10m (was: 3h) > Add PostCommit suite for Python 3.8 > --- > > Key: BEAM-9785 > URL: https://issues.apache.org/jira/browse/BEAM-9785 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: yoshiki obata >Assignee: Ashwin Ramaswami >Priority: P2 > Labels: stale-assigned > Time Spent: 3h 10m > Remaining Estimate: 0h > > Add PostCommit suites for Python 3.8. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-7074) FnApiRunner fails to wire multiple timer specs in single pardo
[ https://issues.apache.org/jira/browse/BEAM-7074?focusedWorklogId=440514&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440514 ] ASF GitHub Bot logged work on BEAM-7074: Author: ASF GitHub Bot Created on: 02/Jun/20 23:30 Start Date: 02/Jun/20 23:30 Worklog Time Spent: 10m Work Description: boyuanzz commented on pull request #11894: URL: https://github.com/apache/beam/pull/11894#issuecomment-637860496 > Maybe better to list the runners that should skip instead so that we don't limit coverage for new/unknown runners? Re: @tweise It's kind of hard to do that since we have different configurations for the same runner. For example, `FnApiRunner` can run with multi-worker, or cached workers. That said, it's not ideal to limit a test case to a specific test suite. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440514) Time Spent: 1.5h (was: 1h 20m) > FnApiRunner fails to wire multiple timer specs in single pardo > -- > > Key: BEAM-7074 > URL: https://issues.apache.org/jira/browse/BEAM-7074 > Project: Beam > Issue Type: Bug > Components: sdk-py-harness >Affects Versions: 2.11.0 >Reporter: Thomas Weise >Priority: P2 > Labels: stale-P2 > Fix For: 2.21.0 > > Time Spent: 1.5h > Remaining Estimate: 0h > > Multiple timer specs in a ParDo yield "NotImplementedError: Timers and side > inputs." -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10093) Add ZetaSQL Nexmark variant
[ https://issues.apache.org/jira/browse/BEAM-10093?focusedWorklogId=440513&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440513 ] ASF GitHub Bot logged work on BEAM-10093: - Author: ASF GitHub Bot Created on: 02/Jun/20 23:28 Start Date: 02/Jun/20 23:28 Worklog Time Spent: 10m Work Description: apilloud commented on a change in pull request #11820: URL: https://github.com/apache/beam/pull/11820#discussion_r434226724 ## File path: sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/queries/zetasql/ZetaSqlQuery0.java ## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.nexmark.queries.zetasql; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.sql.SqlTransform; +import org.apache.beam.sdk.extensions.sql.zetasql.ZetaSQLQueryPlanner; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; +import org.apache.beam.sdk.nexmark.model.Bid; +import org.apache.beam.sdk.nexmark.model.Event; +import org.apache.beam.sdk.nexmark.model.Event.Type; +import org.apache.beam.sdk.nexmark.model.sql.SelectEvent; +import org.apache.beam.sdk.nexmark.queries.NexmarkQueryTransform; +import org.apache.beam.sdk.nexmark.queries.NexmarkQueryUtil; +import org.apache.beam.sdk.schemas.transforms.Convert; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.Filter; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; + +/** + * Query 0: Pass events through unchanged. + * + * This measures the overhead of the Beam ZetaSql implementation and test harness like conversion + * from Java model classes to Beam records. + * + * {@link Bid} events are used here at the moment, as they are most numerous with default + * configuration. 
+ */ +public class ZetaSqlQuery0 extends NexmarkQueryTransform { + + public ZetaSqlQuery0() { +super("ZetaSqlQuery0"); + } + + @Override + public PCollection expand(PCollection allEvents) { +PCollection rows = +allEvents +.apply(Filter.by(NexmarkQueryUtil.IS_BID)) +.apply(getName() + ".SelectEvent", new SelectEvent(Type.BID)); + +return rows.apply(getName() + ".Serialize", logBytesMetric(rows.getCoder())) +.setRowSchema(rows.getSchema()) +.apply( +SqlTransform.query("SELECT * FROM PCOLLECTION") +.withQueryPlannerClass(ZetaSQLQueryPlanner.class)) Review comment: Looking at this class and the other SQL classes, it looks like the only difference between `SQL` and `ZetaSQL` is the SQL string and withQueryPlannerClass. I believe we expect that to be the case for all these queries. Can we take advantage of that and not copy the classes for ZetaSQL? (There are lots of ways to implement this, but it seems like it would be really easy to add a factory method for each SQL dialect to the existing classes.) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440513) Time Spent: 1h 10m (was: 1h) > Add ZetaSQL Nexmark variant > --- > > Key: BEAM-10093 > URL: https://issues.apache.org/jira/browse/BEAM-10093 > Project: Beam > Issue Type: New Feature > Components: testing-nexmark >Reporter: Kenneth Knowles >Assignee: Kenneth Knowles >Priority: P2 > Time Spent: 1h 10m > Remaining Estimate: 0h > > Most queries will be identical, but best to simply stay decoupled, so this
[jira] [Work logged] (BEAM-7074) FnApiRunner fails to wire multiple timer specs in single pardo
[ https://issues.apache.org/jira/browse/BEAM-7074?focusedWorklogId=440512&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440512 ] ASF GitHub Bot logged work on BEAM-7074: Author: ASF GitHub Bot Created on: 02/Jun/20 23:24 Start Date: 02/Jun/20 23:24 Worklog Time Spent: 10m Work Description: boyuanzz commented on pull request #11894: URL: https://github.com/apache/beam/pull/11894#issuecomment-637858581 Run Python Spark ValidatesRunner This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440512) Time Spent: 1h 20m (was: 1h 10m) > FnApiRunner fails to wire multiple timer specs in single pardo > -- > > Key: BEAM-7074 > URL: https://issues.apache.org/jira/browse/BEAM-7074 > Project: Beam > Issue Type: Bug > Components: sdk-py-harness >Affects Versions: 2.11.0 >Reporter: Thomas Weise >Priority: P2 > Labels: stale-P2 > Fix For: 2.21.0 > > Time Spent: 1h 20m > Remaining Estimate: 0h > > Multiple timer specs in a ParDo yield "NotImplementedError: Timers and side > inputs." -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440511&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440511 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 23:24 Start Date: 02/Jun/20 23:24 Worklog Time Spent: 10m Work Description: KevinGG commented on a change in pull request #11898: URL: https://github.com/apache/beam/pull/11898#discussion_r434227564 ## File path: sdks/python/apache_beam/io/gcp/bigquery.py ## @@ -1702,7 +1702,8 @@ def _get_destination_uri(self, temp_location): logging.debug("gcs_location is empty, using temp_location instead") else: raise ValueError( - '{} requires a GCS location to be provided'.format( + '{} requires a GCS location to be provided. Neither option' + '--gcs_location nor the fallback --temp_location is set.'.format( Review comment: Thanks, created BEAM-10182 for this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440511) Time Spent: 1h 10m (was: 1h) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 1h 10m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at or
[jira] [Created] (BEAM-10182) Support custom gcs location for bigquery read
Ning Kang created BEAM-10182: Summary: Support custom gcs location for bigquery read Key: BEAM-10182 URL: https://issues.apache.org/jira/browse/BEAM-10182 Project: Beam Issue Type: Improvement Components: sdk-java-core Reporter: Ning Kang Assignee: Pablo Estrada Java and Python handle the usage of temp location for bigquery io differently. Java's Read only uses and validates tempLocation but Write takes customGcsLocation and falls back to tempLocation. Python's Read takes gcs_location and falls back to temp_location while Write takes custom_gcs_temp_location and falls back to temp_location. We need an equivalent of https://github.com/apache/beam/blob/0a0399f71cf14ecabe7e73b6cd596325bb7ff2ea/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java#L2314 for the Read class. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9785) Add PostCommit suite for Python 3.8
[ https://issues.apache.org/jira/browse/BEAM-9785?focusedWorklogId=440509&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440509 ] ASF GitHub Bot logged work on BEAM-9785: Author: ASF GitHub Bot Created on: 02/Jun/20 23:13 Start Date: 02/Jun/20 23:13 Worklog Time Spent: 10m Work Description: tvalentyn commented on pull request #11788: URL: https://github.com/apache/beam/pull/11788#issuecomment-637855187 run seed job This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440509) Time Spent: 3h (was: 2h 50m) > Add PostCommit suite for Python 3.8 > --- > > Key: BEAM-9785 > URL: https://issues.apache.org/jira/browse/BEAM-9785 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: yoshiki obata >Assignee: Ashwin Ramaswami >Priority: P2 > Labels: stale-assigned > Time Spent: 3h > Remaining Estimate: 0h > > Add PostCommit suites for Python 3.8. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9785) Add PostCommit suite for Python 3.8
[ https://issues.apache.org/jira/browse/BEAM-9785?focusedWorklogId=440507&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440507 ] ASF GitHub Bot logged work on BEAM-9785: Author: ASF GitHub Bot Created on: 02/Jun/20 23:03 Start Date: 02/Jun/20 23:03 Worklog Time Spent: 10m Work Description: tvalentyn commented on pull request #11788: URL: https://github.com/apache/beam/pull/11788#issuecomment-637852060 run seed job This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440507) Time Spent: 2h 50m (was: 2h 40m) > Add PostCommit suite for Python 3.8 > --- > > Key: BEAM-9785 > URL: https://issues.apache.org/jira/browse/BEAM-9785 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: yoshiki obata >Assignee: Ashwin Ramaswami >Priority: P2 > Labels: stale-assigned > Time Spent: 2h 50m > Remaining Estimate: 0h > > Add PostCommit suites for Python 3.8. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10063) Run pandas doctests for Beam dataframes API.
[ https://issues.apache.org/jira/browse/BEAM-10063?focusedWorklogId=440505&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440505 ] ASF GitHub Bot logged work on BEAM-10063: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:55 Start Date: 02/Jun/20 22:55 Worklog Time Spent: 10m Work Description: robertwb merged pull request #11785: URL: https://github.com/apache/beam/pull/11785 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440505) Time Spent: 1h (was: 50m) > Run pandas doctests for Beam dataframes API. > > > Key: BEAM-10063 > URL: https://issues.apache.org/jira/browse/BEAM-10063 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robert Bradshaw >Assignee: Robert Bradshaw >Priority: P2 > Time Spent: 1h > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440502&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440502 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:53 Start Date: 02/Jun/20 22:53 Worklog Time Spent: 10m Work Description: pabloem commented on a change in pull request #11898: URL: https://github.com/apache/beam/pull/11898#discussion_r434218178 ## File path: sdks/python/apache_beam/io/gcp/bigquery.py ## @@ -1702,7 +1702,8 @@ def _get_destination_uri(self, temp_location): logging.debug("gcs_location is empty, using temp_location instead") else: raise ValueError( - '{} requires a GCS location to be provided'.format( + '{} requires a GCS location to be provided. Neither option' + '--gcs_location nor the fallback --temp_location is set.'.format( Review comment: hm yeah this was done after the fact. Perhaps file a JIRA to do this, and maybe assign to me. : ) - or Reuven This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440502) Time Spent: 1h (was: 50m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 1h > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runn
[jira] [Work logged] (BEAM-10036) More flexible dataframes partitioning.
[ https://issues.apache.org/jira/browse/BEAM-10036?focusedWorklogId=440500&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440500 ] ASF GitHub Bot logged work on BEAM-10036: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:50 Start Date: 02/Jun/20 22:50 Worklog Time Spent: 10m Work Description: robertwb commented on a change in pull request #11766: URL: https://github.com/apache/beam/pull/11766#discussion_r434215127 ## File path: sdks/python/apache_beam/dataframe/expressions.py ## @@ -85,16 +87,10 @@ def evaluate_at(self, session): # type: (Session) -> T """Returns the result of self with the bindings given in session.""" raise NotImplementedError(type(self)) - def requires_partition_by_index(self): # type: () -> bool -"""Whether this expression requires its argument(s) to be partitioned -by index.""" -# TODO: It might be necessary to support partitioning by part of the index, -# for some args, which would require returning more than a boolean here. + def requires_partition_by(self): # type: () -> Partitioning raise NotImplementedError(type(self)) - def preserves_partition_by_index(self): # type: () -> bool -"""Whether the result of this expression will be partitioned by index -whenever all of its inputs are partitioned by index.""" + def preserves_partition_by(self): # type: () -> Partitioning Review comment: Yes, it's a function of both the input and the operation. E.g. an elementwise operation preserves all existing partitioning, but does not guarantee any. ## File path: sdks/python/apache_beam/dataframe/partitionings.py ## @@ -0,0 +1,133 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +from typing import Any +from typing import Iterable +from typing import TypeVar + +import pandas as pd + +Frame = TypeVar('Frame', bound=pd.core.generic.NDFrame) + + +class Partitioning(object): + """A class representing a (consistent) partitioning of dataframe objects. + """ + def is_subpartition_of(self, other): +# type: (Partitioning) -> bool + +"""Returns whether self is a sub-partition of other. + +Specifically, returns whether something partitioned by self is necissarily +also partitioned by other. +""" +raise NotImplementedError + + def partition_fn(self, df): +# type: (Frame) -> Iterable[Tuple[Any, Frame]] + +"""A callable that actually performs the partitioning of a Frame df. + +This will be invoked via a FlatMap in conjunction with a GroupKey to +achieve the desired partitioning. +""" +raise NotImplementedError + + +class Index(Partitioning): + """A partitioning by index (either fully or partially). + + If the set of "levels" of the index to consider is not specified, the entire + index is used. + + These form a partial order, given by + + Nothing() < Index([i]) < Index([i, j]) < ... < Index() < Singleton() Review comment: Clarified. ## File path: sdks/python/apache_beam/dataframe/frames_test.py ## @@ -23,6 +23,7 @@ from apache_beam.dataframe import expressions from apache_beam.dataframe import frame_base +from apache_beam.dataframe import frames # pylint: disable=unused-import Review comment: This ensures the deferred dataframe subclasses are registered for wrapping. 
## File path: sdks/python/apache_beam/dataframe/frames_test.py ## @@ -23,6 +23,7 @@ from apache_beam.dataframe import expressions from apache_beam.dataframe import frame_base +from apache_beam.dataframe import frames # pylint: disable=unused-import Review comment: It makes sure the wrapper code is populated with the various types of frames. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog
[jira] [Work logged] (BEAM-10036) More flexible dataframes partitioning.
[ https://issues.apache.org/jira/browse/BEAM-10036?focusedWorklogId=440501&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440501 ] ASF GitHub Bot logged work on BEAM-10036: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:50 Start Date: 02/Jun/20 22:50 Worklog Time Spent: 10m Work Description: robertwb commented on a change in pull request #11766: URL: https://github.com/apache/beam/pull/11766#discussion_r434217321 ## File path: sdks/python/apache_beam/dataframe/partitionings.py ## @@ -0,0 +1,133 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +from typing import Any +from typing import Iterable +from typing import TypeVar + +import pandas as pd + +Frame = TypeVar('Frame', bound=pd.core.generic.NDFrame) + + +class Partitioning(object): + """A class representing a (consistent) partitioning of dataframe objects. + """ + def is_subpartition_of(self, other): Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440501) Time Spent: 50m (was: 40m) > More flexible dataframes partitioning. > -- > > Key: BEAM-10036 > URL: https://issues.apache.org/jira/browse/BEAM-10036 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robert Bradshaw >Assignee: Robert Bradshaw >Priority: P2 > Time Spent: 50m > Remaining Estimate: 0h > > Currently we only track a boolean of whether a dataframe is partitioned by > the (full) index. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10177) Remove "Review Release Notes in JIRA"
[ https://issues.apache.org/jira/browse/BEAM-10177?focusedWorklogId=440496&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440496 ] ASF GitHub Bot logged work on BEAM-10177: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:46 Start Date: 02/Jun/20 22:46 Worklog Time Spent: 10m Work Description: aaltay merged pull request #11897: URL: https://github.com/apache/beam/pull/11897 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440496) Time Spent: 20m (was: 10m) > Remove "Review Release Notes in JIRA" > - > > Key: BEAM-10177 > URL: https://issues.apache.org/jira/browse/BEAM-10177 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Brian Hulette >Priority: P3 > Time Spent: 20m > Remaining Estimate: 0h > > Release guide: "You should verify that the issues listed automatically by > JIRA are appropriate to appear in the Release Notes." > I think it's safe to remove that now since a) the volume of jiras > (>150/release) makes that infeasible and b) we have CHANGES.md which should > replace the autogenerated release notes. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10036) More flexible dataframes partitioning.
[ https://issues.apache.org/jira/browse/BEAM-10036?focusedWorklogId=440494&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440494 ] ASF GitHub Bot logged work on BEAM-10036: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:42 Start Date: 02/Jun/20 22:42 Worklog Time Spent: 10m Work Description: robertwb commented on a change in pull request #11766: URL: https://github.com/apache/beam/pull/11766#discussion_r434214501 ## File path: sdks/python/apache_beam/dataframe/partitionings.py ## @@ -0,0 +1,133 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +from typing import Any +from typing import Iterable +from typing import TypeVar + +import pandas as pd + +Frame = TypeVar('Frame', bound=pd.core.generic.NDFrame) + + +class Partitioning(object): + """A class representing a (consistent) partitioning of dataframe objects. + """ + def is_subpartition_of(self, other): +# type: (Partitioning) -> bool + +"""Returns whether self is a sub-partition of other. + +Specifically, returns whether something partitioned by self is necissarily +also partitioned by other. 
+""" +raise NotImplementedError + + def partition_fn(self, df): +# type: (Frame) -> Iterable[Tuple[Any, Frame]] + +"""A callable that actually performs the partitioning of a Frame df. + +This will be invoked via a FlatMap in conjunction with a GroupKey to +achieve the desired partitioning. +""" +raise NotImplementedError + + +class Index(Partitioning): + """A partitioning by index (either fully or partially). + + If the set of "levels" of the index to consider is not specified, the entire + index is used. + + These form a partial order, given by + + Nothing() < Index([i]) < Index([i, j]) < ... < Index() < Singleton() + """ + + _INDEX_PARTITIONS = 100 Review comment: Oh, I was just testing things. I'll change it back. (It would be great to get rid of this altogether, as it limits parallelism, but that's not part of this change.) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440494) Time Spent: 0.5h (was: 20m) > More flexible dataframes partitioning. > -- > > Key: BEAM-10036 > URL: https://issues.apache.org/jira/browse/BEAM-10036 > Project: Beam > Issue Type: Sub-task > Components: sdk-py-core >Reporter: Robert Bradshaw >Assignee: Robert Bradshaw >Priority: P2 > Time Spent: 0.5h > Remaining Estimate: 0h > > Currently we only track a boolean of whether a dataframe is partitioned by > the (full) index. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440492&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440492 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:35 Start Date: 02/Jun/20 22:35 Worklog Time Spent: 10m Work Description: KevinGG commented on a change in pull request #11898: URL: https://github.com/apache/beam/pull/11898#discussion_r434212247 ## File path: sdks/python/apache_beam/io/gcp/bigquery.py ## @@ -1702,7 +1702,8 @@ def _get_destination_uri(self, temp_location): logging.debug("gcs_location is empty, using temp_location instead") else: raise ValueError( - '{} requires a GCS location to be provided'.format( + '{} requires a GCS location to be provided. Neither option' + '--gcs_location nor the fallback --temp_location is set.'.format( Review comment: Thanks for the correction, Pablo! It looks like that Java and Python handles the usage of temp location for bigquery io differently. Java's `Read` only uses and validates `tempLocation` but `Write` takes `customGcsLocation` and falls back to `tempLocation`. Python's `Read` takes `gcs_location` and falls back to `temp_location` while `Write` takes `custom_gcs_temp_location` and falls back to `temp_location`. It seems that Java's `Read` should also allow specifying a `gcsLocation`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440492) Time Spent: 50m (was: 40m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 50m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. > at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at 
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLea
[jira] [Work logged] (BEAM-10167) Fix 2.21.0 downloads link in blog post
[ https://issues.apache.org/jira/browse/BEAM-10167?focusedWorklogId=440490&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440490 ] ASF GitHub Bot logged work on BEAM-10167: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:28 Start Date: 02/Jun/20 22:28 Worklog Time Spent: 10m Work Description: ibzib merged pull request #11880: URL: https://github.com/apache/beam/pull/11880 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440490) Time Spent: 20m (was: 10m) > Fix 2.21.0 downloads link in blog post > -- > > Key: BEAM-10167 > URL: https://issues.apache.org/jira/browse/BEAM-10167 > Project: Beam > Issue Type: Improvement > Components: website >Reporter: Kyle Weaver >Assignee: Kyle Weaver >Priority: P4 > Time Spent: 20m > Remaining Estimate: 0h > > Right now it goes to > [https://beam.apache.org/get-started/downloads/#-], which is a valid > URL, but not exactly the one we want. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10175) FhirIO execute bundle uses deprecated auth uri param
[ https://issues.apache.org/jira/browse/BEAM-10175?focusedWorklogId=440489&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440489 ] ASF GitHub Bot logged work on BEAM-10175: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:24 Start Date: 02/Jun/20 22:24 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11893: URL: https://github.com/apache/beam/pull/11893#issuecomment-637839937 Run Java PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440489) Time Spent: 1h (was: 50m) > FhirIO execute bundle uses deprecated auth uri param > > > Key: BEAM-10175 > URL: https://issues.apache.org/jira/browse/BEAM-10175 > Project: Beam > Issue Type: Improvement > Components: io-java-gcp >Affects Versions: 2.22.0 >Reporter: Jacob Ferriero >Assignee: Jacob Ferriero >Priority: P2 > Time Spent: 1h > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10101) Add a HttpIO / HttpFileSystem
[ https://issues.apache.org/jira/browse/BEAM-10101?focusedWorklogId=440486&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440486 ] ASF GitHub Bot logged work on BEAM-10101: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:08 Start Date: 02/Jun/20 22:08 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11824: URL: https://github.com/apache/beam/pull/11824#issuecomment-637831911 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440486) Time Spent: 1h 20m (was: 1h 10m) > Add a HttpIO / HttpFileSystem > - > > Key: BEAM-10101 > URL: https://issues.apache.org/jira/browse/BEAM-10101 > Project: Beam > Issue Type: Bug > Components: io-ideas >Reporter: Ashwin Ramaswami >Assignee: Ashwin Ramaswami >Priority: P2 > Time Spent: 1h 20m > Remaining Estimate: 0h > > Add HttpIO (and a related HttpFileSystem and HttpsFileSystem), which can > download files from a particular http:// or https:// URL. HttpIO cannot > upload / write to files, though, because there's no standardized way to write > to files using HTTP. > Sample usage: > > {code:python} > ( > p > | > ReadFromText("https://raw.githubusercontent.com/apache/beam/5ff5313f0913ec81d31ad306400ad30c0a928b34/NOTICE") > | WriteToText("output.txt", shard_name_template="", num_shards=0) > ) > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
[ https://issues.apache.org/jira/browse/BEAM-10179?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Luke Cwik resolved BEAM-10179. -- Fix Version/s: 2.23.0 Resolution: Fixed > Remove URNJavaDoFn since the workaround is no longer required > - > > Key: BEAM-10179 > URL: https://issues.apache.org/jira/browse/BEAM-10179 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Luke Cwik >Assignee: Luke Cwik >Priority: P2 > Fix For: 2.23.0 > > Time Spent: 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
[ https://issues.apache.org/jira/browse/BEAM-10179?focusedWorklogId=440485&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440485 ] ASF GitHub Bot logged work on BEAM-10179: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:07 Start Date: 02/Jun/20 22:07 Worklog Time Spent: 10m Work Description: lukecwik merged pull request #11899: URL: https://github.com/apache/beam/pull/11899 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440485) Time Spent: 40m (was: 0.5h) > Remove URNJavaDoFn since the workaround is no longer required > - > > Key: BEAM-10179 > URL: https://issues.apache.org/jira/browse/BEAM-10179 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Luke Cwik >Assignee: Luke Cwik >Priority: P2 > Time Spent: 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10101) Add a HttpIO / HttpFileSystem
[ https://issues.apache.org/jira/browse/BEAM-10101?focusedWorklogId=440483&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440483 ] ASF GitHub Bot logged work on BEAM-10101: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:06 Start Date: 02/Jun/20 22:06 Worklog Time Spent: 10m Work Description: pabloem commented on a change in pull request #11824: URL: https://github.com/apache/beam/pull/11824#discussion_r434194515 ## File path: sdks/python/apache_beam/io/httpio.py ## @@ -0,0 +1,167 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""This class implements methods to interact with files at HTTP URLs. + +This I/O only implements methods to read with files at HTTP URLs, because +of the variability in methods by which HTTP content can be written +to a server. If you need to write your results to an HTTP endpoint, +you might want to make your own I/O or use another, more specific, +I/O connector. 
+ +""" + +# pytype: skip-file + +from __future__ import absolute_import + +import io +from builtins import object + +from apache_beam.io.filesystemio import Downloader +from apache_beam.io.filesystemio import DownloaderStream +from apache_beam.internal.http_client import get_new_http +import sys + + +class HttpIO(object): + """HTTP I/O.""" + + def __init__(self, client = None): +if sys.version_info[0] != 3: + raise RuntimeError("HttpIO only supports Python 3.") +self._client = client or get_new_http() +pass + + def open( + self, + uri, + mode='r', + read_buffer_size=16 * 1024 * 1024): + """Open a URL for reading or writing. + + Args: +uri (str): HTTP URL in the form ``http://[path]`` or ``https://[path]``. +mode (str): ``'r'`` or ``'rb'`` for reading. +read_buffer_size (int): Buffer size to use during read operations. + + Returns: +A file object representing the response. + + Raises: +ValueError: Invalid open file mode. + """ +if mode == 'r' or mode == 'rb': + downloader = HttpDownloader(uri, self._client) + return io.BufferedReader( +DownloaderStream(downloader, mode=mode), buffer_size=read_buffer_size) +else: + raise ValueError('Invalid file open mode: %s.' % mode) + + def list_prefix(self, path): +"""Lists files matching the prefix. + +Because there is no common standard for listing files at a given +HTTP URL, this method just returns a single file at the given URL. +This means that listing files only works with an exact path, not +with a glob expression. + +Args: + path: HTTP URL in the form http://[path] or https://[path]. + +Returns: + Dictionary of file name -> size. +""" +return {path: self.size(path)} + + def size(self, uri): +"""Returns the size of a single file stored at a HTTP URL. + +First, the client attempts to make a HEAD request for a non-gzipped version of the file, +and uses the Content-Length header to retrieve the size. If that fails because the server +does not attempt HEAD requests, the client just does a GET requuest to retrieve the length. 
+ +Args: + path: HTTP URL in the form http://[path] or https://[path]. + +Returns: + Size of the HTTP file in bytes. +""" +try: + # Pass in "" for "Accept-Encoding" because we want the non-gzipped content-length. + resp, content = self._client.request(uri, method='HEAD', headers={"Accept-Encoding": ""}) + if resp.status != 200: +raise Exception(resp.status, resp.reason) Review comment: can you raise an IO error? ## File path: sdks/python/apache_beam/io/httpfilesystem.py ## @@ -0,0 +1,210 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy
[jira] [Work logged] (BEAM-10181) pull_licenses script should create python3 virtualenv
[ https://issues.apache.org/jira/browse/BEAM-10181?focusedWorklogId=440479&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440479 ] ASF GitHub Bot logged work on BEAM-10181: - Author: ASF GitHub Bot Created on: 02/Jun/20 22:00 Start Date: 02/Jun/20 22:00 Worklog Time Spent: 10m Work Description: TheNeuralBit opened a new pull request #11900: URL: https://github.com/apache/beam/pull/11900 R: @udim Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/)[![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/)[![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming/lastCompletedBuild/) Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/) | --- | [![Bui
[jira] [Work logged] (BEAM-9742) Add ability to pass FluentBackoff to JdbcIo.Write
[ https://issues.apache.org/jira/browse/BEAM-9742?focusedWorklogId=440477&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440477 ] ASF GitHub Bot logged work on BEAM-9742: Author: ASF GitHub Bot Created on: 02/Jun/20 21:59 Start Date: 02/Jun/20 21:59 Worklog Time Spent: 10m Work Description: Akshay-Iyangar commented on pull request #11396: URL: https://github.com/apache/beam/pull/11396#issuecomment-637828525 @aromanenko-dev @lukecwik could you please have a look? Thanks This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440477) Time Spent: 3h 20m (was: 3h 10m) > Add ability to pass FluentBackoff to JdbcIo.Write > - > > Key: BEAM-9742 > URL: https://issues.apache.org/jira/browse/BEAM-9742 > Project: Beam > Issue Type: Improvement > Components: io-java-jdbc >Reporter: Akshay Iyangar >Assignee: Akshay Iyangar >Priority: P3 > Time Spent: 3h 20m > Remaining Estimate: 0h > > Currently, the FluentBackoff is hardcoded with `maxRetries` and > `initialBackoff` . > It would be helpful if the client were able to pass these values. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Created] (BEAM-10181) pull_licenses script should create python3 virtualenv
Brian Hulette created BEAM-10181: Summary: pull_licenses script should create python3 virtualenv Key: BEAM-10181 URL: https://issues.apache.org/jira/browse/BEAM-10181 Project: Beam Issue Type: Bug Components: build-system Reporter: Brian Hulette Assignee: Brian Hulette Fix For: 2.22.0 Setting fix version as 2.22.0 since this is preventing me from building release containers. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-10181) pull_licenses script should create python3 virtualenv
[ https://issues.apache.org/jira/browse/BEAM-10181?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kenneth Knowles updated BEAM-10181: --- Status: Open (was: Triage Needed) > pull_licenses script should create python3 virtualenv > - > > Key: BEAM-10181 > URL: https://issues.apache.org/jira/browse/BEAM-10181 > Project: Beam > Issue Type: Bug > Components: build-system >Reporter: Brian Hulette >Assignee: Brian Hulette >Priority: P2 > Fix For: 2.22.0 > > > Setting fix version as 2.22.0 since this is preventing me from building > release containers. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
[ https://issues.apache.org/jira/browse/BEAM-10179?focusedWorklogId=440475&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440475 ] ASF GitHub Bot logged work on BEAM-10179: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:55 Start Date: 02/Jun/20 21:55 Worklog Time Spent: 10m Work Description: lostluck commented on pull request #11899: URL: https://github.com/apache/beam/pull/11899#issuecomment-637826896 Run Go PostCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440475) Time Spent: 0.5h (was: 20m) > Remove URNJavaDoFn since the workaround is no longer required > - > > Key: BEAM-10179 > URL: https://issues.apache.org/jira/browse/BEAM-10179 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Luke Cwik >Assignee: Luke Cwik >Priority: P2 > Time Spent: 0.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-3489) Expose the message id of received messages within PubsubMessage
[ https://issues.apache.org/jira/browse/BEAM-3489?focusedWorklogId=440474&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440474 ] ASF GitHub Bot logged work on BEAM-3489: Author: ASF GitHub Bot Created on: 02/Jun/20 21:54 Start Date: 02/Jun/20 21:54 Worklog Time Spent: 10m Work Description: lukecwik commented on a change in pull request #11873: URL: https://github.com/apache/beam/pull/11873#discussion_r434197026 ## File path: runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java ## @@ -1353,14 +1353,14 @@ public void translate(StreamingPubsubIORead transform, TranslationContext contex PropertyNames.PUBSUB_ID_ATTRIBUTE, overriddenTransform.getIdAttribute()); } // In both cases, the transform needs to read PubsubMessage. However, in case it needs - // the attributes, we supply an identity "parse fn" so the worker will read PubsubMessage's - // from Windmill and simply pass them around; and in case it doesn't need attributes, - // we're already implicitly using a "Coder" that interprets the data as a PubsubMessage's - // payload. - if (overriddenTransform.getNeedsAttributes()) { + // the attributes or messageId, we supply an identity "parse fn" so the worker will + // read PubsubMessage's from Windmill and simply pass them around; and in case it + // doesn't need attributes, we're already implicitly using a "Coder" that interprets + // the data as a PubsubMessage's payload. + if (overriddenTransform.getNeedsAttributes() | overriddenTransform.getNeedsMessageId()) { Review comment: Other than this change, this looks good to me. We'll see what the tests have to say. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440474) Time Spent: 9h 10m (was: 9h) > Expose the message id of received messages within PubsubMessage > --- > > Key: BEAM-3489 > URL: https://issues.apache.org/jira/browse/BEAM-3489 > Project: Beam > Issue Type: New Feature > Components: io-java-gcp >Reporter: Luke Cwik >Assignee: Thinh Ha >Priority: P3 > Labels: newbie, starter > Time Spent: 9h 10m > Remaining Estimate: 0h > > This task is about passing forward the message id from the pubsub proto to > the java PubsubMessage. > Add a message id field to PubsubMessage. > Update the coder for PubsubMessage to encode the message id. > Update the translation from the Pubsub proto message to the Dataflow message: > https://github.com/apache/beam/blob/2e275264b21db45787833502e5e42907b05e28b8/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSource.java#L976 -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Created] (BEAM-10180) Upgrade httplib2 to > 0.18.0 to resolve CVE-2020-11078
Jay Crumb created BEAM-10180: Summary: Upgrade httplib2 to > 0.18.0 to resolve CVE-2020-11078 Key: BEAM-10180 URL: https://issues.apache.org/jira/browse/BEAM-10180 Project: Beam Issue Type: Improvement Components: sdk-py-core Reporter: Jay Crumb In versions of httplib2 before 0.18.0, an attacker who could control the url provided to {{httplib2.Http.request()}} could modify the request's headers or body. As I understand from looking at BEAM-9819 the current restriction exists because of a dependency on google-apitools so this may not be a straightforward fix. CVE: [https://nvd.nist.gov/vuln/detail/CVE-2020-11078] GitHub Advisory: [https://github.com/advisories/GHSA-gg84-qgv9-w4pq] Release Notes: https://github.com/httplib2/httplib2/blob/master/CHANGELOG#L7 -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
[ https://issues.apache.org/jira/browse/BEAM-10179?focusedWorklogId=440472&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440472 ] ASF GitHub Bot logged work on BEAM-10179: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:47 Start Date: 02/Jun/20 21:47 Worklog Time Spent: 10m Work Description: lukecwik commented on pull request #11899: URL: https://github.com/apache/beam/pull/11899#issuecomment-637823925 R: @lostluck @youngoli This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440472) Time Spent: 20m (was: 10m) > Remove URNJavaDoFn since the workaround is no longer required > - > > Key: BEAM-10179 > URL: https://issues.apache.org/jira/browse/BEAM-10179 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Luke Cwik >Assignee: Luke Cwik >Priority: P2 > Time Spent: 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
[ https://issues.apache.org/jira/browse/BEAM-10179?focusedWorklogId=440471&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440471 ] ASF GitHub Bot logged work on BEAM-10179: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:46 Start Date: 02/Jun/20 21:46 Worklog Time Spent: 10m Work Description: lukecwik opened a new pull request #11899: URL: https://github.com/apache/beam/pull/11899 Now that the SDK has migrated to use the Dataflow runner v2, this is no longer required. Thank you for your contribution! Follow this checklist to help us incorporate your contribution quickly and easily: - [ ] [**Choose reviewer(s)**](https://beam.apache.org/contribute/#make-your-change) and mention them in a comment (`R: @username`). - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue, if applicable. This will automatically link the pull request to the issue. - [ ] Update `CHANGES.md` with noteworthy changes. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf). See the [Contributor Guide](https://beam.apache.org/contribute) for more tips on [how to make review process smoother](https://beam.apache.org/contribute/#make-reviewers-job-easier). 
Post-Commit Tests Status (on master branch) Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark --- | --- | --- | --- | --- | --- | --- | --- Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/)
[jira] [Created] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
Luke Cwik created BEAM-10179: Summary: Remove URNJavaDoFn since the workaround is no longer required Key: BEAM-10179 URL: https://issues.apache.org/jira/browse/BEAM-10179 Project: Beam Issue Type: Improvement Components: sdk-go Reporter: Luke Cwik Assignee: Luke Cwik -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-10179) Remove URNJavaDoFn since the workaround is no longer required
[ https://issues.apache.org/jira/browse/BEAM-10179?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kenneth Knowles updated BEAM-10179: --- Status: Open (was: Triage Needed) > Remove URNJavaDoFn since the workaround is no longer required > - > > Key: BEAM-10179 > URL: https://issues.apache.org/jira/browse/BEAM-10179 > Project: Beam > Issue Type: Improvement > Components: sdk-go >Reporter: Luke Cwik >Assignee: Luke Cwik >Priority: P2 > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (BEAM-10173) Upgrade zstd-jni to version 1.4.5-2
[ https://issues.apache.org/jira/browse/BEAM-10173?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ismaël Mejía resolved BEAM-10173. - Fix Version/s: 2.23.0 Resolution: Fixed > Upgrade zstd-jni to version 1.4.5-2 > --- > > Key: BEAM-10173 > URL: https://issues.apache.org/jira/browse/BEAM-10173 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: P2 > Fix For: 2.23.0 > > Time Spent: 40m > Remaining Estimate: 0h > > [Facebook recently released zstd > 1.4.5|https://github.com/facebook/zstd/releases/tag/v1.4.5] with good > performance improvements. Notice that zstd is an optional dependency on Beam > (used only in tests) but this is good for end user reference. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10101) Add a HttpIO / HttpFileSystem
[ https://issues.apache.org/jira/browse/BEAM-10101?focusedWorklogId=440468&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440468 ] ASF GitHub Bot logged work on BEAM-10101: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:39 Start Date: 02/Jun/20 21:39 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11824: URL: https://github.com/apache/beam/pull/11824#issuecomment-637821019 ok just looking at this now... This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440468) Time Spent: 1h (was: 50m) > Add a HttpIO / HttpFileSystem > - > > Key: BEAM-10101 > URL: https://issues.apache.org/jira/browse/BEAM-10101 > Project: Beam > Issue Type: Bug > Components: io-ideas >Reporter: Ashwin Ramaswami >Assignee: Ashwin Ramaswami >Priority: P2 > Time Spent: 1h > Remaining Estimate: 0h > > Add HttpIO (and a related HttpFileSystem and HttpsFileSystem), which can > download files from a particular http:// or https:// URL. HttpIO cannot > upload / write to files, though, because there's no standardized way to write > to files using HTTP. > Sample usage: > > {code:python} > ( > p > | > ReadFromText("https://raw.githubusercontent.com/apache/beam/5ff5313f0913ec81d31ad306400ad30c0a928b34/NOTICE") > | WriteToText("output.txt", shard_name_template="", num_shards=0) > ) > {code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9977) Build Kafka Read on top of Java SplittableDoFn
[ https://issues.apache.org/jira/browse/BEAM-9977?focusedWorklogId=440467&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440467 ] ASF GitHub Bot logged work on BEAM-9977: Author: ASF GitHub Bot Created on: 02/Jun/20 21:39 Start Date: 02/Jun/20 21:39 Worklog Time Spent: 10m Work Description: boyuanzz commented on pull request #11749: URL: https://github.com/apache/beam/pull/11749#issuecomment-637820927 > @boyuanzz Yes, afaik, it's used only for [performance testing](http://metrics.beam.apache.org/d/bnlHKP3Wz/java-io-it-tests-dataflow?panelId=21&fullscreen&orgId=1&from=1588539922272&to=1591131922272) on Dataflow Thanks! I'm testing it with Dataflow internally. Enabling related java tests required `runner_v2`. I think the best way now to add tests for this transform would be using python x-lang test. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440467) Time Spent: 6.5h (was: 6h 20m) > Build Kafka Read on top of Java SplittableDoFn > -- > > Key: BEAM-9977 > URL: https://issues.apache.org/jira/browse/BEAM-9977 > Project: Beam > Issue Type: New Feature > Components: io-java-kafka >Reporter: Boyuan Zhang >Assignee: Boyuan Zhang >Priority: P2 > Time Spent: 6.5h > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10173) Upgrade zstd-jni to version 1.4.5-2
[ https://issues.apache.org/jira/browse/BEAM-10173?focusedWorklogId=440469&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440469 ] ASF GitHub Bot logged work on BEAM-10173: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:39 Start Date: 02/Jun/20 21:39 Worklog Time Spent: 10m Work Description: iemejia merged pull request #11888: URL: https://github.com/apache/beam/pull/11888 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440469) Time Spent: 40m (was: 0.5h) > Upgrade zstd-jni to version 1.4.5-2 > --- > > Key: BEAM-10173 > URL: https://issues.apache.org/jira/browse/BEAM-10173 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core >Reporter: Ismaël Mejía >Assignee: Ismaël Mejía >Priority: P2 > Time Spent: 40m > Remaining Estimate: 0h > > [Facebook recently released zstd > 1.4.5|https://github.com/facebook/zstd/releases/tag/v1.4.5] with good > performance improvements. Notice that zstd is an optional dependency on Beam > (used only in tests) but this is good for end user reference. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-3489) Expose the message id of received messages within PubsubMessage
[ https://issues.apache.org/jira/browse/BEAM-3489?focusedWorklogId=440462&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440462 ] ASF GitHub Bot logged work on BEAM-3489: Author: ASF GitHub Bot Created on: 02/Jun/20 21:34 Start Date: 02/Jun/20 21:34 Worklog Time Spent: 10m Work Description: lukecwik commented on a change in pull request #11873: URL: https://github.com/apache/beam/pull/11873#discussion_r434188515 ## File path: runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java ## @@ -1353,14 +1353,14 @@ public void translate(StreamingPubsubIORead transform, TranslationContext contex PropertyNames.PUBSUB_ID_ATTRIBUTE, overriddenTransform.getIdAttribute()); } // In both cases, the transform needs to read PubsubMessage. However, in case it needs - // the attributes, we supply an identity "parse fn" so the worker will read PubsubMessage's - // from Windmill and simply pass them around; and in case it doesn't need attributes, - // we're already implicitly using a "Coder" that interprets the data as a PubsubMessage's - // payload. - if (overriddenTransform.getNeedsAttributes()) { + // the attributes or messageId, we supply an identity "parse fn" so the worker will + // read PubsubMessage's from Windmill and simply pass them around; and in case it + // doesn't need attributes, we're already implicitly using a "Coder" that interprets + // the data as a PubsubMessage's payload. + if (overriddenTransform.getNeedsAttributes() | overriddenTransform.getNeedsMessageId()) { Review comment: Minor suggestion: ```suggestion if (overriddenTransform.getNeedsAttributes() || overriddenTransform.getNeedsMessageId()) { ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440462) Time Spent: 9h (was: 8h 50m) > Expose the message id of received messages within PubsubMessage > --- > > Key: BEAM-3489 > URL: https://issues.apache.org/jira/browse/BEAM-3489 > Project: Beam > Issue Type: New Feature > Components: io-java-gcp >Reporter: Luke Cwik >Assignee: Thinh Ha >Priority: P3 > Labels: newbie, starter > Time Spent: 9h > Remaining Estimate: 0h > > This task is about passing forward the message id from the pubsub proto to > the java PubsubMessage. > Add a message id field to PubsubMessage. > Update the coder for PubsubMessage to encode the message id. > Update the translation from the Pubsub proto message to the Dataflow message: > https://github.com/apache/beam/blob/2e275264b21db45787833502e5e42907b05e28b8/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubUnboundedSource.java#L976 -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9008) Add readAll() method to CassandraIO
[ https://issues.apache.org/jira/browse/BEAM-9008?focusedWorklogId=440459&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440459 ] ASF GitHub Bot logged work on BEAM-9008: Author: ASF GitHub Bot Created on: 02/Jun/20 21:28 Start Date: 02/Jun/20 21:28 Worklog Time Spent: 10m Work Description: iemejia commented on a change in pull request #10546: URL: https://github.com/apache/beam/pull/10546#discussion_r433896372 ## File path: sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/RingRange.java ## @@ -55,4 +58,9 @@ public boolean isWrapping() { public String toString() { return String.format("(%s,%s]", start.toString(), end.toString()); } + + public static RingRange fromEncodedKey(Metadata metadata, ByteBuffer... bb) { Review comment: Can we move this method to the test class where it is used? I don't want to add Cassandra specific `Metadata` to the public API of RingRange with the hope this will help us evolve RingRange into a proper `Restriction` (future work out of the scope of this PR). Can you also please add a `public static RingRange of(BigInteger start, BigInteger end)` method and make the normal constructor private and refactor in every use. ## File path: sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java ## @@ -326,7 +371,78 @@ private CassandraIO() {} checkArgument(entity() != null, "withEntity() is required"); checkArgument(coder() != null, "withCoder() is required"); - return input.apply(org.apache.beam.sdk.io.Read.from(new CassandraSource<>(this, null))); + ReadAll readAll = CassandraIO.readAll().withCoder(this.coder()); + + return input + .apply(Create.of(this)) + .apply(ParDo.of(new SplitFn())) Review comment: Move the Split as the first step of the `ReadAll` expansion so non-advanced users (those who do not specify `RingRange` manually) could get their code 'partitioned' correctly. 
## File path: sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java ## @@ -317,9 +302,70 @@ public void testRead() throws Exception { PAssert.that(mapped.apply("Count occurrences per scientist", Count.perKey())) .satisfies( input -> { + int count = 0; for (KV element : input) { +count++; assertEquals(element.getKey(), NUM_ROWS / 10, element.getValue().longValue()); } + assertEquals(11, count); + return null; +}); + +pipeline.run(); + } + + CassandraIO.Read getReadWithRingRange(RingRange... rr) { Review comment: private ## File path: sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java ## @@ -191,39 +186,44 @@ private static void insertData() throws Exception { LOG.info("Create Cassandra tables"); session.execute( String.format( -"CREATE TABLE IF NOT EXISTS %s.%s(person_id int, person_name text, PRIMARY KEY" -+ "(person_id));", +"CREATE TABLE IF NOT EXISTS %s.%s(person_department text, person_id int, person_name text, PRIMARY KEY" Review comment: nit: For the tests can we assure that every reference to the tables and Scientist object usage follows this order: id, name, department. 
## File path: sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java ## @@ -480,66 +527,22 @@ public void testCustomMapperImplDelete() { assertEquals(1, counter.intValue()); } - @Test - public void testSplit() throws Exception { -PipelineOptions options = PipelineOptionsFactory.create(); -CassandraIO.Read read = -CassandraIO.read() -.withHosts(Collections.singletonList(CASSANDRA_HOST)) -.withPort(cassandraPort) -.withKeyspace(CASSANDRA_KEYSPACE) -.withTable(CASSANDRA_TABLE) -.withEntity(Scientist.class) -.withCoder(SerializableCoder.of(Scientist.class)); - -// initialSource will be read without splitting (which does not happen in production) -// so we need to provide splitQueries to avoid NPE in source.reader.start() -String splitQuery = QueryBuilder.select().from(CASSANDRA_KEYSPACE, CASSANDRA_TABLE).toString(); -CassandraIO.CassandraSource initialSource = -new CassandraIO.CassandraSource<>(read, Collections.singletonList(splitQuery)); -int desiredBundleSizeBytes = 2048; -long estimatedSize = initialSource.getEstimatedSizeBytes(options); -List> splits = initialSource.split(desiredBundleSizeBytes, options); -SourceTestUtils.assertSourcesEqualReferenceSource(initialSource, splits, options); -float
[jira] [Work logged] (BEAM-10027) Support for Kotlin-based Beam Katas
[ https://issues.apache.org/jira/browse/BEAM-10027?focusedWorklogId=440455&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440455 ] ASF GitHub Bot logged work on BEAM-10027: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:23 Start Date: 02/Jun/20 21:23 Worklog Time Spent: 10m Work Description: rionmonster commented on pull request #11761: URL: https://github.com/apache/beam/pull/11761#issuecomment-637814323 @henryken / @pabloem Thanks to both of you guys for helping me through this process, both in review and getting it merged in. I shot out an e-mail on the user mailing list, and will likely write up a short blog post on my personal blog when I get a chance. If there's a more appropriate place to add links to the course, or another PR I could push up to the Beam site, please let me know and I can put together something for that as well. I know I've personally run into quite a few "gotchas" related to iterations between Beam and Kotlin, so it wouldn't hurt to document those somewhere (e.g. a blog post) as well. Thanks again guys! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440455) Time Spent: 15.5h (was: 15h 20m) > Support for Kotlin-based Beam Katas > --- > > Key: BEAM-10027 > URL: https://issues.apache.org/jira/browse/BEAM-10027 > Project: Beam > Issue Type: Improvement > Components: katas >Reporter: Rion Williams >Assignee: Rion Williams >Priority: P2 > Original Estimate: 8h > Time Spent: 15.5h > Remaining Estimate: 0h > > Currently, there are a series of examples available demonstrating the use of > Apache Beam with Kotlin. It would be nice to have support for the same Beam > Katas that exist for Python, Go, and Java to also support Kotlin. 
> The port itself shouldn't be that involved since it can still target the JVM, > so it would likely just require the inclusion for Kotlin dependencies and a > conversion for all of the existing Java examples. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-7074) FnApiRunner fails to wire multiple timer specs in single pardo
[ https://issues.apache.org/jira/browse/BEAM-7074?focusedWorklogId=440454&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440454 ] ASF GitHub Bot logged work on BEAM-7074: Author: ASF GitHub Bot Created on: 02/Jun/20 21:19 Start Date: 02/Jun/20 21:19 Worklog Time Spent: 10m Work Description: tweise commented on a change in pull request #11894: URL: https://github.com/apache/beam/pull/11894#discussion_r434181735 ## File path: sdks/python/apache_beam/runners/portability/portable_runner_test.py ## @@ -241,7 +241,8 @@ def process(self, kv, index=beam.DoFn.StateParam(index_state_spec)): equal_to(expected)) # Inherits all other tests from fn_api_runner_test.FnApiRunnerTest - + def test_pardo_timers_clear(self): Review comment: This may also be used for Spark. Skip should probably only occur in leaf classes that strictly need it. Otherwise, we are almost back to what we had before ;-) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440454) Time Spent: 1h 10m (was: 1h) > FnApiRunner fails to wire multiple timer specs in single pardo > -- > > Key: BEAM-7074 > URL: https://issues.apache.org/jira/browse/BEAM-7074 > Project: Beam > Issue Type: Bug > Components: sdk-py-harness >Affects Versions: 2.11.0 >Reporter: Thomas Weise >Priority: P2 > Labels: stale-P2 > Fix For: 2.21.0 > > Time Spent: 1h 10m > Remaining Estimate: 0h > > Multiple timer specs in a ParDo yield "NotImplementedError: Timers and side > inputs." -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10175) FhirIO execute bundle uses deprecated auth uri param
[ https://issues.apache.org/jira/browse/BEAM-10175?focusedWorklogId=440450&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440450 ] ASF GitHub Bot logged work on BEAM-10175: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:16 Start Date: 02/Jun/20 21:16 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11893: URL: https://github.com/apache/beam/pull/11893#issuecomment-637810880 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440450) Time Spent: 50m (was: 40m) > FhirIO execute bundle uses deprecated auth uri param > > > Key: BEAM-10175 > URL: https://issues.apache.org/jira/browse/BEAM-10175 > Project: Beam > Issue Type: Improvement > Components: io-java-gcp >Affects Versions: 2.22.0 >Reporter: Jacob Ferriero >Assignee: Jacob Ferriero >Priority: P2 > Time Spent: 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-7074) FnApiRunner fails to wire multiple timer specs in single pardo
[ https://issues.apache.org/jira/browse/BEAM-7074?focusedWorklogId=440448&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440448 ] ASF GitHub Bot logged work on BEAM-7074: Author: ASF GitHub Bot Created on: 02/Jun/20 21:10 Start Date: 02/Jun/20 21:10 Worklog Time Spent: 10m Work Description: boyuanzz commented on a change in pull request #11894: URL: https://github.com/apache/beam/pull/11894#discussion_r434177432 ## File path: sdks/python/apache_beam/runners/portability/portable_runner_test.py ## @@ -241,7 +241,8 @@ def process(self, kv, index=beam.DoFn.StateParam(index_state_spec)): equal_to(expected)) # Inherits all other tests from fn_api_runner_test.FnApiRunnerTest - + def test_pardo_timers_clear(self): Review comment: Ah yes. I thought `FlinkRunnerTest` is inherited from `FnRunnerTest`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440448) Time Spent: 1h (was: 50m) > FnApiRunner fails to wire multiple timer specs in single pardo > -- > > Key: BEAM-7074 > URL: https://issues.apache.org/jira/browse/BEAM-7074 > Project: Beam > Issue Type: Bug > Components: sdk-py-harness >Affects Versions: 2.11.0 >Reporter: Thomas Weise >Priority: P2 > Labels: stale-P2 > Fix For: 2.21.0 > > Time Spent: 1h > Remaining Estimate: 0h > > Multiple timer specs in a ParDo yield "NotImplementedError: Timers and side > inputs." -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-7074) FnApiRunner fails to wire multiple timer specs in single pardo
[ https://issues.apache.org/jira/browse/BEAM-7074?focusedWorklogId=440447&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440447 ] ASF GitHub Bot logged work on BEAM-7074: Author: ASF GitHub Bot Created on: 02/Jun/20 21:07 Start Date: 02/Jun/20 21:07 Worklog Time Spent: 10m Work Description: tweise commented on a change in pull request #11894: URL: https://github.com/apache/beam/pull/11894#discussion_r434176084 ## File path: sdks/python/apache_beam/runners/portability/portable_runner_test.py ## @@ -241,7 +241,8 @@ def process(self, kv, index=beam.DoFn.StateParam(index_state_spec)): equal_to(expected)) # Inherits all other tests from fn_api_runner_test.FnApiRunnerTest - + def test_pardo_timers_clear(self): Review comment: Doesn't this turn off the test for `FlinkRunnerTest` and anything else that extends `PortableRunner`? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440447) Time Spent: 50m (was: 40m) > FnApiRunner fails to wire multiple timer specs in single pardo > -- > > Key: BEAM-7074 > URL: https://issues.apache.org/jira/browse/BEAM-7074 > Project: Beam > Issue Type: Bug > Components: sdk-py-harness >Affects Versions: 2.11.0 >Reporter: Thomas Weise >Priority: P2 > Labels: stale-P2 > Fix For: 2.21.0 > > Time Spent: 50m > Remaining Estimate: 0h > > Multiple timer specs in a ParDo yield "NotImplementedError: Timers and side > inputs." -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9977) Build Kafka Read on top of Java SplittableDoFn
[ https://issues.apache.org/jira/browse/BEAM-9977?focusedWorklogId=440446&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440446 ] ASF GitHub Bot logged work on BEAM-9977: Author: ASF GitHub Bot Created on: 02/Jun/20 21:07 Start Date: 02/Jun/20 21:07 Worklog Time Spent: 10m Work Description: aromanenko-dev commented on pull request #11749: URL: https://github.com/apache/beam/pull/11749#issuecomment-637806753 Run Java PreCommit This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440446) Time Spent: 6h 20m (was: 6h 10m) > Build Kafka Read on top of Java SplittableDoFn > -- > > Key: BEAM-9977 > URL: https://issues.apache.org/jira/browse/BEAM-9977 > Project: Beam > Issue Type: New Feature > Components: io-java-kafka >Reporter: Boyuan Zhang >Assignee: Boyuan Zhang >Priority: P2 > Time Spent: 6h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-8841) Add ability to perform BigQuery file loads using avro
[ https://issues.apache.org/jira/browse/BEAM-8841?focusedWorklogId=440444&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440444 ] ASF GitHub Bot logged work on BEAM-8841: Author: ASF GitHub Bot Created on: 02/Jun/20 21:06 Start Date: 02/Jun/20 21:06 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11896: URL: https://github.com/apache/beam/pull/11896#issuecomment-637806282 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440444) Time Spent: 8.5h (was: 8h 20m) > Add ability to perform BigQuery file loads using avro > - > > Key: BEAM-8841 > URL: https://issues.apache.org/jira/browse/BEAM-8841 > Project: Beam > Issue Type: Improvement > Components: io-py-gcp >Reporter: Chun Yang >Assignee: Chun Yang >Priority: P3 > Fix For: 2.21.0 > > Time Spent: 8.5h > Remaining Estimate: 0h > > Currently, JSON format is used for file loads into BigQuery in the Python > SDK. JSON has some disadvantages including size of serialized data and > inability to represent NaN and infinity float values. > BigQuery supports loading files in avro format, which can overcome these > disadvantages. The Java SDK already supports loading files using avro format > (BEAM-2879) so it makes sense to support it in the Python SDK as well. > The change will be somewhere around > [{{BigQueryBatchFileLoads}}|https://github.com/apache/beam/blob/3e7865ee6c6a56e51199515ec5b4b16de1ddd166/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py#L554]. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (BEAM-9621) Python SqlTransform follow-ups
[ https://issues.apache.org/jira/browse/BEAM-9621?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17124315#comment-17124315 ] Brian Hulette commented on BEAM-9621: - This is a tracking jira. Not sure how to make Beam Jira Bot happy with it. Dropped priority to P4. > Python SqlTransform follow-ups > -- > > Key: BEAM-9621 > URL: https://issues.apache.org/jira/browse/BEAM-9621 > Project: Beam > Issue Type: Improvement > Components: cross-language, dsl-sql, sdk-py-core >Reporter: Brian Hulette >Assignee: Brian Hulette >Priority: P4 > > Tracking JIRA for follow-up work to improve SqlTransform in Python -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-9621) Python SqlTransform follow-ups
[ https://issues.apache.org/jira/browse/BEAM-9621?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Brian Hulette updated BEAM-9621: Labels: (was: stale-assigned) > Python SqlTransform follow-ups > -- > > Key: BEAM-9621 > URL: https://issues.apache.org/jira/browse/BEAM-9621 > Project: Beam > Issue Type: Improvement > Components: cross-language, dsl-sql, sdk-py-core >Reporter: Brian Hulette >Assignee: Brian Hulette >Priority: P4 > > Tracking JIRA for follow-up work to improve SqlTransform in Python -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-9977) Build Kafka Read on top of Java SplittableDoFn
[ https://issues.apache.org/jira/browse/BEAM-9977?focusedWorklogId=440445&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440445 ] ASF GitHub Bot logged work on BEAM-9977: Author: ASF GitHub Bot Created on: 02/Jun/20 21:06 Start Date: 02/Jun/20 21:06 Worklog Time Spent: 10m Work Description: aromanenko-dev commented on pull request #11749: URL: https://github.com/apache/beam/pull/11749#issuecomment-637806567 @boyuanzz Yes, afaik, it's used only for [performance testing](http://metrics.beam.apache.org/d/bnlHKP3Wz/java-io-it-tests-dataflow?panelId=21&fullscreen&orgId=1&from=1588539922272&to=1591131922272) on Dataflow This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440445) Time Spent: 6h 10m (was: 6h) > Build Kafka Read on top of Java SplittableDoFn > -- > > Key: BEAM-9977 > URL: https://issues.apache.org/jira/browse/BEAM-9977 > Project: Beam > Issue Type: New Feature > Components: io-java-kafka >Reporter: Boyuan Zhang >Assignee: Boyuan Zhang >Priority: P2 > Time Spent: 6h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-9621) Python SqlTransform follow-ups
[ https://issues.apache.org/jira/browse/BEAM-9621?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Brian Hulette updated BEAM-9621: Priority: P4 (was: P2) > Python SqlTransform follow-ups > -- > > Key: BEAM-9621 > URL: https://issues.apache.org/jira/browse/BEAM-9621 > Project: Beam > Issue Type: Improvement > Components: cross-language, dsl-sql, sdk-py-core >Reporter: Brian Hulette >Assignee: Brian Hulette >Priority: P4 > Labels: stale-assigned > > Tracking JIRA for follow-up work to improve SqlTransform in Python -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (BEAM-6550) ParDo Async Java API
[ https://issues.apache.org/jira/browse/BEAM-6550?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Bharath Kumarasubramanian updated BEAM-6550: Labels: (was: stale-assigned) > ParDo Async Java API > > > Key: BEAM-6550 > URL: https://issues.apache.org/jira/browse/BEAM-6550 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core >Reporter: Xinyu Liu >Assignee: Bharath Kumarasubramanian >Priority: P2 > Time Spent: 20m > Remaining Estimate: 0h > > This ticket is to track the work on adding the ParDo async API. The > motivation for this is: > - Many users are experienced in asynchronous programming. With async > frameworks such as Netty and ParSeq and libs like async jersey client, they > are able to make remote calls efficiently and the libraries help manage the > execution threads underneath. Async remote calls are very common in most of > our streaming applications today. > - Many jobs are running on a multi-tenancy cluster. Async processing helps > for less resource usage and fast computation (less context switch). > This API has become one of the most asked Java api from SamzaRunner users. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (BEAM-6550) ParDo Async Java API
[ https://issues.apache.org/jira/browse/BEAM-6550?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17124313#comment-17124313 ] Bharath Kumarasubramanian commented on BEAM-6550: - Thanks for following up. I have a change in progress that covers API, Direct & Samza runner implementations. I ran into a bunch of direct runner test failures. Will reopen the PR once I fix the tests. > ParDo Async Java API > > > Key: BEAM-6550 > URL: https://issues.apache.org/jira/browse/BEAM-6550 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core >Reporter: Xinyu Liu >Assignee: Bharath Kumarasubramanian >Priority: P2 > Labels: stale-assigned > Time Spent: 20m > Remaining Estimate: 0h > > This ticket is to track the work on adding the ParDo async API. The > motivation for this is: > - Many users are experienced in asynchronous programming. With async > frameworks such as Netty and ParSeq and libs like async jersey client, they > are able to make remote calls efficiently and the libraries help manage the > execution threads underneath. Async remote calls are very common in most of > our streaming applications today. > - Many jobs are running on a multi-tenancy cluster. Async processing helps > for less resource usage and fast computation (less context switch). > This API has become one of the most asked Java api from SamzaRunner users. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440442&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440442 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 21:03 Start Date: 02/Jun/20 21:03 Worklog Time Spent: 10m Work Description: pabloem commented on a change in pull request #11898: URL: https://github.com/apache/beam/pull/11898#discussion_r434172894 ## File path: sdks/python/apache_beam/io/gcp/bigquery.py ## @@ -1702,7 +1702,8 @@ def _get_destination_uri(self, temp_location): logging.debug("gcs_location is empty, using temp_location instead") else: raise ValueError( - '{} requires a GCS location to be provided'.format( + '{} requires a GCS location to be provided. Neither option' + '--gcs_location nor the fallback --temp_location is set.'.format( Review comment: `gcs_location` is not a pipeline option. It's a constructor argument. Can you error out as such? ## File path: sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java ## @@ -243,7 +243,8 @@ public void validate(PipelineOptions options) { } checkArgument( !Strings.isNullOrEmpty(tempLocation), -"BigQueryIO.Write needs a GCS temp location to store temp files."); +"BigQueryIO.Write needs a GCS temp location to store temp files." ++ "This can be set with option --tempLocation."); Review comment: Please document customGcs location method as well. Thanks! ## File path: sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java ## @@ -941,7 +941,8 @@ public void validate(PipelineOptions options) { String tempLocation = bqOptions.getTempLocation(); checkArgument( !Strings.isNullOrEmpty(tempLocation), -"BigQueryIO.Read needs a GCS temp location to store temp files."); +"BigQueryIO.Read needs a GCS temp location to store temp files." 
++ "This can be set with option --tempLocation."); Review comment: Java also has the ability to provide a custom gcs location. Can you document it as well? https://github.com/apache/beam/blob/0a0399f71cf14ecabe7e73b6cd596325bb7ff2ea/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java#L2314-L2317 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440442) Time Spent: 40m (was: 0.5h) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 40m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest
[jira] [Resolved] (BEAM-9390) [PostCommit_Java_PortabilityApi] [BigQuery related ITs] UnsupportedOperationException: BigQuery source must be split before being read
[ https://issues.apache.org/jira/browse/BEAM-9390?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Brian Hulette resolved BEAM-9390. - Fix Version/s: Not applicable Resolution: Won't Fix This is obsolete since the job in question was disabled in https://github.com/apache/beam/pull/11635 > [PostCommit_Java_PortabilityApi] [BigQuery related ITs] > UnsupportedOperationException: BigQuery source must be split before being read > -- > > Key: BEAM-9390 > URL: https://issues.apache.org/jira/browse/BEAM-9390 > Project: Beam > Issue Type: Bug > Components: test-failures >Reporter: Robin Qiu >Assignee: Luke Cwik >Priority: P2 > Labels: currently-failing, stale-assigned > Fix For: Not applicable > > > Failed tests: > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport > > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2eBigQueryTornadoesWithStorageApi > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryTimePartitioningClusteringIT.testE2EBigQueryClusteringTableFunction > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryTimePartitioningClusteringIT.testE2EBigQueryClusteringDynamicDestinations > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryTimePartitioningClusteringIT.testE2EBigQueryTimePartitioning > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryTimePartitioningClusteringIT.testE2EBigQueryClustering > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryToTableIT.testNewTypesQueryWithReshuffle > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryToTableIT.testLegacyQueryWithoutReshuffle > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryToTableIT.testNewTypesQueryWithoutReshuffle > > org.apache.beam.sdk.io.gcp.bigquery.BigQueryToTableIT.testStandardQueryWithoutCustom > ([https://builds.apache.org/job/beam_PostCommit_Java_PortabilityApi/4226/#showFailuresLink)] > > Example failures: > java.lang.RuntimeException: java.util.concurrent.ExecutionException: > java.lang.RuntimeException: Error received from SDK harness for instruction > 
-596: java.lang.UnsupportedOperationException: BigQuery source must be split > before being read at > org.apache.beam.sdk.io.gcp.bigquery.BigQuerySourceBase.createReader(BigQuerySourceBase.java:173) > at > org.apache.beam.fn.harness.BoundedSourceRunner.runReadLoop(BoundedSourceRunner.java:159) > at > org.apache.beam.fn.harness.BoundedSourceRunner.start(BoundedSourceRunner.java:146) > ... -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440440&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440440 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 20:59 Start Date: 02/Jun/20 20:59 Worklog Time Spent: 10m Work Description: pabloem commented on pull request #11898: URL: https://github.com/apache/beam/pull/11898#issuecomment-637803100 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440440) Time Spent: 0.5h (was: 20m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 0.5h > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:292) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at org.junit.runners.ParentRunner.run(ParentRunner.java:396) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.runTestClass(JUnitTestClassExecutor.java:110) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.execut
[jira] [Resolved] (BEAM-8808) TestBigQueryOptions is never registered
[ https://issues.apache.org/jira/browse/BEAM-8808?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Brian Hulette resolved BEAM-8808. - Fix Version/s: Not applicable Resolution: Won't Fix I don't think this is actually a bug. Test options don't need to be registered since they're just set programmatically. Closing as won't fix. > TestBigQueryOptions is never registered > --- > > Key: BEAM-8808 > URL: https://issues.apache.org/jira/browse/BEAM-8808 > Project: Beam > Issue Type: Bug > Components: sdk-java-core >Reporter: Brian Hulette >Assignee: Brian Hulette >Priority: P2 > Labels: stale-assigned > Fix For: Not applicable > > > So it's not possible to set targetDataset -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Work logged] (BEAM-10178) Error messages for unspecified options should display the command line flag that needs to be specified
[ https://issues.apache.org/jira/browse/BEAM-10178?focusedWorklogId=440438&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-440438 ] ASF GitHub Bot logged work on BEAM-10178: - Author: ASF GitHub Bot Created on: 02/Jun/20 20:51 Start Date: 02/Jun/20 20:51 Worklog Time Spent: 10m Work Description: KevinGG commented on pull request #11898: URL: https://github.com/apache/beam/pull/11898#issuecomment-637798776 R: @pabloem PTAL, thx! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking --- Worklog Id: (was: 440438) Time Spent: 20m (was: 10m) > Error messages for unspecified options should display the command line flag > that needs to be specified > -- > > Key: BEAM-10178 > URL: https://issues.apache.org/jira/browse/BEAM-10178 > Project: Beam > Issue Type: Improvement > Components: sdk-java-core, sdk-py-core >Reporter: Ning Kang >Assignee: Ning Kang >Priority: P3 > Time Spent: 20m > Remaining Estimate: 0h > > An example error trace: > {code:java} > java.lang.IllegalArgumentException: BigQueryIO.Read needs a GCS temp location > to store temp files. 
> at > org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument(Preconditions.java:122) > at > org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$TypedRead.validate(BigQueryIO.java:762) > at > org.apache.beam.sdk.Pipeline$ValidateVisitor.enterCompositeTransform(Pipeline.java:641) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:653) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:657) > at > org.apache.beam.sdk.runners.TransformHierarchy$Node.access$600(TransformHierarchy.java:317) > at > org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:251) > at > org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:458) > at org.apache.beam.sdk.Pipeline.validate(Pipeline.java:577) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:312) > at org.apache.beam.sdk.Pipeline.run(Pipeline.java:299) > at > org.apache.beam.examples.cookbook.BigQueryTornadoes.runBigQueryTornadoes(BigQueryTornadoes.java:199) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.runE2EBigQueryTornadoesTest(BigQueryTornadoesIT.java:70) > at > org.apache.beam.examples.cookbook.BigQueryTornadoesIT.testE2EBigQueryTornadoesWithExport(BigQueryTornadoesIT.java:82) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > 
org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:349) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:314) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:312) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:292) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at org.junit.runners.ParentRunner.run(ParentRunner.java:396) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.runTestClass(JUnitTestClassExecutor.java:110) > at > org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecutor.